jiff/util/
array_str.rs

/// A small, fixed-capacity string stored inline (no heap allocation).
///
/// It is deliberately simple rather than maximally efficient. It replaced
/// some uses of `Box<str>` for tiny strings as part of an effort to reduce
/// our dependence on dynamic memory allocation.
///
/// `N` must be less than `256` so that the length fits in a `u8`. Nothing
/// bigger is needed, and the restriction keeps storage requirements down.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) struct ArrayStr<const N: usize> {
    /// Backing storage for the UTF-8 bytes of the string.
    ///
    /// Both the whole array and the `0..self.len` prefix are always valid
    /// UTF-8.
    bytes: [u8; N],
    /// How many bytes of `bytes` are in use by the string.
    ///
    /// (In some cases this byte could be saved by using a NUL terminator
    /// instead. For example, NUL bytes aren't permitted in POSIX time zone
    /// abbreviation strings. But an explicit length is simpler, costs only
    /// one byte and generalizes. We also aren't really trying to
    /// micro-optimize the storage requirements of these array strings. Or at
    /// least, I don't know of a reason to.)
    len: u8,
}
25
26impl<const N: usize> ArrayStr<N> {
27    /// Creates a new fixed capacity string.
28    ///
29    /// If the given string exceeds `N` bytes, then this returns
30    /// `None`.
31    pub(crate) fn new(s: &str) -> Option<ArrayStr<N>> {
32        let len = s.len();
33        if len > N {
34            return None;
35        }
36        let mut bytes = [0; N];
37        bytes[..len].copy_from_slice(s.as_bytes());
38        // OK because we don't ever use anything bigger than u8::MAX for `N`.
39        // And we probably shouldn't, because that would be a pretty chunky
40        // array. If such a thing is needed, please file an issue to discuss.
41        debug_assert!(
42            N <= usize::from(u8::MAX),
43            "size of ArrayStr is too big"
44        );
45        let len = u8::try_from(len).unwrap();
46        Some(ArrayStr { bytes, len })
47    }
48
49    /// Returns the capacity of this fixed string.
50    pub(crate) const fn capacity() -> usize {
51        N
52    }
53
54    /// Append the bytes given to the end of this string.
55    ///
56    /// If the capacity would be exceeded, then this is a no-op and `false`
57    /// is returned.
58    pub(crate) fn push_str(&mut self, s: &str) -> bool {
59        let len = usize::from(self.len);
60        let Some(new_len) = len.checked_add(s.len()) else { return false };
61        if new_len > N {
62            return false;
63        }
64        self.bytes[len..new_len].copy_from_slice(s.as_bytes());
65        // OK because we don't ever use anything bigger than u8::MAX for `N`.
66        // And we probably shouldn't, because that would be a pretty chunky
67        // array. If such a thing is needed, please file an issue to discuss.
68        debug_assert!(
69            N <= usize::from(u8::MAX),
70            "size of ArrayStr is too big"
71        );
72        self.len = u8::try_from(new_len).unwrap();
73        true
74    }
75
76    /// Returns this array string as a string slice.
77    pub(crate) fn as_str(&self) -> &str {
78        // OK because construction guarantees valid UTF-8.
79        //
80        // This is bullet proof enough to use unchecked `str` construction
81        // here, but I can't dream up of a benchmark where it matters.
82        core::str::from_utf8(&self.bytes[..usize::from(self.len)]).unwrap()
83    }
84}
85
86/// Easy construction of `ArrayStr` from `&'static str`.
87///
88/// We specifically limit to `&'static str` to approximate string literals.
89/// This prevents most cases of accidentally creating a non-string literal
90/// that panics if the string is too big.
91///
92/// This impl primarily exists to make writing tests more convenient.
93impl<const N: usize> From<&'static str> for ArrayStr<N> {
94    fn from(s: &'static str) -> ArrayStr<N> {
95        ArrayStr::new(s).unwrap()
96    }
97}
98
99impl<const N: usize> PartialEq<str> for ArrayStr<N> {
100    fn eq(&self, rhs: &str) -> bool {
101        self.as_str() == rhs
102    }
103}
104
105impl<const N: usize> PartialEq<&str> for ArrayStr<N> {
106    fn eq(&self, rhs: &&str) -> bool {
107        self.as_str() == *rhs
108    }
109}
110
111impl<const N: usize> PartialEq<ArrayStr<N>> for str {
112    fn eq(&self, rhs: &ArrayStr<N>) -> bool {
113        self == rhs.as_str()
114    }
115}
116
117impl<const N: usize> core::fmt::Debug for ArrayStr<N> {
118    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
119        core::fmt::Debug::fmt(self.as_str(), f)
120    }
121}
122
123impl<const N: usize> core::fmt::Display for ArrayStr<N> {
124    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
125        core::fmt::Display::fmt(self.as_str(), f)
126    }
127}
128
129impl<const N: usize> core::fmt::Write for ArrayStr<N> {
130    fn write_str(&mut self, s: &str) -> core::fmt::Result {
131        if self.push_str(s) {
132            Ok(())
133        } else {
134            Err(core::fmt::Error)
135        }
136    }
137}
138
/// The maximum length, in bytes, that we permit for a time zone abbreviation.
///
/// This limit is self-imposed. POSIX says this:
///
/// > Indicate no less than three, nor more than {TZNAME_MAX}, bytes that are
/// > the designation for the standard (std) or the alternative (dst -such as
/// > Daylight Savings Time) timezone.
///
/// Querying `TZNAME_MAX` doesn't seem worth the trouble, though.
/// Interestingly, IANA says:
///
/// > are 3 or more characters specifying the standard and daylight saving time
/// > (DST) zone abbreviations
///
/// That implies IANA thinks there is no limit at all, which seems unwise.
/// Moreover, in practice, the `date` utility appears to support fairly long
/// abbreviations. On my mac (so, BSD `date` as I understand it):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 20:05:58 YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// And on my Linux machine (so, GNU `date`):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 08:05:36 PM YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// I don't know exactly what limit these programs use, but 30 seems good
/// enough?
///
/// (Previously, the limit was 255 and the string was stuffed in a
/// `Box<str>`. But as part of work on [#168], I was looking to remove
/// allocation from as many places as possible, and this was one candidate.
/// Making room on the stack for 255 byte abbreviations seemed gratuitous, so
/// I picked something smaller. If we come across an abbreviation bigger than
/// this max, then we'll error.)
///
/// [#168]: https://github.com/BurntSushi/jiff/issues/168
const ABBREVIATION_MAX: usize = 30;
181
182/// A type alias for centralizing the definition of a time zone abbreviation.
183///
184/// Basically, this creates one single coherent place where we control the
185/// length of a time zone abbreviation.
186pub(crate) type Abbreviation = ArrayStr<ABBREVIATION_MAX>;
187
#[cfg(test)]
mod tests {
    use core::fmt::Write;

    use super::*;

    /// Writes via `core::fmt::Write` succeed while there is room and fail
    /// once the fixed capacity has been used up.
    #[test]
    fn fmt_write() {
        let mut dst = ArrayStr::<5>::new("").unwrap();
        // Four of the five bytes are consumed here...
        assert_eq!(write!(&mut dst, "abcd"), Ok(()));
        // ...and this takes the last one.
        assert_eq!(write!(&mut dst, "e"), Ok(()));
        // No room left, so the write must be rejected.
        assert_eq!(write!(&mut dst, "f"), Err(core::fmt::Error));
    }
}