jiff/shared/util/array_str.rs
/// A fixed capacity string of at most `N` bytes that lives on the stack.
///
/// This favors simplicity over raw efficiency. It took the place of some
/// uses of `Box<str>` for tiny strings, as part of an effort to depend less
/// on dynamic memory allocation.
///
/// The length is stored in a `u8`, both because nothing bigger has been
/// needed and because it keeps the storage requirements down. As a result,
/// `N` must be less than `256`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[doc(hidden)] // not part of Jiff's public API
pub struct ArrayStr<const N: usize> {
    /// Backing storage for the string's UTF-8 bytes.
    ///
    /// The whole array is valid UTF-8 at all times, and so is the
    /// `0..self.len` prefix that holds the actual string.
    bytes: [u8; N],
    /// How many bytes of `bytes` are occupied by the string.
    ///
    /// (A NUL terminator could stand in for this byte in some cases. POSIX
    /// time zone abbreviation strings, for example, never contain NUL bytes.
    /// But an explicit length is simpler, costs only one byte and works for
    /// any string. We also aren't really trying to micro-optimize the
    /// storage requirements of these array strings. Or at least, I don't
    /// know of a reason to.)
    len: u8,
}
26
27impl<const N: usize> ArrayStr<N> {
28 /// Creates a new fixed capacity string.
29 ///
30 /// If the given string exceeds `N` bytes, then this returns
31 /// `None`.
32 pub(crate) const fn new(s: &str) -> Option<ArrayStr<N>> {
33 let len = s.len();
34 if len > N {
35 return None;
36 }
37 let mut bytes = [0; N];
38 let mut i = 0;
39 while i < s.as_bytes().len() {
40 bytes[i] = s.as_bytes()[i];
41 i += 1;
42 }
43 // OK because we don't ever use anything bigger than u8::MAX for `N`.
44 // And we probably shouldn't, because that would be a pretty chunky
45 // array. If such a thing is needed, please file an issue to discuss.
46 debug_assert!(N <= u8::MAX as usize, "size of ArrayStr is too big");
47 Some(ArrayStr { bytes, len: len as u8 })
48 }
49
50 /// Returns the capacity of this array string.
51 pub(crate) fn capacity() -> usize {
52 N
53 }
54
55 /// Append the bytes given to the end of this string.
56 ///
57 /// If the capacity would be exceeded, then this is a no-op and `false`
58 /// is returned.
59 pub(crate) fn push_str(&mut self, s: &str) -> bool {
60 let len = usize::from(self.len);
61 let Some(new_len) = len.checked_add(s.len()) else { return false };
62 if new_len > N {
63 return false;
64 }
65 self.bytes[len..new_len].copy_from_slice(s.as_bytes());
66 // OK because we don't ever use anything bigger than u8::MAX for `N`.
67 // And we probably shouldn't, because that would be a pretty chunky
68 // array. If such a thing is needed, please file an issue to discuss.
69 debug_assert!(
70 N <= usize::from(u8::MAX),
71 "size of ArrayStr is too big"
72 );
73 self.len = u8::try_from(new_len).unwrap();
74 true
75 }
76
77 /// Returns this array string as a string slice.
78 pub(crate) fn as_str(&self) -> &str {
79 // OK because construction guarantees valid UTF-8.
80 //
81 // This is bullet proof enough to use unchecked `str` construction
82 // here, but I can't dream up of a benchmark where it matters.
83 core::str::from_utf8(&self.bytes[..usize::from(self.len)]).unwrap()
84 }
85}
86
87/// Easy construction of `ArrayStr` from `&'static str`.
88///
89/// We specifically limit to `&'static str` to approximate string literals.
90/// This prevents most cases of accidentally creating a non-string literal
91/// that panics if the string is too big.
92///
93/// This impl primarily exists to make writing tests more convenient.
94impl<const N: usize> From<&'static str> for ArrayStr<N> {
95 fn from(s: &'static str) -> ArrayStr<N> {
96 ArrayStr::new(s).unwrap()
97 }
98}
99
100impl<const N: usize> PartialEq<str> for ArrayStr<N> {
101 fn eq(&self, rhs: &str) -> bool {
102 self.as_str() == rhs
103 }
104}
105
106impl<const N: usize> PartialEq<&str> for ArrayStr<N> {
107 fn eq(&self, rhs: &&str) -> bool {
108 self.as_str() == *rhs
109 }
110}
111
112impl<const N: usize> PartialEq<ArrayStr<N>> for str {
113 fn eq(&self, rhs: &ArrayStr<N>) -> bool {
114 self == rhs.as_str()
115 }
116}
117
118impl<const N: usize> core::fmt::Debug for ArrayStr<N> {
119 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
120 core::fmt::Debug::fmt(self.as_str(), f)
121 }
122}
123
124impl<const N: usize> core::fmt::Display for ArrayStr<N> {
125 fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
126 core::fmt::Display::fmt(self.as_str(), f)
127 }
128}
129
130impl<const N: usize> core::fmt::Write for ArrayStr<N> {
131 fn write_str(&mut self, s: &str) -> core::fmt::Result {
132 if self.push_str(s) {
133 Ok(())
134 } else {
135 Err(core::fmt::Error)
136 }
137 }
138}
139
140impl<const N: usize> AsRef<str> for ArrayStr<N> {
141 fn as_ref(&self) -> &str {
142 self.as_str()
143 }
144}
145
/// The largest time zone abbreviation, in bytes, that we are willing to
/// accept. This limit is self-imposed.
///
/// Here is what POSIX has to say:
///
/// > Indicate no less than three, nor more than {TZNAME_MAX}, bytes that are
/// > the designation for the standard (std) or the alternative (dst -such as
/// > Daylight Savings Time) timezone.
///
/// Querying `TZNAME_MAX` doesn't seem worth the trouble, though.
/// Interestingly, IANA says:
///
/// > are 3 or more characters specifying the standard and daylight saving time
/// > (DST) zone abbreviations
///
/// That implies IANA thinks there is no limit at all, which seems unwise.
/// Moreover, in practice, the `date` utility seems to support fairly long
/// abbreviations. On my mac (so, BSD `date` as I understand it):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 20:05:58 YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// And on my Linux machine (so, GNU `date`):
///
/// ```text
/// $ TZ=ZZZ5YYYYYYYYYYYYYYYYYYYYY date
/// Sun Mar 17 08:05:36 PM YYYYYYYYYYYYYYYYYYYYY 2024
/// ```
///
/// I don't know exactly what limit these programs use, but 30 seems good
/// enough?
///
/// (The limit used to be 255, with the string stuffed into a `Box<str>`.
/// But as part of work on [#168], I was looking to remove allocation from as
/// many places as possible, and this was one candidate. Making room on the
/// stack for 255 byte abbreviations seemed gratuitous, so I picked something
/// smaller. If we come across an abbreviation bigger than this max, then
/// we'll error.)
///
/// [#168]: https://github.com/BurntSushi/jiff/issues/168
const ABBREVIATION_MAX: usize = 30;
188
189/// A type alias for centralizing the definition of a time zone abbreviation.
190///
191/// Basically, this creates one single coherent place where we control the
192/// length of a time zone abbreviation.
193#[doc(hidden)] // not part of Jiff's public API
194pub type Abbreviation = ArrayStr<ABBREVIATION_MAX>;
195
#[cfg(test)]
mod tests {
    use core::fmt::Write;

    use super::*;

    /// Writes through `core::fmt::Write` succeed until the capacity is used
    /// up, and a rejected write leaves the existing contents untouched.
    #[test]
    fn fmt_write() {
        let mut dst = ArrayStr::<5>::new("").unwrap();
        assert!(write!(&mut dst, "abcd").is_ok());
        assert_eq!(dst.as_str(), "abcd");
        assert!(write!(&mut dst, "e").is_ok());
        assert_eq!(dst.as_str(), "abcde");
        // The string is now full, so one more byte must be refused without
        // disturbing what is already stored.
        assert!(write!(&mut dst, "f").is_err());
        assert_eq!(dst.as_str(), "abcde");
    }

    /// `new` accepts a string of exactly `N` bytes and rejects anything
    /// longer.
    #[test]
    fn new_capacity_boundary() {
        assert_eq!(ArrayStr::<3>::new("abc").unwrap().as_str(), "abc");
        assert!(ArrayStr::<3>::new("abcd").is_none());
    }
}