jiff/util/
parse.rs

1use crate::{
2    error::{err, Error},
3    util::escape::{Byte, Bytes},
4};
5
6/// Parses an `i64` number from the beginning to the end of the given slice of
7/// ASCII digit characters.
8///
9/// If any byte in the given slice is not `[0-9]`, then this returns an error.
10/// Similarly, if the number parsed does not fit into a `i64`, then this
11/// returns an error. Notably, this routine does not permit parsing a negative
12/// integer. (We use `i64` because everything in this crate uses signed
13/// integers, and because a higher level routine might want to parse the sign
14/// and then apply it to the result of this routine.)
15#[cfg_attr(feature = "perf-inline", inline(always))]
16pub(crate) fn i64(bytes: &[u8]) -> Result<i64, Error> {
17    if bytes.is_empty() {
18        return Err(err!("invalid number, no digits found"));
19    }
20    let mut n: i64 = 0;
21    for &byte in bytes {
22        let digit = match byte.checked_sub(b'0') {
23            None => {
24                return Err(err!(
25                    "invalid digit, expected 0-9 but got {}",
26                    Byte(byte),
27                ));
28            }
29            Some(digit) if digit > 9 => {
30                return Err(err!(
31                    "invalid digit, expected 0-9 but got {}",
32                    Byte(byte),
33                ))
34            }
35            Some(digit) => {
36                debug_assert!((0..=9).contains(&digit));
37                i64::from(digit)
38            }
39        };
40        n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else(
41            || {
42                err!(
43                    "number '{}' too big to parse into 64-bit integer",
44                    Bytes(bytes),
45                )
46            },
47        )?;
48    }
49    Ok(n)
50}
51
52/// Parses an `i64` fractional number from the beginning to the end of the
53/// given slice of ASCII digit characters.
54///
55/// The fraction's maximum precision must be provided. The returned integer
56/// will always be in units of `10^{max_precision}`. For example, to parse a
57/// fractional amount of seconds with a maximum precision of nanoseconds, then
58/// use `max_precision=9`.
59///
60/// If any byte in the given slice is not `[0-9]`, then this returns an error.
61/// Similarly, if the fraction parsed does not fit into a `i64`, then this
62/// returns an error. Notably, this routine does not permit parsing a negative
63/// integer. (We use `i64` because everything in this crate uses signed
64/// integers, and because a higher level routine might want to parse the sign
65/// and then apply it to the result of this routine.)
66pub(crate) fn fraction(
67    bytes: &[u8],
68    max_precision: usize,
69) -> Result<i64, Error> {
70    if bytes.is_empty() {
71        return Err(err!("invalid fraction, no digits found"));
72    } else if bytes.len() > max_precision {
73        return Err(err!(
74            "invalid fraction, too many digits \
75             (at most {max_precision} are allowed"
76        ));
77    }
78    let mut n: i64 = 0;
79    for &byte in bytes {
80        let digit = match byte.checked_sub(b'0') {
81            None => {
82                return Err(err!(
83                    "invalid fractional digit, expected 0-9 but got {}",
84                    Byte(byte),
85                ));
86            }
87            Some(digit) if digit > 9 => {
88                return Err(err!(
89                    "invalid fractional digit, expected 0-9 but got {}",
90                    Byte(byte),
91                ))
92            }
93            Some(digit) => {
94                debug_assert!((0..=9).contains(&digit));
95                i64::from(digit)
96            }
97        };
98        n = n.checked_mul(10).and_then(|n| n.checked_add(digit)).ok_or_else(
99            || {
100                err!(
101                    "fractional '{}' too big to parse into 64-bit integer",
102                    Bytes(bytes),
103                )
104            },
105        )?;
106    }
107    for _ in bytes.len()..max_precision {
108        n = n.checked_mul(10).ok_or_else(|| {
109            err!(
110                "fractional '{}' too big to parse into 64-bit integer \
111                 (too much precision supported)",
112                Bytes(bytes)
113            )
114        })?;
115    }
116    Ok(n)
117}
118
/// Borrows the given `OsStr`-like value as a `&str`.
///
/// This behaves like `OsStr::to_str`, except that a failed conversion is
/// reported as an `Error` whose message includes the offending value,
/// instead of a bare `None`.
///
/// # Errors
///
/// Returns an error if the value is not valid UTF-8.
#[cfg(feature = "tzdb-zoneinfo")]
pub(crate) fn os_str_utf8<'o, O>(os_str: &'o O) -> Result<&'o str, Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    match os_str.to_str() {
        Some(string) => Ok(string),
        None => Err(err!("environment value {os_str:?} is not valid UTF-8")),
    }
}
133
/// Borrows the given `OsStr`-like value as a byte slice.
///
/// On Unix targets the underlying bytes are returned as-is, without any
/// validation. On every other target the value must first convert to a
/// `&str`, and the bytes of that string are returned.
///
/// # Errors
///
/// On non-Unix targets, returns an error if the value is not valid UTF-8.
/// On Unix targets this never fails.
#[cfg(feature = "tz-system")]
pub(crate) fn os_str_bytes<'o, O>(os_str: &'o O) -> Result<&'o [u8], Error>
where
    O: ?Sized + AsRef<std::ffi::OsStr>,
{
    let os_str = os_str.as_ref();
    #[cfg(unix)]
    {
        Ok(std::os::unix::ffi::OsStrExt::as_bytes(os_str))
    }
    #[cfg(not(unix))]
    {
        // The UTF-8 validation done here is discarded by returning bytes. A
        // caller that goes on to need a `&str` will therefore validate a
        // second time. That is expected to be fine because the values
        // passed in are usually short (e.g., environment variables); a
        // `&str`-returning variant can be added if it ever matters.
        match os_str.to_str() {
            Some(string) => Ok(string.as_bytes()),
            None => {
                Err(err!("environment value {os_str:?} is not valid UTF-8"))
            }
        }
    }
}
164
/// Divides `input` into the pair `(input[..at], input[at..])`.
///
/// Unlike `slice::split_at`, this never panics: when `at` is past the end of
/// `input`, `None` is returned instead. An `at` equal to the length of
/// `input` is permitted and gives an empty second slice.
#[cfg_attr(feature = "perf-inline", inline(always))]
pub(crate) fn split(input: &[u8], at: usize) -> Option<(&[u8], &[u8])> {
    // `get` yields `None` exactly when `at > input.len()`.
    let head = input.get(..at)?;
    let tail = &input[at..];
    Some((head, tail))
}
177
/// Builds a closure that reports how far a slice begins past `start`.
///
/// The address at which `start` begins is remembered. The returned closure,
/// when handed another slice, subtracts that remembered address from the
/// address at which the other slice begins and returns the difference as a
/// byte offset.
///
/// This is intended for parsing routines that advance through their input
/// by re-slicing it, but which want to report positions as offsets into the
/// original input.
///
/// # Panics
///
/// The closure expects to be given a suffix slice of `start`. If the slice
/// it is given begins at a lower address than `start`, the subtraction
/// underflows, which may panic.
pub(crate) fn offseter<'a>(
    start: &'a [u8],
) -> impl Fn(&'a [u8]) -> usize + 'a {
    let base = start.as_ptr() as usize;
    move |end| {
        let here = end.as_ptr() as usize;
        here - base
    }
}
196
197/// Returns a function that converts two slices to the slice between them.
198///
199/// This takes a starting point as input and returns a function that, when
200/// given an ending point (greater than or equal to the starting point), it
201/// returns a slice beginning at the starting point and ending just at the
202/// ending point.
203///
204/// This is useful as a helper function in parsing routines.
205///
206/// # Panics
207///
208/// This may panic if the ending point is not a suffix slice of `start`.
209pub(crate) fn slicer<'a>(
210    start: &'a [u8],
211) -> impl Fn(&'a [u8]) -> &'a [u8] + 'a {
212    let mkoffset = offseter(start);
213    move |end| {
214        let offset = mkoffset(end);
215        &start[..offset]
216    }
217}