nom/
internal.rs

1//! Basic types to build the parsers
2
3use self::Needed::*;
4use crate::error::{self, ErrorKind};
5use crate::lib::std::fmt;
6use core::num::NonZeroUsize;
7
8/// Holds the result of parsing functions
9///
10/// It depends on the input type `I`, the output type `O`, and the error type `E`
11/// (by default `(I, nom::ErrorKind)`)
12///
13/// The `Ok` side is a pair containing the remainder of the input (the part of the data that
14/// was not parsed) and the produced value. The `Err` side contains an instance of `nom::Err`.
15///
16/// Outside of the parsing code, you can use the [Finish::finish] method to convert
17/// it to a more common result type
18pub type IResult<I, O, E = error::Error<I>> = Result<(I, O), Err<E>>;
19
/// Extension trait turning a parser's result into a plain `Result` that is
/// easier to hand over to the rest of a program
pub trait Finish<I, O, E> {
  /// Converts the parser's result into a `Result` whose error side is the bare
  /// error type `E`, which error management libraries can consume directly.
  /// The `Ok` branch is kept as is, while `Err::Error` and `Err::Failure` are
  /// both folded into the `Err` side.
  ///
  /// # Panics
  ///
  /// *warning*: this method will panic if the result is `Err(Err::Incomplete(_))`.
  /// - "complete" parsers: this is not a concern, `Incomplete` is never used
  /// - "streaming" parsers: `Incomplete` is returned when there is not enough data
  /// for the parser to decide, and you should gather more data before parsing again.
  /// Once the parser returns either `Ok(_)`, `Err(Err::Error(_))` or
  /// `Err(Err::Failure(_))`, you can leave the parsing loop and call `finish()`
  /// on the parser's result
  fn finish(self) -> Result<(I, O), E>;
}
34
35impl<I, O, E> Finish<I, O, E> for IResult<I, O, E> {
36  fn finish(self) -> Result<(I, O), E> {
37    match self {
38      Ok(res) => Ok(res),
39      Err(Err::Error(e)) | Err(Err::Failure(e)) => Err(e),
40      Err(Err::Incomplete(_)) => {
41        panic!("Cannot call `finish()` on `Err(Err::Incomplete(_))`: this result means that the parser does not have enough data to decide, you should gather more data and try to reapply  the parser instead")
42      }
43    }
44  }
45}
46
/// Contains information on needed data if a parser returned `Incomplete`
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
pub enum Needed {
  /// Needs more data, but we do not know how much
  Unknown,
  /// Contains the required data size in bytes (never zero)
  Size(NonZeroUsize),
}

impl Needed {
  /// Creates `Needed` instance, returns `Needed::Unknown` if the argument is zero
  pub fn new(s: usize) -> Self {
    match NonZeroUsize::new(s) {
      Some(sz) => Needed::Size(sz),
      None => Needed::Unknown,
    }
  }

  /// Indicates if we know how many bytes we need
  pub fn is_known(&self) -> bool {
    matches!(self, Needed::Size(_))
  }

  /// Maps a `Needed` to `Needed` by applying a function to a contained `Size` value.
  ///
  /// `Needed::Unknown` is returned unchanged and `f` is not called. Otherwise `f`
  /// is called exactly once, which is why any `FnOnce` is accepted. The value it
  /// returns goes through [`Needed::new`], so a result of zero yields
  /// `Needed::Unknown`.
  #[inline]
  pub fn map<F: FnOnce(NonZeroUsize) -> usize>(self, f: F) -> Needed {
    match self {
      Needed::Unknown => Needed::Unknown,
      Needed::Size(n) => Needed::new(f(n)),
    }
  }
}
80
81/// The `Err` enum indicates the parser was not successful
82///
83/// It has three cases:
84///
85/// * `Incomplete` indicates that more data is needed to decide. The `Needed` enum
86/// can contain how many additional bytes are necessary. If you are sure your parser
87/// is working on full data, you can wrap your parser with the `complete` combinator
88/// to transform that case in `Error`
89/// * `Error` means some parser did not succeed, but another one might (as an example,
90/// when testing different branches of an `alt` combinator)
91/// * `Failure` indicates an unrecoverable error. As an example, if you recognize a prefix
92/// to decide on the next parser to apply, and that parser fails, you know there's no need
93/// to try other parsers, you were already in the right branch, so the data is invalid
94///
95#[derive(Debug, Clone, PartialEq)]
96#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
97pub enum Err<E> {
98  /// There was not enough data
99  Incomplete(Needed),
100  /// The parser had an error (recoverable)
101  Error(E),
102  /// The parser had an unrecoverable error: we got to the right
103  /// branch and we know other branches won't work, so backtrack
104  /// as fast as possible
105  Failure(E),
106}
107
108impl<E> Err<E> {
109  /// Tests if the result is Incomplete
110  pub fn is_incomplete(&self) -> bool {
111    if let Err::Incomplete(_) = self {
112      true
113    } else {
114      false
115    }
116  }
117
118  /// Applies the given function to the inner error
119  pub fn map<E2, F>(self, f: F) -> Err<E2>
120  where
121    F: FnOnce(E) -> E2,
122  {
123    match self {
124      Err::Incomplete(n) => Err::Incomplete(n),
125      Err::Failure(t) => Err::Failure(f(t)),
126      Err::Error(t) => Err::Error(f(t)),
127    }
128  }
129
130  /// Automatically converts between errors if the underlying type supports it
131  pub fn convert<F>(e: Err<F>) -> Self
132  where
133    E: From<F>,
134  {
135    e.map(crate::lib::std::convert::Into::into)
136  }
137}
138
139impl<T> Err<(T, ErrorKind)> {
140  /// Maps `Err<(T, ErrorKind)>` to `Err<(U, ErrorKind)>` with the given `F: T -> U`
141  pub fn map_input<U, F>(self, f: F) -> Err<(U, ErrorKind)>
142  where
143    F: FnOnce(T) -> U,
144  {
145    match self {
146      Err::Incomplete(n) => Err::Incomplete(n),
147      Err::Failure((input, k)) => Err::Failure((f(input), k)),
148      Err::Error((input, k)) => Err::Error((f(input), k)),
149    }
150  }
151}
152
153impl<T> Err<error::Error<T>> {
154  /// Maps `Err<error::Error<T>>` to `Err<error::Error<U>>` with the given `F: T -> U`
155  pub fn map_input<U, F>(self, f: F) -> Err<error::Error<U>>
156  where
157    F: FnOnce(T) -> U,
158  {
159    match self {
160      Err::Incomplete(n) => Err::Incomplete(n),
161      Err::Failure(error::Error { input, code }) => Err::Failure(error::Error {
162        input: f(input),
163        code,
164      }),
165      Err::Error(error::Error { input, code }) => Err::Error(error::Error {
166        input: f(input),
167        code,
168      }),
169    }
170  }
171}
172
173#[cfg(feature = "alloc")]
174use crate::lib::std::{borrow::ToOwned, string::String, vec::Vec};
#[cfg(feature = "alloc")]
impl Err<(&[u8], ErrorKind)> {
  /// Copies the borrowed input bytes into a `Vec<u8>` owned by the error
  #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
  pub fn to_owned(self) -> Err<(Vec<u8>, ErrorKind)> {
    self.map_input(|borrowed| borrowed.to_owned())
  }
}
183
#[cfg(feature = "alloc")]
impl Err<(&str, ErrorKind)> {
  /// Copies the borrowed input text into a `String` owned by the error
  #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
  pub fn to_owned(self) -> Err<(String, ErrorKind)> {
    self.map_input(|borrowed| borrowed.to_owned())
  }
}
192
#[cfg(feature = "alloc")]
impl Err<error::Error<&[u8]>> {
  /// Copies the borrowed input bytes into a `Vec<u8>` owned by the error
  #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
  pub fn to_owned(self) -> Err<error::Error<Vec<u8>>> {
    self.map_input(|borrowed| borrowed.to_owned())
  }
}
201
#[cfg(feature = "alloc")]
impl Err<error::Error<&str>> {
  /// Copies the borrowed input text into a `String` owned by the error
  #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))]
  pub fn to_owned(self) -> Err<error::Error<String>> {
    self.map_input(|borrowed| borrowed.to_owned())
  }
}
210
211impl<E: Eq> Eq for Err<E> {}
212
213impl<E> fmt::Display for Err<E>
214where
215  E: fmt::Debug,
216{
217  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
218    match self {
219      Err::Incomplete(Needed::Size(u)) => write!(f, "Parsing requires {} bytes/chars", u),
220      Err::Incomplete(Needed::Unknown) => write!(f, "Parsing requires more data"),
221      Err::Failure(c) => write!(f, "Parsing Failure: {:?}", c),
222      Err::Error(c) => write!(f, "Parsing Error: {:?}", c),
223    }
224  }
225}
226
227#[cfg(feature = "std")]
228use std::error::Error;
229
#[cfg(feature = "std")]
impl<E: fmt::Debug> Error for Err<E> {
  /// Always `None`: no underlying error is reported as the source
  fn source(&self) -> Option<&(dyn Error + 'static)> {
    None
  }
}
239
240/// All nom parsers implement this trait
241pub trait Parser<I, O, E> {
242  /// A parser takes in input type, and returns a `Result` containing
243  /// either the remaining input and the output value, or an error
244  fn parse(&mut self, input: I) -> IResult<I, O, E>;
245
246  /// Maps a function over the result of a parser
247  fn map<G, O2>(self, g: G) -> Map<Self, G, O>
248  where
249    G: Fn(O) -> O2,
250    Self: core::marker::Sized,
251  {
252    Map {
253      f: self,
254      g,
255      phantom: core::marker::PhantomData,
256    }
257  }
258
259  /// Creates a second parser from the output of the first one, then apply over the rest of the input
260  fn flat_map<G, H, O2>(self, g: G) -> FlatMap<Self, G, O>
261  where
262    G: FnMut(O) -> H,
263    H: Parser<I, O2, E>,
264    Self: core::marker::Sized,
265  {
266    FlatMap {
267      f: self,
268      g,
269      phantom: core::marker::PhantomData,
270    }
271  }
272
273  /// Applies a second parser over the output of the first one
274  fn and_then<G, O2>(self, g: G) -> AndThen<Self, G, O>
275  where
276    G: Parser<O, O2, E>,
277    Self: core::marker::Sized,
278  {
279    AndThen {
280      f: self,
281      g,
282      phantom: core::marker::PhantomData,
283    }
284  }
285
286  /// Applies a second parser after the first one, return their results as a tuple
287  fn and<G, O2>(self, g: G) -> And<Self, G>
288  where
289    G: Parser<I, O2, E>,
290    Self: core::marker::Sized,
291  {
292    And { f: self, g }
293  }
294
295  /// Applies a second parser over the input if the first one failed
296  fn or<G>(self, g: G) -> Or<Self, G>
297  where
298    G: Parser<I, O, E>,
299    Self: core::marker::Sized,
300  {
301    Or { f: self, g }
302  }
303
304  /// automatically converts the parser's output and error values to another type, as long as they
305  /// implement the `From` trait
306  fn into<O2: From<O>, E2: From<E>>(self) -> Into<Self, O, O2, E, E2>
307  where
308    Self: core::marker::Sized,
309  {
310    Into {
311      f: self,
312      phantom_out1: core::marker::PhantomData,
313      phantom_err1: core::marker::PhantomData,
314      phantom_out2: core::marker::PhantomData,
315      phantom_err2: core::marker::PhantomData,
316    }
317  }
318}
319
320impl<'a, I, O, E, F> Parser<I, O, E> for F
321where
322  F: FnMut(I) -> IResult<I, O, E> + 'a,
323{
324  fn parse(&mut self, i: I) -> IResult<I, O, E> {
325    self(i)
326  }
327}
328
329#[cfg(feature = "alloc")]
330use alloc::boxed::Box;
331
#[cfg(feature = "alloc")]
impl<'a, I, O, E> Parser<I, O, E> for Box<dyn Parser<I, O, E> + 'a> {
  /// Forwards the call to the boxed parser
  fn parse(&mut self, input: I) -> IResult<I, O, E> {
    // Reborrow the trait object inside the box so the call goes to the
    // inner parser
    let inner: &mut (dyn Parser<I, O, E> + 'a) = &mut **self;
    inner.parse(input)
  }
}
338
339/// Implementation of `Parser::map`
340#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
341pub struct Map<F, G, O1> {
342  f: F,
343  g: G,
344  phantom: core::marker::PhantomData<O1>,
345}
346
347impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> O2> Parser<I, O2, E> for Map<F, G, O1> {
348  fn parse(&mut self, i: I) -> IResult<I, O2, E> {
349    match self.f.parse(i) {
350      Err(e) => Err(e),
351      Ok((i, o)) => Ok((i, (self.g)(o))),
352    }
353  }
354}
355
356/// Implementation of `Parser::flat_map`
357#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
358pub struct FlatMap<F, G, O1> {
359  f: F,
360  g: G,
361  phantom: core::marker::PhantomData<O1>,
362}
363
364impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> H, H: Parser<I, O2, E>> Parser<I, O2, E>
365  for FlatMap<F, G, O1>
366{
367  fn parse(&mut self, i: I) -> IResult<I, O2, E> {
368    let (i, o1) = self.f.parse(i)?;
369    (self.g)(o1).parse(i)
370  }
371}
372
373/// Implementation of `Parser::and_then`
374#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
375pub struct AndThen<F, G, O1> {
376  f: F,
377  g: G,
378  phantom: core::marker::PhantomData<O1>,
379}
380
381impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<O1, O2, E>> Parser<I, O2, E>
382  for AndThen<F, G, O1>
383{
384  fn parse(&mut self, i: I) -> IResult<I, O2, E> {
385    let (i, o1) = self.f.parse(i)?;
386    let (_, o2) = self.g.parse(o1)?;
387    Ok((i, o2))
388  }
389}
390
391/// Implementation of `Parser::and`
392#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
393pub struct And<F, G> {
394  f: F,
395  g: G,
396}
397
398impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<I, O2, E>> Parser<I, (O1, O2), E>
399  for And<F, G>
400{
401  fn parse(&mut self, i: I) -> IResult<I, (O1, O2), E> {
402    let (i, o1) = self.f.parse(i)?;
403    let (i, o2) = self.g.parse(i)?;
404    Ok((i, (o1, o2)))
405  }
406}
407
408/// Implementation of `Parser::or`
409#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
410pub struct Or<F, G> {
411  f: F,
412  g: G,
413}
414
415impl<'a, I: Clone, O, E: crate::error::ParseError<I>, F: Parser<I, O, E>, G: Parser<I, O, E>>
416  Parser<I, O, E> for Or<F, G>
417{
418  fn parse(&mut self, i: I) -> IResult<I, O, E> {
419    match self.f.parse(i.clone()) {
420      Err(Err::Error(e1)) => match self.g.parse(i) {
421        Err(Err::Error(e2)) => Err(Err::Error(e1.or(e2))),
422        res => res,
423      },
424      res => res,
425    }
426  }
427}
428
429/// Implementation of `Parser::into`
430#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))]
431pub struct Into<F, O1, O2: From<O1>, E1, E2: From<E1>> {
432  f: F,
433  phantom_out1: core::marker::PhantomData<O1>,
434  phantom_err1: core::marker::PhantomData<E1>,
435  phantom_out2: core::marker::PhantomData<O2>,
436  phantom_err2: core::marker::PhantomData<E2>,
437}
438
439impl<
440    'a,
441    I: Clone,
442    O1,
443    O2: From<O1>,
444    E1,
445    E2: crate::error::ParseError<I> + From<E1>,
446    F: Parser<I, O1, E1>,
447  > Parser<I, O2, E2> for Into<F, O1, O2, E1, E2>
448{
449  fn parse(&mut self, i: I) -> IResult<I, O2, E2> {
450    match self.f.parse(i) {
451      Ok((i, o)) => Ok((i, o.into())),
452      Err(Err::Error(e)) => Err(Err::Error(e.into())),
453      Err(Err::Failure(e)) => Err(Err::Failure(e.into())),
454      Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)),
455    }
456  }
457}
458
#[cfg(test)]
mod tests {
  use super::*;
  use crate::error::ErrorKind;

  // Asserts that the type `$t` occupies exactly `$sz` bytes
  #[doc(hidden)]
  #[macro_export]
  macro_rules! assert_size {
    ($t:ty, $sz:expr) => {
      assert_eq!(crate::lib::std::mem::size_of::<$t>(), $sz);
    };
  }

  // Pins the memory footprint of the core types on 64-bit targets
  #[test]
  #[cfg(target_pointer_width = "64")]
  fn size_test() {
    assert_size!(IResult<&[u8], &[u8], (&[u8], u32)>, 40);
    //FIXME: since rust 1.65, this is now 32 bytes, likely thanks to https://github.com/rust-lang/rust/pull/94075
    // deactivating that test for now because it'll have different values depending on the rust version
    // assert_size!(IResult<&str, &str, u32>, 40);
    assert_size!(Needed, 8);
    assert_size!(Err<u32>, 16);
    assert_size!(ErrorKind, 1);
  }

  // `Err::map` transforms the payload and keeps the variant
  #[test]
  fn err_map_test() {
    let original = Err::Error(1);
    let mapped = original.map(|value| value + 1);
    assert_eq!(mapped, Err::Error(2));
  }
}