pest/
parser_state.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10use alloc::borrow::{Cow, ToOwned};
11use alloc::boxed::Box;
12use alloc::rc::Rc;
13use alloc::vec;
14use alloc::vec::Vec;
15use core::num::NonZeroUsize;
16use core::ops::Range;
17use core::sync::atomic::{AtomicUsize, Ordering};
18
19use crate::error::{Error, ErrorVariant};
20use crate::iterators::{pairs, QueueableToken};
21use crate::position::{self, Position};
22use crate::span::Span;
23use crate::stack::Stack;
24use crate::RuleType;
25
26/// The current lookahead status of a [`ParserState`].
27///
28/// [`ParserState`]: struct.ParserState.html
29#[derive(Clone, Copy, Debug, Eq, PartialEq)]
30pub enum Lookahead {
31    /// The positive predicate, written as an ampersand &,
32    /// attempts to match its inner expression.
33    /// If the inner expression succeeds, parsing continues,
34    /// but at the same position as the predicate —
35    /// &foo ~ bar is thus a kind of "AND" statement:
36    /// "the input string must match foo AND bar".
37    /// If the inner expression fails,
38    /// the whole expression fails too.
39    Positive,
40    /// The negative predicate, written as an exclamation mark !,
41    /// attempts to match its inner expression.
42    /// If the inner expression fails, the predicate succeeds
43    /// and parsing continues at the same position as the predicate.
44    /// If the inner expression succeeds, the predicate fails —
45    /// !foo ~ bar is thus a kind of "NOT" statement:
46    /// "the input string must match bar but NOT foo".
47    Negative,
48    /// No lookahead (i.e. it will consume input).
49    None,
50}
51
52/// The current atomicity of a [`ParserState`].
53///
54/// [`ParserState`]: struct.ParserState.html
55#[derive(Clone, Copy, Debug, Eq, PartialEq)]
56pub enum Atomicity {
57    /// prevents implicit whitespace: inside an atomic rule,
58    /// the tilde ~ means "immediately followed by",
59    /// and repetition operators (asterisk * and plus sign +)
60    /// have no implicit separation. In addition, all other rules
61    /// called from an atomic rule are also treated as atomic.
62    /// (interior matching rules are silent)
63    Atomic,
64    /// The same as atomic, but inner tokens are produced as normal.
65    CompoundAtomic,
66    /// implicit whitespace is enabled
67    NonAtomic,
68}
69
70/// Type alias to simplify specifying the return value of chained closures.
71pub type ParseResult<S> = Result<S, S>;
72
73/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
74#[derive(Clone, Copy, Debug, Eq, PartialEq)]
75pub enum MatchDir {
76    /// from the bottom to the top of the stack
77    BottomToTop,
78    /// from the top to the bottom of the stack
79    TopToBottom,
80}
81
82static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
83
84/// Sets the maximum call limit for the parser state
85/// to prevent stack overflows or excessive execution times
86/// in some grammars.
87/// If set, the calls are tracked as a running total
88/// over all non-terminal rules that can nest closures
89/// (which are passed to transform the parser state).
90///
91/// # Arguments
92///
93/// * `limit` - The maximum number of calls. If None,
94///             the number of calls is unlimited.
95pub fn set_call_limit(limit: Option<NonZeroUsize>) {
96    CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed);
97}
98
99#[derive(Debug)]
100struct CallLimitTracker {
101    current_call_limit: Option<(usize, usize)>,
102}
103
104impl Default for CallLimitTracker {
105    fn default() -> Self {
106        let limit = CALL_LIMIT.load(Ordering::Relaxed);
107        let current_call_limit = if limit > 0 { Some((0, limit)) } else { None };
108        Self { current_call_limit }
109    }
110}
111
112impl CallLimitTracker {
113    fn limit_reached(&self) -> bool {
114        self.current_call_limit
115            .map_or(false, |(current, limit)| current >= limit)
116    }
117
118    fn increment_depth(&mut self) {
119        if let Some((current, _)) = &mut self.current_call_limit {
120            *current += 1;
121        }
122    }
123}
124
125/// The complete state of a [`Parser`].
126///
127/// [`Parser`]: trait.Parser.html
128#[derive(Debug)]
129pub struct ParserState<'i, R: RuleType> {
130    position: Position<'i>,
131    queue: Vec<QueueableToken<'i, R>>,
132    lookahead: Lookahead,
133    pos_attempts: Vec<R>,
134    neg_attempts: Vec<R>,
135    attempt_pos: usize,
136    atomicity: Atomicity,
137    stack: Stack<Span<'i>>,
138    call_tracker: CallLimitTracker,
139}
140
141/// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
142///
143/// # Examples
144///
145/// ```
146/// # use pest;
147/// let input = "";
148/// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
149/// ```
150#[allow(clippy::perf)]
151pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
152where
153    F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
154{
155    let state = ParserState::new(input);
156
157    match f(state) {
158        Ok(state) => {
159            let len = state.queue.len();
160            Ok(pairs::new(Rc::new(state.queue), input, None, 0, len))
161        }
162        Err(mut state) => {
163            let variant = if state.reached_call_limit() {
164                ErrorVariant::CustomError {
165                    message: "call limit reached".to_owned(),
166                }
167            } else {
168                state.pos_attempts.sort();
169                state.pos_attempts.dedup();
170                state.neg_attempts.sort();
171                state.neg_attempts.dedup();
172                ErrorVariant::ParsingError {
173                    positives: state.pos_attempts.clone(),
174                    negatives: state.neg_attempts.clone(),
175                }
176            };
177
178            Err(Error::new_from_pos(
179                variant,
180                // TODO(performance): Guarantee state.attempt_pos is a valid position
181                position::Position::new(input, state.attempt_pos).unwrap(),
182            ))
183        }
184    }
185}
186
187impl<'i, R: RuleType> ParserState<'i, R> {
188    /// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
189    /// will be passed from closure to closure based on the needs of the specified `Parser`.
190    ///
191    /// # Examples
192    ///
193    /// ```
194    /// # use pest;
195    /// let input = "";
196    /// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
197    /// ```
198    pub fn new(input: &'i str) -> Box<Self> {
199        Box::new(ParserState {
200            position: Position::from_start(input),
201            queue: vec![],
202            lookahead: Lookahead::None,
203            pos_attempts: vec![],
204            neg_attempts: vec![],
205            attempt_pos: 0,
206            atomicity: Atomicity::NonAtomic,
207            stack: Stack::new(),
208            call_tracker: Default::default(),
209        })
210    }
211
212    /// Returns a reference to the current `Position` of the `ParserState`.
213    ///
214    /// # Examples
215    ///
216    /// ```
217    /// # use pest;
218    /// # #[allow(non_camel_case_types)]
219    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
220    /// enum Rule {
221    ///     ab
222    /// }
223    ///
224    /// let input = "ab";
225    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
226    /// let position = state.position();
227    /// assert_eq!(position.pos(), 0);
228    /// ```
229    pub fn position(&self) -> &Position<'i> {
230        &self.position
231    }
232
233    /// Returns the current atomicity of the `ParserState`.
234    ///
235    /// # Examples
236    ///
237    /// ```
238    /// # use pest;
239    /// # use pest::Atomicity;
240    /// # #[allow(non_camel_case_types)]
241    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
242    /// enum Rule {
243    ///     ab
244    /// }
245    ///
246    /// let input = "ab";
247    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
248    /// let atomicity = state.atomicity();
249    /// assert_eq!(atomicity, Atomicity::NonAtomic);
250    /// ```
251    pub fn atomicity(&self) -> Atomicity {
252        self.atomicity
253    }
254
255    #[inline]
256    fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
257        if self.call_tracker.limit_reached() {
258            return Err(self);
259        }
260        self.call_tracker.increment_depth();
261        Ok(self)
262    }
263
264    #[inline]
265    fn reached_call_limit(&self) -> bool {
266        self.call_tracker.limit_reached()
267    }
268
269    /// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
270    /// meant to match the rule.
271    ///
272    /// # Examples
273    ///
274    /// ```
275    /// # use pest;
276    /// # #[allow(non_camel_case_types)]
277    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
278    /// enum Rule {
279    ///     a
280    /// }
281    ///
282    /// let input = "a";
283    /// let pairs: Vec<_> = pest::state(input, |state| {
284    ///     state.rule(Rule::a, |s| Ok(s))
285    /// }).unwrap().collect();
286    ///
287    /// assert_eq!(pairs.len(), 1);
288    /// ```
289    #[inline]
290    pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
291    where
292        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
293    {
294        self = self.inc_call_check_limit()?;
295        let actual_pos = self.position.pos();
296        let index = self.queue.len();
297
298        let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
299            (self.pos_attempts.len(), self.neg_attempts.len())
300        } else {
301            // Attempts have not been cleared yet since the attempt_pos is older.
302            (0, 0)
303        };
304
305        if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
306            // Pair's position will only be known after running the closure.
307            self.queue.push(QueueableToken::Start {
308                end_token_index: 0,
309                input_pos: actual_pos,
310            });
311        }
312
313        let attempts = self.attempts_at(actual_pos);
314
315        let result = f(self);
316
317        match result {
318            Ok(mut new_state) => {
319                if new_state.lookahead == Lookahead::Negative {
320                    new_state.track(
321                        rule,
322                        actual_pos,
323                        pos_attempts_index,
324                        neg_attempts_index,
325                        attempts,
326                    );
327                }
328
329                if new_state.lookahead == Lookahead::None
330                    && new_state.atomicity != Atomicity::Atomic
331                {
332                    // Storing the pair's index in the first token that was added before the closure was
333                    // run.
334                    let new_index = new_state.queue.len();
335                    match new_state.queue[index] {
336                        QueueableToken::Start {
337                            ref mut end_token_index,
338                            ..
339                        } => *end_token_index = new_index,
340                        _ => unreachable!(),
341                    };
342
343                    let new_pos = new_state.position.pos();
344
345                    new_state.queue.push(QueueableToken::End {
346                        start_token_index: index,
347                        rule,
348                        tag: None,
349                        input_pos: new_pos,
350                    });
351                }
352
353                Ok(new_state)
354            }
355            Err(mut new_state) => {
356                if new_state.lookahead != Lookahead::Negative {
357                    new_state.track(
358                        rule,
359                        actual_pos,
360                        pos_attempts_index,
361                        neg_attempts_index,
362                        attempts,
363                    );
364                }
365
366                if new_state.lookahead == Lookahead::None
367                    && new_state.atomicity != Atomicity::Atomic
368                {
369                    new_state.queue.truncate(index);
370                }
371
372                Err(new_state)
373            }
374        }
375    }
376
377    /// Tag current node
378    ///
379    /// # Examples
380    ///
381    /// Try to recognize the one specified in a set of characters
382    ///
383    /// ```
384    /// use pest::{state, ParseResult, ParserState, iterators::Pair};
385    /// #[allow(non_camel_case_types)]
386    /// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
387    /// enum Rule {
388    ///     character,
389    /// }
390    /// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
391    ///     state.sequence(|state| {
392    ///         character(state)
393    ///             .and_then(|state| character(state))
394    ///             .and_then(|state| character(state))
395    ///             .and_then(|state| state.tag_node(std::borrow::Cow::Borrowed("c")))
396    ///             .and_then(|state| character(state))
397    ///     })
398    /// }
399    /// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
400    ///     state.rule(Rule::character, |state| state.match_range('a'..'z'))
401    /// }
402    ///
403    /// let input = "abcd";
404    /// let pairs = state(input, mark_c).unwrap();
405    /// // find all node tag as `c`
406    /// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
407    /// assert_eq!(find[0].as_str(), "c")
408    /// ```
409    #[inline]
410    pub fn tag_node(mut self: Box<Self>, tag: Cow<'i, str>) -> ParseResult<Box<Self>> {
411        if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
412            *old = Some(tag)
413        }
414        Ok(self)
415    }
416
417    fn attempts_at(&self, pos: usize) -> usize {
418        if self.attempt_pos == pos {
419            self.pos_attempts.len() + self.neg_attempts.len()
420        } else {
421            0
422        }
423    }
424
425    fn track(
426        &mut self,
427        rule: R,
428        pos: usize,
429        pos_attempts_index: usize,
430        neg_attempts_index: usize,
431        prev_attempts: usize,
432    ) {
433        if self.atomicity == Atomicity::Atomic {
434            return;
435        }
436
437        // If nested rules made no progress, there is no use to report them; it's only useful to
438        // track the current rule, the exception being when only one attempt has been made during
439        // the children rules.
440        let curr_attempts = self.attempts_at(pos);
441        if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
442            return;
443        }
444
445        if pos == self.attempt_pos {
446            self.pos_attempts.truncate(pos_attempts_index);
447            self.neg_attempts.truncate(neg_attempts_index);
448        }
449
450        if pos > self.attempt_pos {
451            self.pos_attempts.clear();
452            self.neg_attempts.clear();
453            self.attempt_pos = pos;
454        }
455
456        let attempts = if self.lookahead != Lookahead::Negative {
457            &mut self.pos_attempts
458        } else {
459            &mut self.neg_attempts
460        };
461
462        if pos == self.attempt_pos {
463            attempts.push(rule);
464        }
465    }
466
467    /// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
468    /// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
469    /// `Box<ParserState>` otherwise.
470    ///
471    /// This method is useful to parse sequences that only match together which usually come in the
472    /// form of chained `Result`s with
473    /// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
474    ///
475    ///
476    /// # Examples
477    ///
478    /// ```
479    /// # use pest;
480    /// # #[allow(non_camel_case_types)]
481    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
482    /// enum Rule {
483    ///     a
484    /// }
485    ///
486    /// let input = "a";
487    /// let pairs: Vec<_> = pest::state(input, |state| {
488    ///     state.sequence(|s| {
489    ///         s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
490    ///             s.match_string("b")
491    ///         })
492    ///     }).or_else(|s| {
493    ///         Ok(s)
494    ///     })
495    /// }).unwrap().collect();
496    ///
497    /// assert_eq!(pairs.len(), 0);
498    /// ```
499    #[inline]
500    pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
501    where
502        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
503    {
504        self = self.inc_call_check_limit()?;
505        let token_index = self.queue.len();
506        let initial_pos = self.position;
507
508        let result = f(self);
509
510        match result {
511            Ok(new_state) => Ok(new_state),
512            Err(mut new_state) => {
513                // Restore the initial position and truncate the token queue.
514                new_state.position = initial_pos;
515                new_state.queue.truncate(token_index);
516                Err(new_state)
517            }
518        }
519    }
520
521    /// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns
522    /// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`.
523    ///
524    /// # Examples
525    ///
526    /// ```
527    /// # use pest;
528    /// # #[allow(non_camel_case_types)]
529    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
530    /// enum Rule {
531    ///     ab
532    /// }
533    ///
534    /// let input = "aab";
535    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
536    /// let mut result = state.repeat(|s| {
537    ///     s.match_string("a")
538    /// });
539    /// assert!(result.is_ok());
540    /// assert_eq!(result.unwrap().position().pos(), 2);
541    ///
542    /// state = pest::ParserState::new(input);
543    /// result = state.repeat(|s| {
544    ///     s.match_string("b")
545    /// });
546    /// assert!(result.is_ok());
547    /// assert_eq!(result.unwrap().position().pos(), 0);
548    /// ```
549    #[inline]
550    pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
551    where
552        F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
553    {
554        self = self.inc_call_check_limit()?;
555        let mut result = f(self);
556
557        loop {
558            match result {
559                Ok(state) => result = f(state),
560                Err(state) => return Ok(state),
561            };
562        }
563    }
564
565    /// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
566    /// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
567    ///
568    /// # Examples
569    ///
570    /// ```
571    /// # use pest;
572    /// # #[allow(non_camel_case_types)]
573    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
574    /// enum Rule {
575    ///     ab
576    /// }
577    ///
578    /// let input = "ab";
579    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
580    /// let result = state.optional(|s| {
581    ///     s.match_string("ab")
582    /// });
583    /// assert!(result.is_ok());
584    ///
585    /// state = pest::ParserState::new(input);
586    /// let result = state.optional(|s| {
587    ///     s.match_string("ac")
588    /// });
589    /// assert!(result.is_ok());
590    /// ```
591    #[inline]
592    pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
593    where
594        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
595    {
596        self = self.inc_call_check_limit()?;
597        match f(self) {
598            Ok(state) | Err(state) => Ok(state),
599        }
600    }
601
602    /// Attempts to match a single character based on a filter function. Returns `Ok` with the
603    /// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
604    /// otherwise.
605    ///
606    /// # Examples
607    ///
608    /// ```
609    /// # use pest;
610    /// # #[allow(non_camel_case_types)]
611    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
612    /// enum Rule {}
613    ///
614    /// let input = "ab";
615    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
616    /// let result = state.match_char_by(|c| c.is_ascii());
617    /// assert!(result.is_ok());
618    /// assert_eq!(result.unwrap().position().pos(), 1);
619    ///
620    /// let input = "❤";
621    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
622    /// let result = state.match_char_by(|c| c.is_ascii());
623    /// assert!(result.is_err());
624    /// assert_eq!(result.unwrap_err().position().pos(), 0);
625    /// ```
626    #[inline]
627    pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
628    where
629        F: FnOnce(char) -> bool,
630    {
631        if self.position.match_char_by(f) {
632            Ok(self)
633        } else {
634            Err(self)
635        }
636    }
637
638    /// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
639    /// successful, or `Err` with the updated `Box<ParserState>` otherwise.
640    ///
641    /// # Examples
642    ///
643    /// ```
644    /// # use pest;
645    /// # #[allow(non_camel_case_types)]
646    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
647    /// enum Rule {}
648    ///
649    /// let input = "ab";
650    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
651    /// let mut result = state.match_string("ab");
652    /// assert!(result.is_ok());
653    /// assert_eq!(result.unwrap().position().pos(), 2);
654    ///
655    /// state = pest::ParserState::new(input);
656    /// result = state.match_string("ac");
657    /// assert!(result.is_err());
658    /// assert_eq!(result.unwrap_err().position().pos(), 0);
659    /// ```
660    #[inline]
661    pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
662        if self.position.match_string(string) {
663            Ok(self)
664        } else {
665            Err(self)
666        }
667    }
668
669    /// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
670    /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
671    ///
672    /// # Examples
673    ///
674    /// ```
675    /// # use pest;
676    /// # #[allow(non_camel_case_types)]
677    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
678    /// enum Rule {}
679    ///
680    /// let input = "ab";
681    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
682    /// let mut result = state.match_insensitive("AB");
683    /// assert!(result.is_ok());
684    /// assert_eq!(result.unwrap().position().pos(), 2);
685    ///
686    /// state = pest::ParserState::new(input);
687    /// result = state.match_insensitive("AC");
688    /// assert!(result.is_err());
689    /// assert_eq!(result.unwrap_err().position().pos(), 0);
690    /// ```
691    #[inline]
692    pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
693        if self.position.match_insensitive(string) {
694            Ok(self)
695        } else {
696            Err(self)
697        }
698    }
699
700    /// Attempts to match a single character from the given range. Returns `Ok` with the updated
701    /// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
702    ///
703    /// # Caution
704    /// The provided `range` is interpreted as inclusive.
705    ///
706    /// # Examples
707    ///
708    /// ```
709    /// # use pest;
710    /// # #[allow(non_camel_case_types)]
711    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
712    /// enum Rule {}
713    ///
714    /// let input = "ab";
715    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
716    /// let mut result = state.match_range('a'..'z');
717    /// assert!(result.is_ok());
718    /// assert_eq!(result.unwrap().position().pos(), 1);
719    ///
720    /// state = pest::ParserState::new(input);
721    /// result = state.match_range('A'..'Z');
722    /// assert!(result.is_err());
723    /// assert_eq!(result.unwrap_err().position().pos(), 0);
724    /// ```
725    #[inline]
726    pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
727        if self.position.match_range(range) {
728            Ok(self)
729        } else {
730            Err(self)
731        }
732    }
733
734    /// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
735    /// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
736    ///
737    /// # Examples
738    ///
739    /// ```
740    /// # use pest;
741    /// # #[allow(non_camel_case_types)]
742    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
743    /// enum Rule {}
744    ///
745    /// let input = "ab";
746    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
747    /// let mut result = state.skip(1);
748    /// assert!(result.is_ok());
749    /// assert_eq!(result.unwrap().position().pos(), 1);
750    ///
751    /// state = pest::ParserState::new(input);
752    /// result = state.skip(3);
753    /// assert!(result.is_err());
754    /// assert_eq!(result.unwrap_err().position().pos(), 0);
755    /// ```
756    #[inline]
757    pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
758        if self.position.skip(n) {
759            Ok(self)
760        } else {
761            Err(self)
762        }
763    }
764
765    /// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
766    /// updated `Box<ParserState>` whether or not one of the strings is found.
767    ///
768    /// # Examples
769    ///
770    /// ```
771    /// # use pest;
772    /// # #[allow(non_camel_case_types)]
773    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
774    /// enum Rule {}
775    ///
776    /// let input = "abcd";
777    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
778    /// let mut result = state.skip_until(&["c", "d"]);
779    /// assert!(result.is_ok());
780    /// assert_eq!(result.unwrap().position().pos(), 2);
781    /// ```
782    #[inline]
783    pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
784        self.position.skip_until(strings);
785        Ok(self)
786    }
787
788    /// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
789    /// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
790    ///
791    /// # Examples
792    ///
793    /// ```
794    /// # use pest;
795    /// # #[allow(non_camel_case_types)]
796    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
797    /// enum Rule {}
798    ///
799    /// let input = "ab";
800    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
801    /// let mut result = state.start_of_input();
802    /// assert!(result.is_ok());
803    ///
804    /// state = pest::ParserState::new(input);
805    /// state = state.match_string("ab").unwrap();
806    /// result = state.start_of_input();
807    /// assert!(result.is_err());
808    /// ```
809    #[inline]
810    pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
811        if self.position.at_start() {
812            Ok(self)
813        } else {
814            Err(self)
815        }
816    }
817
818    /// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
819    /// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
820    ///
821    /// # Examples
822    ///
823    /// ```
824    /// # use pest;
825    /// # #[allow(non_camel_case_types)]
826    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
827    /// enum Rule {}
828    ///
829    /// let input = "ab";
830    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
831    /// let mut result = state.end_of_input();
832    /// assert!(result.is_err());
833    ///
834    /// state = pest::ParserState::new(input);
835    /// state = state.match_string("ab").unwrap();
836    /// result = state.end_of_input();
837    /// assert!(result.is_ok());
838    /// ```
839    #[inline]
840    pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
841        if self.position.at_end() {
842            Ok(self)
843        } else {
844            Err(self)
845        }
846    }
847
848    /// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
849    /// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
850    /// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
851    /// together, negating the `Result`.
852    ///
853    /// # Examples
854    ///
855    /// ```
856    /// # use pest;
857    /// # #[allow(non_camel_case_types)]
858    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
859    /// enum Rule {
860    ///     a
861    /// }
862    ///
863    /// let input = "a";
864    /// let pairs: Vec<_> = pest::state(input, |state| {
865    ///     state.lookahead(true, |state| {
866    ///         state.rule(Rule::a, |s| Ok(s))
867    ///     })
868    /// }).unwrap().collect();
869    ///
870    /// assert_eq!(pairs.len(), 0);
871    /// ```
872    #[inline]
873    pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
874    where
875        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
876    {
877        self = self.inc_call_check_limit()?;
878        let initial_lookahead = self.lookahead;
879
880        self.lookahead = if is_positive {
881            match initial_lookahead {
882                Lookahead::None | Lookahead::Positive => Lookahead::Positive,
883                Lookahead::Negative => Lookahead::Negative,
884            }
885        } else {
886            match initial_lookahead {
887                Lookahead::None | Lookahead::Positive => Lookahead::Negative,
888                Lookahead::Negative => Lookahead::Positive,
889            }
890        };
891
892        let initial_pos = self.position;
893
894        let result = f(self.checkpoint());
895
896        let result_state = match result {
897            Ok(mut new_state) => {
898                new_state.position = initial_pos;
899                new_state.lookahead = initial_lookahead;
900                Ok(new_state.restore())
901            }
902            Err(mut new_state) => {
903                new_state.position = initial_pos;
904                new_state.lookahead = initial_lookahead;
905                Err(new_state.restore())
906            }
907        };
908
909        if is_positive {
910            result_state
911        } else {
912            match result_state {
913                Ok(state) => Err(state),
914                Err(state) => Ok(state),
915            }
916        }
917    }
918
919    /// Transformation which stops `Token`s from being generated according to `is_atomic`.
920    ///
921    /// # Examples
922    ///
923    /// ```
924    /// # use pest::{self, Atomicity};
925    /// # #[allow(non_camel_case_types)]
926    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
927    /// enum Rule {
928    ///     a
929    /// }
930    ///
931    /// let input = "a";
932    /// let pairs: Vec<_> = pest::state(input, |state| {
933    ///     state.atomic(Atomicity::Atomic, |s| {
934    ///         s.rule(Rule::a, |s| Ok(s))
935    ///     })
936    /// }).unwrap().collect();
937    ///
938    /// assert_eq!(pairs.len(), 0);
939    /// ```
940    #[inline]
941    pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
942    where
943        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
944    {
945        self = self.inc_call_check_limit()?;
946        let initial_atomicity = self.atomicity;
947        let should_toggle = self.atomicity != atomicity;
948
949        if should_toggle {
950            self.atomicity = atomicity;
951        }
952
953        let result = f(self);
954
955        match result {
956            Ok(mut new_state) => {
957                if should_toggle {
958                    new_state.atomicity = initial_atomicity;
959                }
960                Ok(new_state)
961            }
962            Err(mut new_state) => {
963                if should_toggle {
964                    new_state.atomicity = initial_atomicity;
965                }
966                Err(new_state)
967            }
968        }
969    }
970
971    /// Evaluates the result of closure `f` and pushes the span of the input consumed from before
972    /// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
973    /// called successfully, or `Err(Box<ParserState>)` otherwise.
974    ///
975    /// # Examples
976    ///
977    /// ```
978    /// # use pest;
979    /// # #[allow(non_camel_case_types)]
980    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
981    /// enum Rule {}
982    ///
983    /// let input = "ab";
984    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
985    /// let mut result = state.stack_push(|state| state.match_string("a"));
986    /// assert!(result.is_ok());
987    /// assert_eq!(result.unwrap().position().pos(), 1);
988    /// ```
989    #[inline]
990    pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
991    where
992        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
993    {
994        self = self.inc_call_check_limit()?;
995        let start = self.position;
996
997        let result = f(self);
998
999        match result {
1000            Ok(mut state) => {
1001                let end = state.position;
1002                state.stack.push(start.span(&end));
1003                Ok(state)
1004            }
1005            Err(state) => Err(state),
1006        }
1007    }
1008
1009    /// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1010    /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1011    ///
1012    /// # Examples
1013    ///
1014    /// ```
1015    /// # use pest;
1016    /// # #[allow(non_camel_case_types)]
1017    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1018    /// enum Rule {}
1019    ///
1020    /// let input = "aa";
1021    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1022    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1023    ///     |state| state.stack_peek()
1024    /// );
1025    /// assert!(result.is_ok());
1026    /// assert_eq!(result.unwrap().position().pos(), 2);
1027    /// ```
1028    #[inline]
1029    pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1030        let string = self
1031            .stack
1032            .peek()
1033            .expect("peek was called on empty stack")
1034            .as_str();
1035        self.match_string(string)
1036    }
1037
1038    /// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
1039    /// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
1040    ///
1041    /// # Examples
1042    ///
1043    /// ```
1044    /// # use pest;
1045    /// # #[allow(non_camel_case_types)]
1046    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1047    /// enum Rule {}
1048    ///
1049    /// let input = "aa";
1050    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1051    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1052    ///     |state| state.stack_pop()
1053    /// );
1054    /// assert!(result.is_ok());
1055    /// assert_eq!(result.unwrap().position().pos(), 2);
1056    /// ```
1057    #[inline]
1058    pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1059        let string = self
1060            .stack
1061            .pop()
1062            .expect("pop was called on empty stack")
1063            .as_str();
1064        self.match_string(string)
1065    }
1066
1067    /// Matches part of the state of the stack.
1068    ///
1069    /// # Examples
1070    ///
1071    /// ```
1072    /// # use pest::{self, MatchDir};
1073    /// # #[allow(non_camel_case_types)]
1074    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1075    /// enum Rule {}
1076    ///
1077    /// let input = "abcd cd cb";
1078    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1079    /// let mut result = state
1080    ///     .stack_push(|state| state.match_string("a"))
1081    ///     .and_then(|state| state.stack_push(|state| state.match_string("b")))
1082    ///     .and_then(|state| state.stack_push(|state| state.match_string("c")))
1083    ///     .and_then(|state| state.stack_push(|state| state.match_string("d")))
1084    ///     .and_then(|state| state.match_string(" "))
1085    ///     .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
1086    ///     .and_then(|state| state.match_string(" "))
1087    ///     .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
1088    /// assert!(result.is_ok());
1089    /// assert_eq!(result.unwrap().position().pos(), 10);
1090    /// ```
1091    #[inline]
1092    pub fn stack_match_peek_slice(
1093        mut self: Box<Self>,
1094        start: i32,
1095        end: Option<i32>,
1096        match_dir: MatchDir,
1097    ) -> ParseResult<Box<Self>> {
1098        let range = match constrain_idxs(start, end, self.stack.len()) {
1099            Some(r) => r,
1100            None => return Err(self),
1101        };
1102        // return true if an empty sequence is requested
1103        if range.end <= range.start {
1104            return Ok(self);
1105        }
1106
1107        let mut position = self.position;
1108        let result = {
1109            let mut iter_b2t = self.stack[range].iter();
1110            let matcher = |span: &Span<'_>| position.match_string(span.as_str());
1111            match match_dir {
1112                MatchDir::BottomToTop => iter_b2t.all(matcher),
1113                MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
1114            }
1115        };
1116        if result {
1117            self.position = position;
1118            Ok(self)
1119        } else {
1120            Err(self)
1121        }
1122    }
1123
1124    /// Matches the full state of the stack.
1125    ///
1126    /// # Examples
1127    ///
1128    /// ```
1129    /// # use pest;
1130    /// # #[allow(non_camel_case_types)]
1131    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1132    /// enum Rule {}
1133    ///
1134    /// let input = "abba";
1135    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1136    /// let mut result = state
1137    ///     .stack_push(|state| state.match_string("a"))
1138    ///     .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
1139    ///     .and_then(|state| state.stack_match_peek());
1140    /// assert!(result.is_ok());
1141    /// assert_eq!(result.unwrap().position().pos(), 4);
1142    /// ```
1143    #[inline]
1144    pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
1145        self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
1146    }
1147
1148    /// Matches the full state of the stack. This method will clear the stack as it evaluates.
1149    ///
1150    /// # Examples
1151    ///
1152    /// ```
1153    /// /// # use pest;
1154    /// # #[allow(non_camel_case_types)]
1155    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1156    /// enum Rule {}
1157    ///
1158    /// let input = "aaaa";
1159    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1160    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
1161    ///     state.stack_push(|state| state.match_string("a"))
1162    /// }).and_then(|state| state.stack_match_peek());
1163    /// assert!(result.is_ok());
1164    /// assert_eq!(result.unwrap().position().pos(), 4);
1165    /// ```
1166    #[inline]
1167    pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1168        let mut position = self.position;
1169        let mut result = true;
1170        while let Some(span) = self.stack.pop() {
1171            result = position.match_string(span.as_str());
1172            if !result {
1173                break;
1174            }
1175        }
1176
1177        if result {
1178            self.position = position;
1179            Ok(self)
1180        } else {
1181            Err(self)
1182        }
1183    }
1184
1185    /// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
1186    /// `Err(Box<ParserState>)` otherwise.
1187    ///
1188    /// # Examples
1189    ///
1190    /// ```
1191    /// # use pest;
1192    /// # #[allow(non_camel_case_types)]
1193    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1194    /// enum Rule {}
1195    ///
1196    /// let input = "aa";
1197    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1198    /// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
1199    ///     |state| state.stack_drop()
1200    /// );
1201    /// assert!(result.is_ok());
1202    /// assert_eq!(result.unwrap().position().pos(), 1);
1203    /// ```
1204    #[inline]
1205    pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
1206        match self.stack.pop() {
1207            Some(_) => Ok(self),
1208            None => Err(self),
1209        }
1210    }
1211
1212    /// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
1213    /// this method only restores the stack.
1214    ///
1215    /// # Examples
1216    ///
1217    /// ```
1218    /// # use pest;
1219    /// # #[allow(non_camel_case_types)]
1220    /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
1221    /// enum Rule {}
1222    ///
1223    /// let input = "ab";
1224    /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
1225    /// let mut result = state.restore_on_err(|state| state.stack_push(|state|
1226    ///     state.match_string("a")).and_then(|state| state.match_string("a"))
1227    /// );
1228    ///
1229    /// assert!(result.is_err());
1230    ///
1231    /// // Since the the rule doesn't match, the "a" pushed to the stack will be removed.
1232    /// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
1233    /// assert!(catch_panic.is_err());
1234    /// ```
1235    #[inline]
1236    pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
1237    where
1238        F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
1239    {
1240        match f(self.checkpoint()) {
1241            Ok(state) => Ok(state.checkpoint_ok()),
1242            Err(state) => Err(state.restore()),
1243        }
1244    }
1245
1246    // Mark the current state as a checkpoint and return the `Box`.
1247    #[inline]
1248    pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
1249        self.stack.snapshot();
1250        self
1251    }
1252
1253    // The checkpoint was cleared successfully
1254    // so remove it without touching other stack state.
1255    #[inline]
1256    pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
1257        self.stack.clear_snapshot();
1258        self
1259    }
1260
1261    // Restore the current state to the most recent checkpoint.
1262    #[inline]
1263    pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
1264        self.stack.restore();
1265        self
1266    }
1267}
1268
1269fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
1270    let start_norm = normalize_index(start, len)?;
1271    let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
1272    Some(start_norm..end_norm)
1273}
1274
1275/// Normalizes the index using its sequence’s length.
1276/// Returns `None` if the normalized index is OOB.
1277fn normalize_index(i: i32, len: usize) -> Option<usize> {
1278    if i > len as i32 {
1279        None
1280    } else if i >= 0 {
1281        Some(i as usize)
1282    } else {
1283        let real_i = len as i32 + i;
1284        if real_i >= 0 {
1285            Some(real_i as usize)
1286        } else {
1287            None
1288        }
1289    }
1290}
1291
1292#[cfg(test)]
1293mod test {
1294    use super::*;
1295
1296    #[test]
1297    fn normalize_index_pos() {
1298        assert_eq!(normalize_index(4, 6), Some(4));
1299        assert_eq!(normalize_index(5, 5), Some(5));
1300        assert_eq!(normalize_index(6, 3), None);
1301    }
1302
1303    #[test]
1304    fn normalize_index_neg() {
1305        assert_eq!(normalize_index(-4, 6), Some(2));
1306        assert_eq!(normalize_index(-5, 5), Some(0));
1307        assert_eq!(normalize_index(-6, 3), None);
1308    }
1309}