pest_meta/
ast.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! Types for the pest's abstract syntax tree.
11
12/// A grammar rule
13#[derive(Clone, Debug, Eq, PartialEq)]
14pub struct Rule {
15    /// The name of the rule
16    pub name: String,
17    /// The rule's type (silent, atomic, ...)
18    pub ty: RuleType,
19    /// The rule's expression
20    pub expr: Expr,
21}
22
23/// All possible rule types
24#[derive(Clone, Copy, Debug, Eq, PartialEq)]
25pub enum RuleType {
26    /// The normal rule type
27    Normal,
28    /// Silent rules are just like normal rules
29    /// — when run, they function the same way —
30    /// except they do not produce pairs or tokens.
31    /// If a rule is silent, it will never appear in a parse result.
32    /// (their syntax is `_{ ... }`)
33    Silent,
34    /// atomic rule prevent implicit whitespace: inside an atomic rule,
35    /// the tilde ~ means "immediately followed by",
36    /// and repetition operators (asterisk * and plus sign +)
37    /// have no implicit separation. In addition, all other rules
38    /// called from an atomic rule are also treated as atomic.
39    /// In an atomic rule, interior matching rules are silent.
40    /// (their syntax is `@{ ... }`)
41    Atomic,
42    /// Compound atomic rules are similar to atomic rules,
43    /// but they produce inner tokens as normal.
44    /// (their syntax is `${ ... }`)
45    CompoundAtomic,
46    /// Non-atomic rules cancel the effect of atomic rules.
47    /// (their syntax is `!{ ... }`)
48    NonAtomic,
49}
50
51/// All possible rule expressions
52///
53/// # Warning: Semantic Versioning
54/// There may be non-breaking changes to the meta-grammar
55/// between minor versions. Those non-breaking changes, however,
56/// may translate into semver-breaking changes due to the additional variants
57/// propaged from the `Rule` enum. This is a known issue and will be fixed in the
58/// future (e.g. by increasing MSRV and non_exhaustive annotations).
59#[derive(Clone, Debug, Eq, PartialEq)]
60pub enum Expr {
61    /// Matches an exact string, e.g. `"a"`
62    Str(String),
63    /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
64    Insens(String),
65    /// Matches one character in the range, e.g. `'a'..'z'`
66    Range(String, String),
67    /// Matches the rule with the given name, e.g. `a`
68    Ident(String),
69    /// Matches a custom part of the stack, e.g. `PEEK[..]`
70    PeekSlice(i32, Option<i32>),
71    /// Positive lookahead; matches expression without making progress, e.g. `&e`
72    PosPred(Box<Expr>),
73    /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
74    NegPred(Box<Expr>),
75    /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
76    Seq(Box<Expr>, Box<Expr>),
77    /// Matches either of two expressions, e.g. `e1 | e2`
78    Choice(Box<Expr>, Box<Expr>),
79    /// Optionally matches an expression, e.g. `e?`
80    Opt(Box<Expr>),
81    /// Matches an expression zero or more times, e.g. `e*`
82    Rep(Box<Expr>),
83    /// Matches an expression one or more times, e.g. `e+`
84    RepOnce(Box<Expr>),
85    /// Matches an expression an exact number of times, e.g. `e{n}`
86    RepExact(Box<Expr>, u32),
87    /// Matches an expression at least a number of times, e.g. `e{n,}`
88    RepMin(Box<Expr>, u32),
89    /// Matches an expression at most a number of times, e.g. `e{,n}`
90    RepMax(Box<Expr>, u32),
91    /// Matches an expression a number of times within a range, e.g. `e{m, n}`
92    RepMinMax(Box<Expr>, u32, u32),
93    /// Continues to match expressions until one of the strings in the `Vec` is found
94    Skip(Vec<String>),
95    /// Matches an expression and pushes it to the stack, e.g. `push(e)`
96    Push(Box<Expr>),
97    /// Matches an expression and assigns a label to it, e.g. #label = exp
98    #[cfg(feature = "grammar-extras")]
99    NodeTag(Box<Expr>, String),
100}
101
102impl Expr {
103    /// Returns the iterator that steps the expression from top to bottom.
104    pub fn iter_top_down(&self) -> ExprTopDownIterator {
105        ExprTopDownIterator::new(self)
106    }
107
108    /// Applies `f` to the expression and all its children (top to bottom).
109    pub fn map_top_down<F>(self, mut f: F) -> Expr
110    where
111        F: FnMut(Expr) -> Expr,
112    {
113        fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
114        where
115            F: FnMut(Expr) -> Expr,
116        {
117            let expr = f(expr);
118
119            match expr {
120                Expr::PosPred(expr) => {
121                    let mapped = Box::new(map_internal(*expr, f));
122                    Expr::PosPred(mapped)
123                }
124                Expr::NegPred(expr) => {
125                    let mapped = Box::new(map_internal(*expr, f));
126                    Expr::NegPred(mapped)
127                }
128                Expr::Seq(lhs, rhs) => {
129                    let mapped_lhs = Box::new(map_internal(*lhs, f));
130                    let mapped_rhs = Box::new(map_internal(*rhs, f));
131                    Expr::Seq(mapped_lhs, mapped_rhs)
132                }
133                Expr::Choice(lhs, rhs) => {
134                    let mapped_lhs = Box::new(map_internal(*lhs, f));
135                    let mapped_rhs = Box::new(map_internal(*rhs, f));
136                    Expr::Choice(mapped_lhs, mapped_rhs)
137                }
138                Expr::Rep(expr) => {
139                    let mapped = Box::new(map_internal(*expr, f));
140                    Expr::Rep(mapped)
141                }
142                Expr::RepOnce(expr) => {
143                    let mapped = Box::new(map_internal(*expr, f));
144                    Expr::RepOnce(mapped)
145                }
146                Expr::RepExact(expr, max) => {
147                    let mapped = Box::new(map_internal(*expr, f));
148                    Expr::RepExact(mapped, max)
149                }
150                Expr::RepMin(expr, num) => {
151                    let mapped = Box::new(map_internal(*expr, f));
152                    Expr::RepMin(mapped, num)
153                }
154                Expr::RepMax(expr, num) => {
155                    let mapped = Box::new(map_internal(*expr, f));
156                    Expr::RepMax(mapped, num)
157                }
158                Expr::RepMinMax(expr, min, max) => {
159                    let mapped = Box::new(map_internal(*expr, f));
160                    Expr::RepMinMax(mapped, min, max)
161                }
162                Expr::Opt(expr) => {
163                    let mapped = Box::new(map_internal(*expr, f));
164                    Expr::Opt(mapped)
165                }
166                Expr::Push(expr) => {
167                    let mapped = Box::new(map_internal(*expr, f));
168                    Expr::Push(mapped)
169                }
170                #[cfg(feature = "grammar-extras")]
171                Expr::NodeTag(expr, tag) => {
172                    let mapped = Box::new(map_internal(*expr, f));
173                    Expr::NodeTag(mapped, tag)
174                }
175                expr => expr,
176            }
177        }
178
179        map_internal(self, &mut f)
180    }
181
182    /// Applies `f` to the expression and all its children (bottom up).
183    pub fn map_bottom_up<F>(self, mut f: F) -> Expr
184    where
185        F: FnMut(Expr) -> Expr,
186    {
187        fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
188        where
189            F: FnMut(Expr) -> Expr,
190        {
191            let mapped = match expr {
192                Expr::PosPred(expr) => {
193                    let mapped = Box::new(map_internal(*expr, f));
194                    Expr::PosPred(mapped)
195                }
196                Expr::NegPred(expr) => {
197                    let mapped = Box::new(map_internal(*expr, f));
198                    Expr::NegPred(mapped)
199                }
200                Expr::Seq(lhs, rhs) => {
201                    let mapped_lhs = Box::new(map_internal(*lhs, f));
202                    let mapped_rhs = Box::new(map_internal(*rhs, f));
203                    Expr::Seq(mapped_lhs, mapped_rhs)
204                }
205                Expr::Choice(lhs, rhs) => {
206                    let mapped_lhs = Box::new(map_internal(*lhs, f));
207                    let mapped_rhs = Box::new(map_internal(*rhs, f));
208                    Expr::Choice(mapped_lhs, mapped_rhs)
209                }
210                Expr::Rep(expr) => {
211                    let mapped = Box::new(map_internal(*expr, f));
212                    Expr::Rep(mapped)
213                }
214                Expr::RepOnce(expr) => {
215                    let mapped = Box::new(map_internal(*expr, f));
216                    Expr::RepOnce(mapped)
217                }
218                Expr::RepExact(expr, num) => {
219                    let mapped = Box::new(map_internal(*expr, f));
220                    Expr::RepExact(mapped, num)
221                }
222                Expr::RepMin(expr, max) => {
223                    let mapped = Box::new(map_internal(*expr, f));
224                    Expr::RepMin(mapped, max)
225                }
226                Expr::RepMax(expr, max) => {
227                    let mapped = Box::new(map_internal(*expr, f));
228                    Expr::RepMax(mapped, max)
229                }
230                Expr::RepMinMax(expr, min, max) => {
231                    let mapped = Box::new(map_internal(*expr, f));
232                    Expr::RepMinMax(mapped, min, max)
233                }
234                Expr::Opt(expr) => {
235                    let mapped = Box::new(map_internal(*expr, f));
236                    Expr::Opt(mapped)
237                }
238                Expr::Push(expr) => {
239                    let mapped = Box::new(map_internal(*expr, f));
240                    Expr::Push(mapped)
241                }
242                #[cfg(feature = "grammar-extras")]
243                Expr::NodeTag(expr, tag) => {
244                    let mapped = Box::new(map_internal(*expr, f));
245                    Expr::NodeTag(mapped, tag)
246                }
247                expr => expr,
248            };
249
250            f(mapped)
251        }
252
253        map_internal(self, &mut f)
254    }
255}
256
257impl core::fmt::Display for Expr {
258    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
259        match self {
260            Expr::Str(s) => write!(f, "{:?}", s),
261            Expr::Insens(s) => write!(f, "^{:?}", s),
262            Expr::Range(start, end) => {
263                let start = start.chars().next().expect("Empty range start.");
264                let end = end.chars().next().expect("Empty range end.");
265                write!(f, "({:?}..{:?})", start, end)
266            }
267            Expr::Ident(id) => write!(f, "{}", id),
268            Expr::PeekSlice(start, end) => match end {
269                Some(end) => write!(f, "PEEK[{}..{}]", start, end),
270                None => write!(f, "PEEK[{}..]", start),
271            },
272            Expr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
273            Expr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
274            Expr::Seq(lhs, rhs) => {
275                let mut nodes = Vec::new();
276                nodes.push(lhs);
277                let mut current = rhs;
278                while let Expr::Seq(lhs, rhs) = current.as_ref() {
279                    nodes.push(lhs);
280                    current = rhs;
281                }
282                nodes.push(current);
283                let sequence = nodes
284                    .iter()
285                    .map(|node| format!("{}", node))
286                    .collect::<Vec<_>>()
287                    .join(" ~ ");
288                write!(f, "({})", sequence)
289            }
290            Expr::Choice(lhs, rhs) => {
291                let mut nodes = Vec::new();
292                nodes.push(lhs);
293                let mut current = rhs;
294                while let Expr::Choice(lhs, rhs) = current.as_ref() {
295                    nodes.push(lhs);
296                    current = rhs;
297                }
298                nodes.push(current);
299                let sequence = nodes
300                    .iter()
301                    .map(|node| format!("{}", node))
302                    .collect::<Vec<_>>()
303                    .join(" | ");
304                write!(f, "({})", sequence)
305            }
306            Expr::Opt(expr) => write!(f, "{}?", expr),
307            Expr::Rep(expr) => write!(f, "{}*", expr),
308            Expr::RepOnce(expr) => write!(f, "{}+", expr),
309            Expr::RepExact(expr, n) => write!(f, "{}{{{}}}", expr, n),
310            Expr::RepMin(expr, min) => write!(f, "{}{{{},}}", expr, min),
311            Expr::RepMax(expr, max) => write!(f, "{}{{,{}}}", expr, max),
312            Expr::RepMinMax(expr, min, max) => write!(f, "{}{{{}, {}}}", expr, min, max),
313            Expr::Skip(strings) => {
314                let strings = strings
315                    .iter()
316                    .map(|s| format!("{:?}", s))
317                    .collect::<Vec<_>>()
318                    .join(" | ");
319                write!(f, "(!({}) ~ ANY)*", strings)
320            }
321            Expr::Push(expr) => write!(f, "PUSH({})", expr),
322            #[cfg(feature = "grammar-extras")]
323            Expr::NodeTag(expr, tag) => {
324                write!(f, "(#{} = {})", tag, expr)
325            }
326        }
327    }
328}
329
330/// The top down iterator for an expression.
331pub struct ExprTopDownIterator {
332    current: Option<Expr>,
333    next: Option<Expr>,
334    right_branches: Vec<Expr>,
335}
336
337impl ExprTopDownIterator {
338    /// Constructs a top-down iterator from the expression.
339    pub fn new(expr: &Expr) -> Self {
340        let mut iter = ExprTopDownIterator {
341            current: None,
342            next: None,
343            right_branches: vec![],
344        };
345        iter.iterate_expr(expr.clone());
346        iter
347    }
348
349    fn iterate_expr(&mut self, expr: Expr) {
350        self.current = Some(expr.clone());
351        match expr {
352            Expr::Seq(lhs, rhs) => {
353                self.right_branches.push(*rhs);
354                self.next = Some(*lhs);
355            }
356            Expr::Choice(lhs, rhs) => {
357                self.right_branches.push(*rhs);
358                self.next = Some(*lhs);
359            }
360            Expr::PosPred(expr)
361            | Expr::NegPred(expr)
362            | Expr::Rep(expr)
363            | Expr::RepOnce(expr)
364            | Expr::RepExact(expr, _)
365            | Expr::RepMin(expr, _)
366            | Expr::RepMax(expr, _)
367            | Expr::RepMinMax(expr, ..)
368            | Expr::Opt(expr)
369            | Expr::Push(expr) => {
370                self.next = Some(*expr);
371            }
372            #[cfg(feature = "grammar-extras")]
373            Expr::NodeTag(expr, _) => {
374                self.next = Some(*expr);
375            }
376            _ => {
377                self.next = None;
378            }
379        }
380    }
381}
382
383impl Iterator for ExprTopDownIterator {
384    type Item = Expr;
385
386    fn next(&mut self) -> Option<Self::Item> {
387        let result = self.current.take();
388
389        if let Some(expr) = self.next.take() {
390            self.iterate_expr(expr);
391        } else if let Some(expr) = self.right_branches.pop() {
392            self.iterate_expr(expr);
393        }
394
395        result
396    }
397}
398
399#[cfg(test)]
400mod tests {
401    use super::*;
402
403    #[test]
404    fn top_down_iterator() {
405        let expr = Expr::Choice(
406            Box::new(Expr::Str(String::from("a"))),
407            Box::new(Expr::Str(String::from("b"))),
408        );
409        let mut top_down = expr.iter_top_down();
410        assert_eq!(top_down.next(), Some(expr));
411        assert_eq!(top_down.next(), Some(Expr::Str(String::from("a"))));
412        assert_eq!(top_down.next(), Some(Expr::Str(String::from("b"))));
413        assert_eq!(top_down.next(), None);
414    }
415
416    #[test]
417    fn identity() {
418        let expr = Expr::Choice(
419            Box::new(Expr::Seq(
420                Box::new(Expr::Ident("a".to_owned())),
421                Box::new(Expr::Str("b".to_owned())),
422            )),
423            Box::new(Expr::PosPred(Box::new(Expr::NegPred(Box::new(Expr::Rep(
424                Box::new(Expr::RepOnce(Box::new(Expr::Opt(Box::new(Expr::Choice(
425                    Box::new(Expr::Insens("c".to_owned())),
426                    Box::new(Expr::Push(Box::new(Expr::Range(
427                        "'d'".to_owned(),
428                        "'e'".to_owned(),
429                    )))),
430                )))))),
431            )))))),
432        );
433
434        assert_eq!(
435            expr.clone()
436                .map_bottom_up(|expr| expr)
437                .map_top_down(|expr| expr),
438            expr,
439        );
440    }
441
442    mod display {
443        use super::super::*;
444
445        #[test]
446        fn string() {
447            assert_eq!(Expr::Str("a".to_owned()).to_string(), r#""a""#);
448        }
449
450        #[test]
451        fn insens() {
452            assert_eq!(Expr::Insens("a".to_owned()).to_string(), r#"^"a""#);
453        }
454
455        #[test]
456        fn range() {
457            assert_eq!(
458                Expr::Range("a".to_owned(), "z".to_owned()).to_string(),
459                r#"('a'..'z')"#,
460            );
461        }
462
463        #[test]
464        fn ident() {
465            assert_eq!(Expr::Ident("a".to_owned()).to_string(), "a");
466        }
467
468        #[test]
469        fn peek_slice() {
470            assert_eq!(Expr::PeekSlice(0, None).to_string(), "PEEK[0..]");
471            assert_eq!(Expr::PeekSlice(0, Some(-1)).to_string(), "PEEK[0..-1]");
472        }
473
474        #[test]
475        fn pos_pred() {
476            assert_eq!(
477                Expr::PosPred(Box::new(Expr::Ident("e".to_owned()))).to_string(),
478                "&e",
479            );
480        }
481
482        #[test]
483        fn neg_pred() {
484            assert_eq!(
485                Expr::NegPred(Box::new(Expr::Ident("e".to_owned()))).to_string(),
486                "!e",
487            );
488        }
489
490        #[test]
491        fn seq() {
492            assert_eq!(
493                Expr::Seq(
494                    Box::new(Expr::Ident("e1".to_owned())),
495                    Box::new(Expr::Ident("e2".to_owned())),
496                )
497                .to_string(),
498                "(e1 ~ e2)",
499            );
500            assert_eq!(
501                Expr::Seq(
502                    Box::new(Expr::Ident("e1".to_owned())),
503                    Box::new(Expr::Seq(
504                        Box::new(Expr::Ident("e2".to_owned())),
505                        Box::new(Expr::Ident("e3".to_owned())),
506                    )),
507                )
508                .to_string(),
509                "(e1 ~ e2 ~ e3)",
510            );
511            assert_eq!(
512                Expr::Seq(
513                    Box::new(Expr::Ident("e1".to_owned())),
514                    Box::new(Expr::Seq(
515                        Box::new(Expr::Ident("e2".to_owned())),
516                        Box::new(Expr::Seq(
517                            Box::new(Expr::Ident("e3".to_owned())),
518                            Box::new(Expr::Ident("e4".to_owned())),
519                        )),
520                    )),
521                )
522                .to_string(),
523                "(e1 ~ e2 ~ e3 ~ e4)",
524            );
525            assert_eq!(
526                Expr::Seq(
527                    Box::new(Expr::Ident("e1".to_owned())),
528                    Box::new(Expr::Choice(
529                        Box::new(Expr::Ident("e2".to_owned())),
530                        Box::new(Expr::Seq(
531                            Box::new(Expr::Ident("e3".to_owned())),
532                            Box::new(Expr::Ident("e4".to_owned())),
533                        )),
534                    )),
535                )
536                .to_string(),
537                "(e1 ~ (e2 | (e3 ~ e4)))",
538            );
539            assert_eq!(
540                Expr::Seq(
541                    Box::new(Expr::Ident("e1".to_owned())),
542                    Box::new(Expr::Seq(
543                        Box::new(Expr::Ident("e2".to_owned())),
544                        Box::new(Expr::Choice(
545                            Box::new(Expr::Ident("e3".to_owned())),
546                            Box::new(Expr::Ident("e4".to_owned())),
547                        )),
548                    )),
549                )
550                .to_string(),
551                "(e1 ~ e2 ~ (e3 | e4))",
552            );
553        }
554
555        #[test]
556        fn choice() {
557            assert_eq!(
558                Expr::Choice(
559                    Box::new(Expr::Ident("e1".to_owned())),
560                    Box::new(Expr::Ident("e2".to_owned())),
561                )
562                .to_string(),
563                "(e1 | e2)",
564            );
565            assert_eq!(
566                Expr::Choice(
567                    Box::new(Expr::Ident("e1".to_owned())),
568                    Box::new(Expr::Choice(
569                        Box::new(Expr::Ident("e2".to_owned())),
570                        Box::new(Expr::Ident("e3".to_owned())),
571                    )),
572                )
573                .to_string(),
574                "(e1 | e2 | e3)",
575            );
576            assert_eq!(
577                Expr::Choice(
578                    Box::new(Expr::Ident("e1".to_owned())),
579                    Box::new(Expr::Choice(
580                        Box::new(Expr::Ident("e2".to_owned())),
581                        Box::new(Expr::Choice(
582                            Box::new(Expr::Ident("e3".to_owned())),
583                            Box::new(Expr::Ident("e4".to_owned())),
584                        )),
585                    )),
586                )
587                .to_string(),
588                "(e1 | e2 | e3 | e4)",
589            );
590            assert_eq!(
591                Expr::Choice(
592                    Box::new(Expr::Ident("e1".to_owned())),
593                    Box::new(Expr::Seq(
594                        Box::new(Expr::Ident("e2".to_owned())),
595                        Box::new(Expr::Choice(
596                            Box::new(Expr::Ident("e3".to_owned())),
597                            Box::new(Expr::Ident("e4".to_owned())),
598                        )),
599                    )),
600                )
601                .to_string(),
602                "(e1 | (e2 ~ (e3 | e4)))",
603            );
604        }
605
606        #[test]
607        fn opt() {
608            assert_eq!(
609                Expr::Opt(Box::new(Expr::Ident("e".to_owned()))).to_string(),
610                "e?",
611            );
612        }
613
614        #[test]
615        fn rep() {
616            assert_eq!(
617                Expr::Rep(Box::new(Expr::Ident("e".to_owned()))).to_string(),
618                "e*",
619            );
620        }
621
622        #[test]
623        fn rep_once() {
624            assert_eq!(
625                Expr::RepOnce(Box::new(Expr::Ident("e".to_owned()))).to_string(),
626                "e+",
627            );
628        }
629
630        #[test]
631        fn rep_exact() {
632            assert_eq!(
633                Expr::RepExact(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
634                "e{1}",
635            );
636        }
637
638        #[test]
639        fn rep_min() {
640            assert_eq!(
641                Expr::RepMin(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
642                "e{1,}",
643            );
644        }
645
646        #[test]
647        fn rep_max() {
648            assert_eq!(
649                Expr::RepMax(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
650                "e{,1}",
651            );
652        }
653
654        #[test]
655        fn rep_min_max() {
656            assert_eq!(
657                Expr::RepMinMax(Box::new(Expr::Ident("e".to_owned())), 1, 2).to_string(),
658                "e{1, 2}",
659            );
660        }
661
662        #[test]
663        fn skip() {
664            assert_eq!(
665                Expr::Skip(
666                    ["a", "bc"]
667                        .into_iter()
668                        .map(|s| s.to_owned())
669                        .collect::<Vec<_>>(),
670                )
671                .to_string(),
672                r#"(!("a" | "bc") ~ ANY)*"#,
673            );
674        }
675
676        #[test]
677        fn push() {
678            assert_eq!(
679                Expr::Push(Box::new(Expr::Ident("e".to_owned()))).to_string(),
680                "PUSH(e)",
681            );
682        }
683
684        #[test]
685        #[cfg(feature = "grammar-extras")]
686        fn node_tag() {
687            assert_eq!(
688                Expr::NodeTag(Box::new(Expr::Ident("expr".to_owned())), "label".to_owned())
689                    .to_string(),
690                "(#label = expr)",
691            );
692        }
693    }
694}