pest_meta/optimizer/
mod.rs

1// pest. The Elegant Parser
2// Copyright (c) 2018 DragoČ™ Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9
10//! Different optimizations for pest's ASTs.
11
12use crate::ast::*;
13use std::collections::HashMap;
14
15#[cfg(test)]
16macro_rules! box_tree {
17    ( $node:ident( $(                      $child:ident( $($args:tt)* )     ),+ ) ) => (
18      $node      ( $( Box::new( box_tree!( $child      ( $($args   )* ) ) ) ),+ )
19    );
20    ($expr:expr) => ($expr);
21}
22
23mod concatenator;
24mod factorizer;
25mod lister;
26mod restorer;
27mod rotater;
28mod skipper;
29mod unroller;
30
31/// Takes pest's ASTs and optimizes them
32pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33    let optimized: Vec<OptimizedRule> = rules
34        .into_iter()
35        .map(rotater::rotate)
36        .map(skipper::skip)
37        .map(unroller::unroll)
38        .map(concatenator::concatenate)
39        .map(factorizer::factor)
40        .map(lister::list)
41        .map(rule_to_optimized_rule)
42        .collect();
43
44    let rules = to_hash_map(&optimized);
45    optimized
46        .into_iter()
47        .map(|rule| restorer::restore_on_err(rule, &rules))
48        .collect()
49}
50
51fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
52    fn to_optimized(expr: Expr) -> OptimizedExpr {
53        match expr {
54            Expr::Str(string) => OptimizedExpr::Str(string),
55            Expr::Insens(string) => OptimizedExpr::Insens(string),
56            Expr::Range(start, end) => OptimizedExpr::Range(start, end),
57            Expr::Ident(ident) => OptimizedExpr::Ident(ident),
58            Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
59            Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
60            Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
61            Expr::Seq(lhs, rhs) => {
62                OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
63            }
64            Expr::Choice(lhs, rhs) => {
65                OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
66            }
67            Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
68            Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
69            Expr::Skip(strings) => OptimizedExpr::Skip(strings),
70            Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
71            #[cfg(feature = "grammar-extras")]
72            Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag),
73            #[cfg(feature = "grammar-extras")]
74            Expr::RepOnce(expr) => OptimizedExpr::RepOnce(Box::new(to_optimized(*expr))),
75            #[cfg(not(feature = "grammar-extras"))]
76            Expr::RepOnce(_) => unreachable!("No valid transformation to OptimizedRule"),
77            Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => {
78                unreachable!("No valid transformation to OptimizedRule")
79            }
80        }
81    }
82
83    OptimizedRule {
84        name: rule.name,
85        ty: rule.ty,
86        expr: to_optimized(rule.expr),
87    }
88}
89
90fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> {
91    rules
92        .iter()
93        .map(|r| (r.name.clone(), r.expr.clone()))
94        .collect()
95}
96
97/// The optimized version of the pest AST's `Rule`.
98#[derive(Clone, Debug, Eq, PartialEq)]
99pub struct OptimizedRule {
100    /// The name of the rule.
101    pub name: String,
102    /// The type of the rule.
103    pub ty: RuleType,
104    /// The optimized expression of the rule.
105    pub expr: OptimizedExpr,
106}
107
108/// The optimized version of the pest AST's `Expr`.
109///
110/// # Warning: Semantic Versioning
111/// There may be non-breaking changes to the meta-grammar
112/// between minor versions. Those non-breaking changes, however,
113/// may translate into semver-breaking changes due to the additional variants
114/// propaged from the `Rule` enum. This is a known issue and will be fixed in the
115/// future (e.g. by increasing MSRV and non_exhaustive annotations).
116#[derive(Clone, Debug, Eq, PartialEq)]
117pub enum OptimizedExpr {
118    /// Matches an exact string, e.g. `"a"`
119    Str(String),
120    /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
121    Insens(String),
122    /// Matches one character in the range, e.g. `'a'..'z'`
123    Range(String, String),
124    /// Matches the rule with the given name, e.g. `a`
125    Ident(String),
126    /// Matches a custom part of the stack, e.g. `PEEK[..]`
127    PeekSlice(i32, Option<i32>),
128    /// Positive lookahead; matches expression without making progress, e.g. `&e`
129    PosPred(Box<OptimizedExpr>),
130    /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
131    NegPred(Box<OptimizedExpr>),
132    /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
133    Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
134    /// Matches either of two expressions, e.g. `e1 | e2`
135    Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
136    /// Optionally matches an expression, e.g. `e?`
137    Opt(Box<OptimizedExpr>),
138    /// Matches an expression zero or more times, e.g. `e*`
139    Rep(Box<OptimizedExpr>),
140    /// Matches an expression one or more times, e.g. `e+`
141    #[cfg(feature = "grammar-extras")]
142    RepOnce(Box<OptimizedExpr>),
143    /// Continues to match expressions until one of the strings in the `Vec` is found
144    Skip(Vec<String>),
145    /// Matches an expression and pushes it to the stack, e.g. `push(e)`
146    Push(Box<OptimizedExpr>),
147    /// Matches an expression and assigns a label to it, e.g. #label = exp
148    #[cfg(feature = "grammar-extras")]
149    NodeTag(Box<OptimizedExpr>, String),
150    /// Restores an expression's checkpoint
151    RestoreOnErr(Box<OptimizedExpr>),
152}
153
154impl OptimizedExpr {
155    /// Returns a top-down iterator over the `OptimizedExpr`.
156    pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
157        OptimizedExprTopDownIterator::new(self)
158    }
159
160    /// Applies `f` to the `OptimizedExpr` top-down.
161    pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
162    where
163        F: FnMut(OptimizedExpr) -> OptimizedExpr,
164    {
165        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
166        where
167            F: FnMut(OptimizedExpr) -> OptimizedExpr,
168        {
169            let expr = f(expr);
170
171            match expr {
172                OptimizedExpr::PosPred(expr) => {
173                    let mapped = Box::new(map_internal(*expr, f));
174                    OptimizedExpr::PosPred(mapped)
175                }
176                OptimizedExpr::NegPred(expr) => {
177                    let mapped = Box::new(map_internal(*expr, f));
178                    OptimizedExpr::NegPred(mapped)
179                }
180                OptimizedExpr::Seq(lhs, rhs) => {
181                    let mapped_lhs = Box::new(map_internal(*lhs, f));
182                    let mapped_rhs = Box::new(map_internal(*rhs, f));
183                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
184                }
185                OptimizedExpr::Choice(lhs, rhs) => {
186                    let mapped_lhs = Box::new(map_internal(*lhs, f));
187                    let mapped_rhs = Box::new(map_internal(*rhs, f));
188                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
189                }
190                OptimizedExpr::Rep(expr) => {
191                    let mapped = Box::new(map_internal(*expr, f));
192                    OptimizedExpr::Rep(mapped)
193                }
194                OptimizedExpr::Opt(expr) => {
195                    let mapped = Box::new(map_internal(*expr, f));
196                    OptimizedExpr::Opt(mapped)
197                }
198                OptimizedExpr::Push(expr) => {
199                    let mapped = Box::new(map_internal(*expr, f));
200                    OptimizedExpr::Push(mapped)
201                }
202                expr => expr,
203            }
204        }
205
206        map_internal(self, &mut f)
207    }
208
209    /// Applies `f` to the `OptimizedExpr` bottom-up.
210    pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
211    where
212        F: FnMut(OptimizedExpr) -> OptimizedExpr,
213    {
214        fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
215        where
216            F: FnMut(OptimizedExpr) -> OptimizedExpr,
217        {
218            let mapped = match expr {
219                OptimizedExpr::PosPred(expr) => {
220                    let mapped = Box::new(map_internal(*expr, f));
221                    OptimizedExpr::PosPred(mapped)
222                }
223                OptimizedExpr::NegPred(expr) => {
224                    let mapped = Box::new(map_internal(*expr, f));
225                    OptimizedExpr::NegPred(mapped)
226                }
227                OptimizedExpr::Seq(lhs, rhs) => {
228                    let mapped_lhs = Box::new(map_internal(*lhs, f));
229                    let mapped_rhs = Box::new(map_internal(*rhs, f));
230                    OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
231                }
232                OptimizedExpr::Choice(lhs, rhs) => {
233                    let mapped_lhs = Box::new(map_internal(*lhs, f));
234                    let mapped_rhs = Box::new(map_internal(*rhs, f));
235                    OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
236                }
237                OptimizedExpr::Rep(expr) => {
238                    let mapped = Box::new(map_internal(*expr, f));
239                    OptimizedExpr::Rep(mapped)
240                }
241                OptimizedExpr::Opt(expr) => {
242                    let mapped = Box::new(map_internal(*expr, f));
243                    OptimizedExpr::Opt(mapped)
244                }
245                OptimizedExpr::Push(expr) => {
246                    let mapped = Box::new(map_internal(*expr, f));
247                    OptimizedExpr::Push(mapped)
248                }
249                expr => expr,
250            };
251
252            f(mapped)
253        }
254
255        map_internal(self, &mut f)
256    }
257}
258
259impl core::fmt::Display for OptimizedExpr {
260    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
261        match self {
262            OptimizedExpr::Str(s) => write!(f, "{:?}", s),
263            OptimizedExpr::Insens(s) => write!(f, "^{:?}", s),
264            OptimizedExpr::Range(start, end) => {
265                let start = start.chars().next().expect("Empty range start.");
266                let end = end.chars().next().expect("Empty range end.");
267                write!(f, "({:?}..{:?})", start, end)
268            }
269            OptimizedExpr::Ident(id) => write!(f, "{}", id),
270            OptimizedExpr::PeekSlice(start, end) => match end {
271                Some(end) => write!(f, "PEEK[{}..{}]", start, end),
272                None => write!(f, "PEEK[{}..]", start),
273            },
274            OptimizedExpr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
275            OptimizedExpr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
276            OptimizedExpr::Seq(lhs, rhs) => {
277                let mut nodes = Vec::new();
278                nodes.push(lhs);
279                let mut current = rhs;
280                while let OptimizedExpr::Seq(lhs, rhs) = current.as_ref() {
281                    nodes.push(lhs);
282                    current = rhs;
283                }
284                nodes.push(current);
285                let sequence = nodes
286                    .iter()
287                    .map(|node| format!("{}", node))
288                    .collect::<Vec<_>>()
289                    .join(" ~ ");
290                write!(f, "({})", sequence)
291            }
292            OptimizedExpr::Choice(lhs, rhs) => {
293                let mut nodes = Vec::new();
294                nodes.push(lhs);
295                let mut current = rhs;
296                while let OptimizedExpr::Choice(lhs, rhs) = current.as_ref() {
297                    nodes.push(lhs);
298                    current = rhs;
299                }
300                nodes.push(current);
301                let sequence = nodes
302                    .iter()
303                    .map(|node| format!("{}", node))
304                    .collect::<Vec<_>>()
305                    .join(" | ");
306                write!(f, "({})", sequence)
307            }
308            OptimizedExpr::Opt(expr) => write!(f, "{}?", expr),
309            OptimizedExpr::Rep(expr) => write!(f, "{}*", expr),
310            #[cfg(feature = "grammar-extras")]
311            OptimizedExpr::RepOnce(expr) => write!(f, "{}+", expr),
312            OptimizedExpr::Skip(strings) => {
313                let strings = strings
314                    .iter()
315                    .map(|s| format!("{:?}", s))
316                    .collect::<Vec<_>>()
317                    .join(" | ");
318                write!(f, "(!({}) ~ ANY)*", strings)
319            }
320            OptimizedExpr::Push(expr) => write!(f, "PUSH({})", expr),
321            #[cfg(feature = "grammar-extras")]
322            OptimizedExpr::NodeTag(expr, tag) => {
323                write!(f, "(#{} = {})", tag, expr)
324            }
325            OptimizedExpr::RestoreOnErr(expr) => core::fmt::Display::fmt(expr.as_ref(), f),
326        }
327    }
328}
329
330/// A top-down iterator over an `OptimizedExpr`.
331pub struct OptimizedExprTopDownIterator {
332    current: Option<OptimizedExpr>,
333    next: Option<OptimizedExpr>,
334    right_branches: Vec<OptimizedExpr>,
335}
336
337impl OptimizedExprTopDownIterator {
338    /// Creates a new top down iterator from an `OptimizedExpr`.
339    pub fn new(expr: &OptimizedExpr) -> Self {
340        let mut iter = OptimizedExprTopDownIterator {
341            current: None,
342            next: None,
343            right_branches: vec![],
344        };
345        iter.iterate_expr(expr.clone());
346        iter
347    }
348
349    fn iterate_expr(&mut self, expr: OptimizedExpr) {
350        self.current = Some(expr.clone());
351        match expr {
352            OptimizedExpr::Seq(lhs, rhs) => {
353                self.right_branches.push(*rhs);
354                self.next = Some(*lhs);
355            }
356            OptimizedExpr::Choice(lhs, rhs) => {
357                self.right_branches.push(*rhs);
358                self.next = Some(*lhs);
359            }
360            OptimizedExpr::PosPred(expr)
361            | OptimizedExpr::NegPred(expr)
362            | OptimizedExpr::Rep(expr)
363            | OptimizedExpr::Opt(expr)
364            | OptimizedExpr::Push(expr) => {
365                self.next = Some(*expr);
366            }
367            _ => {
368                self.next = None;
369            }
370        }
371    }
372}
373
374impl Iterator for OptimizedExprTopDownIterator {
375    type Item = OptimizedExpr;
376
377    fn next(&mut self) -> Option<Self::Item> {
378        let result = self.current.take();
379
380        if let Some(expr) = self.next.take() {
381            self.iterate_expr(expr);
382        } else if let Some(expr) = self.right_branches.pop() {
383            self.iterate_expr(expr);
384        }
385
386        result
387    }
388}
389
390#[cfg(test)]
391mod tests {
392    use super::*;
393
394    #[test]
395    fn rotate() {
396        let rules = {
397            use crate::ast::Expr::*;
398            vec![Rule {
399                name: "rule".to_owned(),
400                ty: RuleType::Normal,
401                expr: box_tree!(Choice(
402                    Choice(
403                        Choice(Str(String::from("a")), Str(String::from("b"))),
404                        Str(String::from("c"))
405                    ),
406                    Str(String::from("d"))
407                )),
408            }]
409        };
410        let rotated = {
411            use crate::optimizer::OptimizedExpr::*;
412            vec![OptimizedRule {
413                name: "rule".to_owned(),
414                ty: RuleType::Normal,
415                expr: box_tree!(Choice(
416                    Str(String::from("a")),
417                    Choice(
418                        Str(String::from("b")),
419                        Choice(Str(String::from("c")), Str(String::from("d")))
420                    )
421                )),
422            }]
423        };
424
425        assert_eq!(optimize(rules), rotated);
426    }
427
428    #[test]
429    fn skip() {
430        let rules = {
431            use crate::ast::Expr::*;
432            vec![Rule {
433                name: "rule".to_owned(),
434                ty: RuleType::Atomic,
435                expr: box_tree!(Rep(Seq(
436                    NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
437                    Ident(String::from("ANY"))
438                ))),
439            }]
440        };
441        let skipped = vec![OptimizedRule {
442            name: "rule".to_owned(),
443            ty: RuleType::Atomic,
444            expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
445        }];
446
447        assert_eq!(optimize(rules), skipped);
448    }
449
450    #[test]
451    fn concat_strings() {
452        let rules = {
453            use crate::ast::Expr::*;
454            vec![Rule {
455                name: "rule".to_owned(),
456                ty: RuleType::Atomic,
457                expr: box_tree!(Seq(
458                    Seq(Str(String::from("a")), Str(String::from("b"))),
459                    Seq(Str(String::from("c")), Str(String::from("d")))
460                )),
461            }]
462        };
463        let concatenated = vec![OptimizedRule {
464            name: "rule".to_owned(),
465            ty: RuleType::Atomic,
466            expr: OptimizedExpr::Str(String::from("abcd")),
467        }];
468
469        assert_eq!(optimize(rules), concatenated);
470    }
471
472    #[test]
473    fn unroll_loop_exact() {
474        let rules = vec![Rule {
475            name: "rule".to_owned(),
476            ty: RuleType::Atomic,
477            expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
478        }];
479        let unrolled = {
480            use crate::optimizer::OptimizedExpr::*;
481            vec![OptimizedRule {
482                name: "rule".to_owned(),
483                ty: RuleType::Atomic,
484                expr: box_tree!(Seq(
485                    Ident(String::from("a")),
486                    Seq(Ident(String::from("a")), Ident(String::from("a")))
487                )),
488            }]
489        };
490
491        assert_eq!(optimize(rules), unrolled);
492    }
493
494    #[test]
495    fn unroll_loop_max() {
496        let rules = vec![Rule {
497            name: "rule".to_owned(),
498            ty: RuleType::Atomic,
499            expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
500        }];
501        let unrolled = {
502            use crate::optimizer::OptimizedExpr::*;
503            vec![OptimizedRule {
504                name: "rule".to_owned(),
505                ty: RuleType::Atomic,
506                expr: box_tree!(Seq(
507                    Opt(Str(String::from("a"))),
508                    Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
509                )),
510            }]
511        };
512
513        assert_eq!(optimize(rules), unrolled);
514    }
515
516    #[test]
517    fn unroll_loop_min() {
518        let rules = vec![Rule {
519            name: "rule".to_owned(),
520            ty: RuleType::Atomic,
521            expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
522        }];
523        let unrolled = {
524            use crate::optimizer::OptimizedExpr::*;
525            vec![OptimizedRule {
526                name: "rule".to_owned(),
527                ty: RuleType::Atomic,
528                expr: box_tree!(Seq(
529                    Str(String::from("a")),
530                    Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
531                )),
532            }]
533        };
534
535        assert_eq!(optimize(rules), unrolled);
536    }
537
538    #[test]
539    fn unroll_loop_min_max() {
540        let rules = vec![Rule {
541            name: "rule".to_owned(),
542            ty: RuleType::Atomic,
543            expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
544        }];
545        let unrolled = {
546            use crate::optimizer::OptimizedExpr::*;
547            vec![OptimizedRule {
548                name: "rule".to_owned(),
549                ty: RuleType::Atomic,
550                /* TODO possible room for improvement here:
551                 * if the sequences were rolled out in the opposite
552                 * order, we could further optimize the strings
553                 * in cases like this.
554                Str(String::from(("aa")),
555                Opt(Str(String::from("a")))
556                */
557                expr: box_tree!(Seq(
558                    Str(String::from("a")),
559                    Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
560                )),
561            }]
562        };
563
564        assert_eq!(optimize(rules), unrolled);
565    }
566
567    #[test]
568    fn concat_insensitive_strings() {
569        let rules = {
570            use crate::ast::Expr::*;
571            vec![Rule {
572                name: "rule".to_owned(),
573                ty: RuleType::Atomic,
574                expr: box_tree!(Seq(
575                    Seq(Insens(String::from("a")), Insens(String::from("b"))),
576                    Seq(Insens(String::from("c")), Insens(String::from("d")))
577                )),
578            }]
579        };
580        let concatenated = vec![OptimizedRule {
581            name: "rule".to_owned(),
582            ty: RuleType::Atomic,
583            expr: OptimizedExpr::Insens(String::from("abcd")),
584        }];
585
586        assert_eq!(optimize(rules), concatenated);
587    }
588
589    #[test]
590    fn long_common_sequence() {
591        let rules = {
592            use crate::ast::Expr::*;
593            vec![Rule {
594                name: "rule".to_owned(),
595                ty: RuleType::Silent,
596                expr: box_tree!(Choice(
597                    Seq(
598                        Ident(String::from("a")),
599                        Seq(Ident(String::from("b")), Ident(String::from("c")))
600                    ),
601                    Seq(
602                        Seq(Ident(String::from("a")), Ident(String::from("b"))),
603                        Ident(String::from("d"))
604                    )
605                )),
606            }]
607        };
608        let optimized = {
609            use crate::optimizer::OptimizedExpr::*;
610            vec![OptimizedRule {
611                name: "rule".to_owned(),
612                ty: RuleType::Silent,
613                expr: box_tree!(Seq(
614                    Ident(String::from("a")),
615                    Seq(
616                        Ident(String::from("b")),
617                        Choice(Ident(String::from("c")), Ident(String::from("d")))
618                    )
619                )),
620            }]
621        };
622
623        assert_eq!(optimize(rules), optimized);
624    }
625
626    #[test]
627    fn short_common_sequence() {
628        let rules = {
629            use crate::ast::Expr::*;
630            vec![Rule {
631                name: "rule".to_owned(),
632                ty: RuleType::Atomic,
633                expr: box_tree!(Choice(
634                    Seq(Ident(String::from("a")), Ident(String::from("b"))),
635                    Ident(String::from("a"))
636                )),
637            }]
638        };
639        let optimized = {
640            use crate::optimizer::OptimizedExpr::*;
641            vec![OptimizedRule {
642                name: "rule".to_owned(),
643                ty: RuleType::Atomic,
644                expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
645            }]
646        };
647
648        assert_eq!(optimize(rules), optimized);
649    }
650
651    #[test]
652    fn impossible_common_sequence() {
653        let rules = {
654            use crate::ast::Expr::*;
655            vec![Rule {
656                name: "rule".to_owned(),
657                ty: RuleType::Silent,
658                expr: box_tree!(Choice(
659                    Ident(String::from("a")),
660                    Seq(Ident(String::from("a")), Ident(String::from("b")))
661                )),
662            }]
663        };
664        let optimized = {
665            use crate::optimizer::OptimizedExpr::*;
666            vec![OptimizedRule {
667                name: "rule".to_owned(),
668                ty: RuleType::Silent,
669                expr: box_tree!(Ident(String::from("a"))),
670            }]
671        };
672
673        assert_eq!(optimize(rules), optimized);
674    }
675
676    #[test]
677    fn lister() {
678        let rules = {
679            use crate::ast::Expr::*;
680            vec![Rule {
681                name: "rule".to_owned(),
682                ty: RuleType::Silent,
683                expr: box_tree!(Seq(
684                    Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
685                    Ident(String::from("a"))
686                )),
687            }]
688        };
689        let optimized = {
690            use crate::optimizer::OptimizedExpr::*;
691            vec![OptimizedRule {
692                name: "rule".to_owned(),
693                ty: RuleType::Silent,
694                expr: box_tree!(Seq(
695                    Ident(String::from("a")),
696                    Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
697                )),
698            }]
699        };
700
701        assert_eq!(optimize(rules), optimized);
702    }
703
704    mod display {
705        use super::super::*;
706        /// In previous implementation of Display for OptimizedExpr
707        /// in commit 48e0a8bd3d43a17c1c78f099610b745d18ec0c5f (actually committed by me),
708        /// Str("\n") will be displayed as
709        /// "
710        /// "
711        ///
712        /// It will not break the compilation in normal use.
713        ///
714        /// But when I use it in automatically generating documents,
715        /// it will quite confusing and we'll be unable to distinguish \n and \r.
716        ///
717        /// And `cargo expand` will emit codes that can't be compiled,
718        /// for it expand `#[doc("...")]` to `/// ...`,
719        /// and when the document comment breaks the line,
720        /// it will be expanded into wrong codes.
721        #[test]
722        fn control_character() {
723            assert_eq!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\\n\"");
724            assert_eq!(
725                OptimizedExpr::Insens("\n".to_owned()).to_string(),
726                "^\"\\n\"",
727            );
728            assert_eq!(
729                OptimizedExpr::Range("\n".to_owned(), "\r".to_owned()).to_string(),
730                "('\\n'..'\\r')",
731            );
732            assert_eq!(
733                OptimizedExpr::Skip(vec![
734                    "\n".to_owned(),
735                    "\r".to_owned(),
736                    "\n\r".to_owned(),
737                    "\0".to_owned(),
738                ])
739                .to_string(),
740                r#"(!("\n" | "\r" | "\n\r" | "\0") ~ ANY)*"#,
741            );
742
743            assert_ne!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\n\"");
744        }
745
746        #[test]
747        fn str() {
748            assert_eq!(OptimizedExpr::Str("a".to_owned()).to_string(), r#""a""#);
749        }
750
751        #[test]
752        fn insens() {
753            assert_eq!(OptimizedExpr::Insens("a".to_owned()).to_string(), r#"^"a""#);
754        }
755
756        #[test]
757        fn range() {
758            assert_eq!(
759                OptimizedExpr::Range("a".to_owned(), "z".to_owned()).to_string(),
760                r#"('a'..'z')"#,
761            );
762        }
763
764        #[test]
765        fn ident() {
766            assert_eq!(OptimizedExpr::Ident("a".to_owned()).to_string(), r#"a"#);
767        }
768
769        #[test]
770        fn peek_slice() {
771            assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
772            assert_eq!(
773                OptimizedExpr::PeekSlice(0, Some(-1)).to_string(),
774                "PEEK[0..-1]",
775            );
776            assert_eq!(
777                OptimizedExpr::PeekSlice(2, Some(3)).to_string(),
778                "PEEK[2..3]",
779            );
780            assert_eq!(
781                OptimizedExpr::PeekSlice(2, Some(-1)).to_string(),
782                "PEEK[2..-1]",
783            );
784            assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
785        }
786
787        #[test]
788        fn pos_pred() {
789            assert_eq!(
790                OptimizedExpr::PosPred(Box::new(OptimizedExpr::NegPred(Box::new(
791                    OptimizedExpr::Ident("a".to_owned()),
792                ))))
793                .to_string(),
794                "&!a",
795            );
796            assert_eq!(
797                OptimizedExpr::PosPred(Box::new(OptimizedExpr::Choice(
798                    Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident(
799                        "a".to_owned(),
800                    )))),
801                    Box::new(OptimizedExpr::Str("a".to_owned())),
802                )))
803                .to_string(),
804                r#"&(a* | "a")"#,
805            );
806            assert_eq!(
807                OptimizedExpr::PosPred(Box::new(OptimizedExpr::RestoreOnErr(Box::new(
808                    OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("a".to_owned()))),
809                ))))
810                .to_string(),
811                "&!a",
812            );
813        }
814
815        #[test]
816        fn neg_pred() {
817            assert_eq!(
818                OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
819                r#"!e"#,
820            );
821            assert_eq!(
822                OptimizedExpr::NegPred(Box::new(OptimizedExpr::Choice(
823                    Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
824                        "a".to_owned(),
825                    )))),
826                    Box::new(OptimizedExpr::Str("a".to_owned())),
827                )))
828                .to_string(),
829                r#"!(PUSH(a) | "a")"#,
830            );
831        }
832
833        #[test]
834        fn seq() {
835            assert_eq!(
836                OptimizedExpr::Seq(
837                    Box::new(OptimizedExpr::Ident("e1".to_owned())),
838                    Box::new(OptimizedExpr::Ident("e2".to_owned())),
839                )
840                .to_string(),
841                r#"(e1 ~ e2)"#,
842            );
843            assert_eq!(
844                Expr::Seq(
845                    Box::new(Expr::Ident("e1".to_owned())),
846                    Box::new(Expr::Seq(
847                        Box::new(Expr::Ident("e2".to_owned())),
848                        Box::new(Expr::Ident("e3".to_owned())),
849                    )),
850                )
851                .to_string(),
852                "(e1 ~ e2 ~ e3)",
853            );
854            assert_eq!(
855                Expr::Seq(
856                    Box::new(Expr::Ident("e1".to_owned())),
857                    Box::new(Expr::Seq(
858                        Box::new(Expr::Ident("e2".to_owned())),
859                        Box::new(Expr::Seq(
860                            Box::new(Expr::Ident("e3".to_owned())),
861                            Box::new(Expr::Ident("e4".to_owned())),
862                        )),
863                    )),
864                )
865                .to_string(),
866                "(e1 ~ e2 ~ e3 ~ e4)",
867            );
868            assert_eq!(
869                Expr::Seq(
870                    Box::new(Expr::Ident("e1".to_owned())),
871                    Box::new(Expr::Choice(
872                        Box::new(Expr::Ident("e2".to_owned())),
873                        Box::new(Expr::Seq(
874                            Box::new(Expr::Ident("e3".to_owned())),
875                            Box::new(Expr::Ident("e4".to_owned())),
876                        )),
877                    )),
878                )
879                .to_string(),
880                "(e1 ~ (e2 | (e3 ~ e4)))",
881            );
882            assert_eq!(
883                Expr::Seq(
884                    Box::new(Expr::Ident("e1".to_owned())),
885                    Box::new(Expr::Seq(
886                        Box::new(Expr::Ident("e2".to_owned())),
887                        Box::new(Expr::Choice(
888                            Box::new(Expr::Ident("e3".to_owned())),
889                            Box::new(Expr::Ident("e4".to_owned())),
890                        )),
891                    )),
892                )
893                .to_string(),
894                "(e1 ~ e2 ~ (e3 | e4))",
895            );
896            assert_eq!(
897                OptimizedExpr::Seq(
898                    Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Str(
899                        "a".to_owned(),
900                    )))),
901                    Box::new(OptimizedExpr::Seq(
902                        Box::new(OptimizedExpr::Ident("b".to_owned())),
903                        Box::new(OptimizedExpr::Insens("c".to_owned())),
904                    )),
905                )
906                .to_string(),
907                r#"("a"* ~ b ~ ^"c")"#,
908            );
909            assert_eq!(
910                OptimizedExpr::Seq(
911                    Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Range(
912                        "a".to_owned(),
913                        "z".to_owned(),
914                    )))),
915                    Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Opt(
916                        Box::new(OptimizedExpr::Range("A".to_owned(), "Z".to_owned())),
917                    )))),
918                )
919                .to_string(),
920                "(&('a'..'z') ~ !('A'..'Z')?)",
921            );
922        }
923
924        #[test]
925        fn choice() {
926            assert_eq!(
927                OptimizedExpr::Choice(
928                    Box::new(OptimizedExpr::Ident("e1".to_owned())),
929                    Box::new(OptimizedExpr::Ident("e2".to_owned())),
930                )
931                .to_string(),
932                r#"(e1 | e2)"#,
933            );
934            assert_eq!(
935                Expr::Choice(
936                    Box::new(Expr::Ident("e1".to_owned())),
937                    Box::new(Expr::Choice(
938                        Box::new(Expr::Ident("e2".to_owned())),
939                        Box::new(Expr::Ident("e3".to_owned())),
940                    )),
941                )
942                .to_string(),
943                "(e1 | e2 | e3)",
944            );
945            assert_eq!(
946                Expr::Choice(
947                    Box::new(Expr::Ident("e1".to_owned())),
948                    Box::new(Expr::Choice(
949                        Box::new(Expr::Ident("e2".to_owned())),
950                        Box::new(Expr::Choice(
951                            Box::new(Expr::Ident("e3".to_owned())),
952                            Box::new(Expr::Ident("e4".to_owned())),
953                        )),
954                    )),
955                )
956                .to_string(),
957                "(e1 | e2 | e3 | e4)",
958            );
959            assert_eq!(
960                Expr::Choice(
961                    Box::new(Expr::Ident("e1".to_owned())),
962                    Box::new(Expr::Seq(
963                        Box::new(Expr::Ident("e2".to_owned())),
964                        Box::new(Expr::Choice(
965                            Box::new(Expr::Ident("e3".to_owned())),
966                            Box::new(Expr::Ident("e4".to_owned())),
967                        )),
968                    )),
969                )
970                .to_string(),
971                "(e1 | (e2 ~ (e3 | e4)))",
972            );
973            assert_eq!(
974                OptimizedExpr::Choice(
975                    Box::new(OptimizedExpr::Str("a".to_owned())),
976                    Box::new(OptimizedExpr::Choice(
977                        Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
978                            "b".to_owned(),
979                        )))),
980                        Box::new(OptimizedExpr::Insens("c".to_owned())),
981                    )),
982                )
983                .to_string(),
984                r#"("a" | PUSH(b) | ^"c")"#,
985            );
986        }
987
988        #[test]
989        fn opt() {
990            assert_eq!(
991                OptimizedExpr::Opt(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
992                "e?",
993            );
994        }
995
996        #[test]
997        fn rep() {
998            assert_eq!(
999                OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident("x".to_owned()))).to_string(),
1000                "x*",
1001            );
1002            assert_eq!(
1003                OptimizedExpr::Rep(Box::new(OptimizedExpr::Range(
1004                    "0".to_owned(),
1005                    "9".to_owned(),
1006                )))
1007                .to_string(),
1008                "('0'..'9')*",
1009            );
1010        }
1011
1012        #[test]
1013        #[cfg(feature = "grammar-extras")]
1014        fn rep_once() {
1015            assert_eq!(
1016                OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1017                "e+",
1018            );
1019            assert_eq!(
1020                OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Range(
1021                    "0".to_owned(),
1022                    "9".to_owned(),
1023                )))
1024                .to_string(),
1025                "('0'..'9')+",
1026            );
1027        }
1028
1029        #[test]
1030        fn skip() {
1031            assert_eq!(
1032                OptimizedExpr::Skip(
1033                    ["a", "bc"]
1034                        .into_iter()
1035                        .map(|s| s.to_owned())
1036                        .collect::<Vec<_>>(),
1037                )
1038                .to_string(),
1039                r#"(!("a" | "bc") ~ ANY)*"#,
1040            );
1041        }
1042
1043        #[test]
1044        fn push() {
1045            assert_eq!(
1046                OptimizedExpr::Push(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1047                "PUSH(e)",
1048            );
1049        }
1050
1051        #[test]
1052        #[cfg(feature = "grammar-extras")]
1053        fn node_tag() {
1054            assert_eq!(
1055                OptimizedExpr::NodeTag(
1056                    Box::new(OptimizedExpr::Ident("expr".to_owned())),
1057                    "label".to_owned(),
1058                )
1059                .to_string(),
1060                r#"(#label = expr)"#,
1061            );
1062            assert_eq!(
1063                OptimizedExpr::NodeTag(
1064                    Box::new(OptimizedExpr::Ident("x".to_owned())),
1065                    "X".to_owned(),
1066                )
1067                .to_string(),
1068                r#"(#X = x)"#,
1069            );
1070            assert_eq!(
1071                OptimizedExpr::NodeTag(
1072                    Box::new(OptimizedExpr::Seq(
1073                        Box::new(OptimizedExpr::Ident("x".to_owned())),
1074                        Box::new(OptimizedExpr::Str("y".to_owned())),
1075                    )),
1076                    "X".to_owned(),
1077                )
1078                .to_string(),
1079                r#"(#X = (x ~ "y"))"#,
1080            );
1081        }
1082
1083        #[test]
1084        fn restore_on_err() {
1085            assert_eq!(
1086                OptimizedExpr::RestoreOnErr(Box::new(OptimizedExpr::Ident("e".to_owned())))
1087                    .to_string(),
1088                "e",
1089            );
1090        }
1091    }
1092}