pest_generator/
lib.rs

// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
9
10#![doc(
11    html_root_url = "https://docs.rs/pest_derive",
12    html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg",
13    html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg"
14)]
15#![warn(missing_docs, rust_2018_idioms, unused_qualifications)]
16#![recursion_limit = "256"]
//! # pest generator
//!
//! This crate generates code from ASTs (which is used in the `pest_derive` crate).
20
21#[macro_use]
22extern crate quote;
23
24use std::env;
25use std::fs::File;
26use std::io::{self, Read};
27use std::path::Path;
28
29use proc_macro2::TokenStream;
30use syn::{Attribute, DeriveInput, Expr, ExprLit, Generics, Ident, Lit, Meta};
31
32#[macro_use]
33mod macros;
34mod docs;
35mod generator;
36
37use pest_meta::parser::{self, rename_meta_rule, Rule};
38use pest_meta::{optimizer, unwrap_or_report, validator};
39
40/// Processes the derive/proc macro input and generates the corresponding parser based
41/// on the parsed grammar. If `include_grammar` is set to true, it'll generate an explicit
42/// "include_str" statement (done in pest_derive, but turned off in the local bootstrap).
43pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream {
44    let ast: DeriveInput = syn::parse2(input).unwrap();
45    let (parsed_derive, contents) = parse_derive(ast);
46
47    let mut data = String::new();
48    let mut paths = vec![];
49
50    for content in contents {
51        let (_data, _path) = match content {
52            GrammarSource::File(ref path) => {
53                let root = env::var("CARGO_MANIFEST_DIR").unwrap_or_else(|_| ".".into());
54
55                // Check whether we can find a file at the path relative to the CARGO_MANIFEST_DIR
56                // first.
57                //
58                // If we cannot find the expected file over there, fallback to the
59                // `CARGO_MANIFEST_DIR/src`, which is the old default and kept for convenience
60                // reasons.
61                // TODO: This could be refactored once `std::path::absolute()` get's stabilized.
62                // https://doc.rust-lang.org/std/path/fn.absolute.html
63                let path = if Path::new(&root).join(path).exists() {
64                    Path::new(&root).join(path)
65                } else {
66                    Path::new(&root).join("src/").join(path)
67                };
68
69                let file_name = match path.file_name() {
70                    Some(file_name) => file_name,
71                    None => panic!("grammar attribute should point to a file"),
72                };
73
74                let data = match read_file(&path) {
75                    Ok(data) => data,
76                    Err(error) => panic!("error opening {:?}: {}", file_name, error),
77                };
78                (data, Some(path.clone()))
79            }
80            GrammarSource::Inline(content) => (content, None),
81        };
82
83        data.push_str(&_data);
84        if let Some(path) = _path {
85            paths.push(path);
86        }
87    }
88
89    let pairs = match parser::parse(Rule::grammar_rules, &data) {
90        Ok(pairs) => pairs,
91        Err(error) => panic!("error parsing \n{}", error.renamed_rules(rename_meta_rule)),
92    };
93
94    let defaults = unwrap_or_report(validator::validate_pairs(pairs.clone()));
95    let doc_comment = docs::consume(pairs.clone());
96    let ast = unwrap_or_report(parser::consume_rules(pairs));
97    let optimized = optimizer::optimize(ast);
98
99    generator::generate(
100        parsed_derive,
101        paths,
102        optimized,
103        defaults,
104        &doc_comment,
105        include_grammar,
106    )
107}
108
/// Reads the whole file at `path` into a `String`.
///
/// # Errors
///
/// Returns the underlying `io::Error` if the file cannot be opened or read, or if its
/// contents are not valid UTF-8.
fn read_file<P: AsRef<Path>>(path: P) -> io::Result<String> {
    let mut contents = String::new();
    File::open(path.as_ref())?.read_to_string(&mut contents)?;
    Ok(contents)
}
115
/// Where the text of one grammar comes from, as declared by a single attribute.
#[derive(Clone, Debug, Eq, PartialEq)]
enum GrammarSource {
    /// Path taken from a `#[grammar = "..."]` attribute; the file is read later, when the
    /// parser is derived.
    File(String),
    /// Grammar text given directly through a `#[grammar_inline = "..."]` attribute.
    Inline(String),
}
121
122struct ParsedDerive {
123    pub(crate) name: Ident,
124    pub(crate) generics: Generics,
125    pub(crate) non_exhaustive: bool,
126}
127
128fn parse_derive(ast: DeriveInput) -> (ParsedDerive, Vec<GrammarSource>) {
129    let name = ast.ident;
130    let generics = ast.generics;
131
132    let grammar: Vec<&Attribute> = ast
133        .attrs
134        .iter()
135        .filter(|attr| {
136            let path = attr.meta.path();
137            path.is_ident("grammar") || path.is_ident("grammar_inline")
138        })
139        .collect();
140
141    if grammar.is_empty() {
142        panic!("a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute");
143    }
144
145    let mut grammar_sources = Vec::with_capacity(grammar.len());
146    for attr in grammar {
147        grammar_sources.push(get_attribute(attr))
148    }
149
150    let non_exhaustive = ast
151        .attrs
152        .iter()
153        .any(|attr| attr.meta.path().is_ident("non_exhaustive"));
154
155    (
156        ParsedDerive {
157            name,
158            generics,
159            non_exhaustive,
160        },
161        grammar_sources,
162    )
163}
164
165fn get_attribute(attr: &Attribute) -> GrammarSource {
166    match &attr.meta {
167        Meta::NameValue(name_value) => match &name_value.value {
168            Expr::Lit(ExprLit {
169                lit: Lit::Str(string),
170                ..
171            }) => {
172                if name_value.path.is_ident("grammar") {
173                    GrammarSource::File(string.value())
174                } else {
175                    GrammarSource::Inline(string.value())
176                }
177            }
178            _ => panic!("grammar attribute must be a string"),
179        },
180        _ => panic!("grammar attribute must be of the form `grammar = \"...\"`"),
181    }
182}
183
#[cfg(test)]
mod tests {
    use super::parse_derive;
    use super::GrammarSource;

    /// A `grammar_inline` attribute is returned as inline grammar text.
    #[test]
    fn derive_inline_file() {
        let definition = "
            #[other_attr]
            #[grammar_inline = \"GRAMMAR\"]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        let (_, filenames) = parse_derive(ast);
        assert_eq!(filenames, [GrammarSource::Inline("GRAMMAR".to_string())]);
    }

    /// A single `grammar` attribute is returned as a file source, and the type is not
    /// flagged as non-exhaustive.
    #[test]
    fn derive_ok() {
        let definition = "
            #[other_attr]
            #[grammar = \"myfile.pest\"]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        let (parsed_derive, filenames) = parse_derive(ast);
        assert_eq!(filenames, [GrammarSource::File("myfile.pest".to_string())]);
        assert!(!parsed_derive.non_exhaustive);
    }

    /// Several `grammar` attributes are all returned, in declaration order.
    #[test]
    fn derive_multiple_grammars() {
        let definition = "
            #[other_attr]
            #[grammar = \"myfile1.pest\"]
            #[grammar = \"myfile2.pest\"]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        let (_, filenames) = parse_derive(ast);
        assert_eq!(
            filenames,
            [
                GrammarSource::File("myfile1.pest".to_string()),
                GrammarSource::File("myfile2.pest".to_string())
            ]
        );
    }

    /// `#[non_exhaustive]` on the input is recorded in the parsed derive.
    #[test]
    fn derive_nonexhaustive() {
        let definition = "
            #[non_exhaustive]
            #[grammar = \"myfile.pest\"]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        let (parsed_derive, filenames) = parse_derive(ast);
        assert_eq!(filenames, [GrammarSource::File("myfile.pest".to_string())]);
        assert!(parsed_derive.non_exhaustive);
    }

    /// A non-string value in the grammar attribute is rejected.
    #[test]
    #[should_panic(expected = "grammar attribute must be a string")]
    fn derive_wrong_arg() {
        let definition = "
            #[other_attr]
            #[grammar = 1]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(ast);
    }

    /// A grammar attribute that is not written as `name = value` is rejected.
    #[test]
    #[should_panic(expected = "grammar attribute must be of the form `grammar = \"...\"`")]
    fn derive_wrong_form() {
        let definition = "
            #[other_attr]
            #[grammar(\"myfile.pest\")]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(ast);
    }

    /// An input without any grammar attribute is rejected.
    #[test]
    #[should_panic(
        expected = "a grammar file needs to be provided with the #[grammar = \"PATH\"] or #[grammar_inline = \"GRAMMAR CONTENTS\"] attribute"
    )]
    fn derive_no_grammar() {
        let definition = "
            #[other_attr]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(ast);
    }

    /// The doc comments of the test grammar show up as `#[doc]` attributes on the
    /// generated `Rule` enum and its variants.
    #[test]
    fn test_generate_doc() {
        let input = quote! {
            #[derive(Parser)]
            #[non_exhaustive]
            #[grammar = "../tests/test.pest"]
            pub struct TestParser;
        };

        let token = super::derive_parser(input, true);

        let expected = quote! {
            #[doc = "A parser for JSON file.\nAnd this is a example for JSON parser.\n\n    indent-4-space\n"]
            #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)]
            #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
            #[non_exhaustive]
            pub enum Rule {
                #[doc = "Matches foo str, e.g.: `foo`"]
                r#foo,
                #[doc = "Matches bar str\n\n  Indent 2, e.g: `bar` or `foobar`"]
                r#bar,
                r#bar1,
                #[doc = "Matches dar\n\nMatch dar description\n"]
                r#dar
            }
        };

        // Only the `Rule` enum is compared; the generated stream contains more than that.
        assert!(
            token.to_string().contains(expected.to_string().as_str()),
            "{}\n\nExpected to contains:\n{}",
            token,
            expected
        );
    }
}