rst_parser/conversion/
inline.rs

1use anyhow::Error;
2use pest::iterators::Pair;
3
4use document_tree::{
5    CommonAttributes, Element, ExtraAttributes, HasChildren, attribute_types as at,
6    element_categories as c, elements as e,
7    extra_attributes::{self as a, FootnoteTypeExt},
8    url::Url,
9};
10
11use super::whitespace_normalize_name;
12use crate::pest_rst::Rule;
13
14pub fn convert_inline(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> {
15    Ok(match pair.as_rule() {
16        Rule::str | Rule::str_nested => pair.as_str().into(),
17        Rule::escaped_char => pair.as_str()[1..].into(),
18        Rule::ws_newline => " ".to_owned().into(),
19        Rule::reference => convert_reference(pair)?,
20        Rule::substitution_name => convert_substitution_ref(&pair).into(),
21        Rule::emph => e::Emphasis::with_children(convert_inlines(pair)?).into(),
22        Rule::strong => e::Strong::with_children(convert_inlines(pair)?).into(),
23        Rule::literal => e::Literal::with_children(vec![pair.as_str().to_owned()]).into(),
24        Rule::footnote_reference => convert_footnote_reference(pair).into(),
25        rule => unimplemented!("unknown rule {:?}", rule),
26    })
27}
28
29pub fn convert_inlines(pair: Pair<Rule>) -> Result<Vec<c::TextOrInlineElement>, Error> {
30    pair.into_inner().map(convert_inline).collect()
31}
32
33fn convert_reference(pair: Pair<Rule>) -> Result<c::TextOrInlineElement, Error> {
34    let concrete = pair.into_inner().next().unwrap();
35    match concrete.as_rule() {
36        Rule::reference_target => convert_reference_target(concrete).map(Into::into),
37        Rule::reference_explicit => unimplemented!("explicit reference"),
38        Rule::reference_auto => Ok(convert_reference_auto(concrete)),
39        _ => unreachable!(),
40    }
41}
42
43fn convert_reference_target(concrete: Pair<'_, Rule>) -> Result<e::Reference, Error> {
44    let rt_inner = concrete.into_inner().next().unwrap();
45    Ok(match rt_inner.as_rule() {
46        Rule::reference_target_uq => e::Reference::new(
47            CommonAttributes::default(),
48            a::Reference {
49                name: Some(rt_inner.as_str().into()),
50                refuri: None,
51                refid: None,
52                refname: vec![rt_inner.as_str().into()],
53            },
54            vec![rt_inner.as_str().into()],
55        ),
56        Rule::reference_target_qu => {
57            let (text, reference) = {
58                let mut text = None;
59                let mut reference = None;
60                for inner in rt_inner.clone().into_inner() {
61                    match inner.as_rule() {
62                        Rule::reference_text => text = Some(inner),
63                        Rule::reference_bracketed => reference = Some(inner),
64                        _ => unreachable!(),
65                    }
66                }
67                (text, reference)
68            };
69            let trimmed_text = match (&text, &reference) {
70                (Some(text), None) => text.as_str(),
71                (_, Some(reference)) => text
72                    .map(|text| text.as_str().trim_end_matches(|ch| " \n\r".contains(ch)))
73                    .filter(|text| !text.is_empty())
74                    .unwrap_or_else(|| reference.clone().into_inner().next().unwrap().as_str()),
75                (None, None) => unreachable!(),
76            };
77            let (refuri, refname): (Option<Url>, Vec<at::NameToken>) =
78                if let Some(reference) = reference {
79                    let inner = reference.into_inner().next().unwrap();
80                    match inner.as_rule() {
81                        // The URL rules in our parser accept a narrow superset of
82                        // valid URLs, so we need to handle false positives.
83                        Rule::url => {
84                            if let Ok(target) = Url::parse_absolute(inner.as_str()) {
85                                (Some(target), Vec::new())
86                            } else if inner.as_str().ends_with('_') {
87                                // like target_name_qu (minus the final underscore)
88                                let full_str = inner.as_str();
89                                (None, vec![full_str[0..full_str.len() - 1].into()])
90                            } else {
91                                // like relative_reference
92                                (Some(Url::parse_relative(inner.as_str())?), Vec::new())
93                            }
94                        }
95                        Rule::target_name_qu => (None, vec![inner.as_str().into()]),
96                        Rule::relative_reference => {
97                            (Some(Url::parse_relative(inner.as_str())?), Vec::new())
98                        }
99                        _ => unreachable!(),
100                    }
101                } else {
102                    (None, vec![trimmed_text.into()])
103                };
104            e::Reference::new(
105                CommonAttributes::default(),
106                a::Reference {
107                    name: Some(trimmed_text.into()),
108                    refuri,
109                    refid: None,
110                    refname,
111                },
112                vec![trimmed_text.into()],
113            )
114        }
115        _ => unreachable!(),
116    })
117}
118
119fn convert_reference_auto(concrete: Pair<'_, Rule>) -> c::TextOrInlineElement {
120    let rt_inner = concrete.into_inner().next().unwrap();
121    let str: c::TextOrInlineElement = rt_inner.as_str().into();
122    let Ok(target) = (match rt_inner.as_rule() {
123        Rule::url_auto => Url::parse_absolute(rt_inner.as_str()),
124        Rule::email => Url::parse_absolute(&format!("mailto:{}", rt_inner.as_str())),
125        _ => unreachable!(),
126    }) else {
127        // if our parser got a URL wrong, return it as a string
128        return str;
129    };
130    e::Reference::new(
131        CommonAttributes::default(),
132        a::Reference {
133            name: None,
134            refuri: Some(target),
135            refid: None,
136            refname: Vec::new(),
137        },
138        vec![str],
139    )
140    .into()
141}
142
143fn convert_substitution_ref(pair: &Pair<Rule>) -> e::SubstitutionReference {
144    let name = whitespace_normalize_name(pair.as_str());
145    a::ExtraAttributes::with_extra(a::SubstitutionReference {
146        refname: vec![at::NameToken(name)],
147    })
148}
149
150fn convert_footnote_reference(pair: Pair<Rule>) -> e::FootnoteReference {
151    let label = pair.into_inner().next().unwrap().as_str();
152
153    let mut fr = e::FootnoteReference::default();
154    if label.len() > 1 {
155        let name = whitespace_normalize_name(&label[1..]);
156        fr.names_mut().push(at::NameToken(name));
157    }
158    fr.extra_mut().auto = label.chars().next().unwrap().try_into().ok();
159    if !fr.is_auto() {
160        fr.children_mut().push(label.into());
161    }
162    fr
163}