rst_parser/
conversion.rs

1mod block;
2mod inline;
3#[cfg(test)]
4mod tests;
5
6use anyhow::Error;
7use pest::iterators::Pairs;
8
9use document_tree::{
10    Element, HasChildren, attribute_types as at, element_categories as c, elements as e,
11};
12
13use crate::pest_rst::Rule;
14
15fn ssubel_to_section_unchecked_mut(ssubel: &mut c::StructuralSubElement) -> &mut e::Section {
16    match ssubel {
17        c::StructuralSubElement::SubStructure(b) => match b.as_mut() {
18            c::SubStructure::Section(s) => s,
19            _ => unreachable!(),
20        },
21        _ => unreachable!(),
22    }
23}
24
25fn get_level<'tl>(
26    toplevel: &'tl mut Vec<c::StructuralSubElement>,
27    section_idxs: &[Option<usize>],
28) -> &'tl mut Vec<c::StructuralSubElement> {
29    let mut level = toplevel;
30    for i in section_idxs.iter().flatten().copied() {
31        level = ssubel_to_section_unchecked_mut(&mut level[i]).children_mut();
32    }
33    level
34}
35
36pub fn convert_document(pairs: Pairs<Rule>) -> Result<e::Document, Error> {
37    use self::block::TitleOrSsubel::{Ssubel, Title};
38
39    let mut toplevel: Vec<c::StructuralSubElement> = vec![];
40    // The kinds of section titles encountered.
41    // `section_idx[x]` has the kind `kinds[x]`, but `kinds` can be longer
42    let mut kinds: Vec<block::TitleKind> = vec![];
43    // Recursive indices into the tree, pointing at the active sections.
44    // `None`s indicate skipped section levels:
45    // toplevel[section_idxs.flatten()[0]].children[section_idxs.flatten()[1]]...
46    let mut section_idxs: Vec<Option<usize>> = vec![];
47
48    for pair in pairs {
49        if let Some(ssubel) = block::convert_ssubel(pair)? {
50            match ssubel {
51                Title(title, kind) => {
52                    match kinds.iter().position(|k| k == &kind) {
53                        // Idx points to the level we want to add,
54                        // so idx-1 needs to be the last valid index.
55                        Some(idx) => {
56                            // If idx < len: Remove found section and all below
57                            section_idxs.truncate(idx);
58                            // If idx > len: Add None for skipped levels
59                            // TODO: test skipped levels
60                            while section_idxs.len() < idx {
61                                section_idxs.push(None);
62                            }
63                        }
64                        None => kinds.push(kind),
65                    }
66                    let super_level = get_level(&mut toplevel, &section_idxs);
67                    let slug = title
68                        .names()
69                        .iter()
70                        .next()
71                        .map(|at::NameToken(name)| at::ID(name.to_owned()));
72                    let mut section = e::Section::with_children(vec![title.into()]);
73                    section.ids_mut().extend(slug.into_iter());
74                    super_level.push(section.into());
75                    section_idxs.push(Some(super_level.len() - 1));
76                }
77                Ssubel(elem) => get_level(&mut toplevel, &section_idxs).push(elem),
78            }
79        }
80    }
81    Ok(e::Document::with_children(toplevel))
82}
83
/// Collapses every run of whitespace in `name` into a single space and drops
/// leading and trailing whitespace. Equivalent to docutils's
/// `docutils.nodes.whitespace_normalize_name`.
pub fn whitespace_normalize_name(name: &str) -> String {
    // Python's `str.split()` defines whitespace differently than Rust does:
    // the characters U+001C..=U+001F have to be treated as separators as well.
    let is_python_whitespace = |ch: char| ch.is_whitespace() || matches!(ch, '\x1C'..='\x1F');
    let words: Vec<&str> = name
        .split(is_python_whitespace)
        .filter(|word| !word.is_empty())
        .collect();
    words.join(" ")
}