rst_parser/transforms/
standard.rs

1/*! Perform standard transforms.
2 *
3 * Hyperlinks
4 * ----------
5 *
 * See <https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#hyperlink-targets>
7 *
8 * Links can have internal or external targets.
9 * In the source, targets look like:
10 *
11 * ```restructuredtext
12 * .. targetname1:
13 * .. targetname2:
14 *
15 * some paragraph or list item or so
16 * ```
17 *
18 * or:
19 *
20 * ```restructuredtext
21 * .. targetname1:
22 * .. targetname2: https://link
23 * ```
24 *
 * There are also anonymous links and targets without names.
26 *
27 * TODO: continue documenting how it’s done via <https://repo.or.cz/docutils.git/blob/HEAD:/docutils/docutils/transforms/references.py>
28 *
29 * Footnotes
30 * ---------
31 *
32 * See <https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#footnotes>
33 *
34 * Footnotes can be numbered or symbolic.
35 * In the source, they are split into two parts: footnote references and footnotes.
36 *
37 * Their order is defined by the order of the footnotes, not references.
38 */
39
40use std::{collections::HashMap, iter::once, num::NonZero, vec};
41
42use document_tree::{
43    Document, HasChildren, LabelledFootnote as _,
44    attribute_types::{FootnoteType, ID, NameToken},
45    element_categories as c,
46    elements::{self as e, Element},
47    extra_attributes::{ExtraAttributes, FootnoteTypeExt},
48    url::Url,
49};
50use linearize::{Linearize, StaticMap};
51
52use super::{Transform, Visit};
53
54#[must_use]
55pub fn standard_transform(doc: Document) -> Document {
56    let mut pass1 = Pass1::default();
57    let doc = pass1.transform(doc);
58    let mut pass2 = Pass2::from(&pass1);
59    pass2.visit(&doc);
60    Pass3::from(&pass2).transform(doc)
61}
62
63#[derive(Debug)]
64#[allow(dead_code)]
65enum NamedTargetType {
66    Citation,
67    InternalLink,
68    ExternalLink(Url),
69    IndirectLink(NameToken),
70    SectionTitle,
71}
72impl NamedTargetType {
73    #[allow(dead_code)]
74    /// See <https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#implicit-hyperlink-targets>
75    fn is_implicit_target(&self) -> bool {
76        use NamedTargetType as T;
77        matches!(self, T::SectionTitle | T::Citation)
78    }
79}
80
/// The number one as a [`NonZero`], i.e. the number of the first footnote.
const ONE: NonZero<usize> = <NonZero<usize>>::MIN;
82
83/// Pass 1: Number footnotes, and add IDs to footnote references and footnotes.
84///
85/// Needs to be separate pass, since resolving `refid`s for footnote references requires already-assigned footnote numbers.
86/// Therefore, we do that here, then (in pass 2) resolve references, and finally (in pass 3) transform the footnotes.
87#[derive(Default, Debug)]
88struct Pass1 {
89    /// Store numbers footnotes.
90    /// Symbol ones can only be in order, so `_.values().sort() == 1..=_.len()`
91    /// Number ones can have gaps due to explicitly numbered ones.
92    footnotes: StaticMap<FootnoteType, HashMap<ID, NonZero<usize>>>,
93    /// Numbers of auto-nubered footnotes in order of appearance.
94    auto_numbered_footnotes: StaticMap<AutoNumberedFootnoteType, Vec<NonZero<usize>>>,
95    /// Number of encountered anonymous footnotes. Only used for ID generation.
96    n_anon_footnotes: usize,
97    /// Number of encountered footnote references. Only used for ID generation.
98    n_footnote_refs: usize,
99}
100impl Pass1 {
101    /// Get next footnote number for a type.
102    ///
103    /// See <https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#mixed-manual-and-auto-numbered-footnotes>
104    fn next_footnote(&mut self, typ: FootnoteType) -> NonZero<usize> {
105        let footnotes = &mut self.footnotes[typ];
106        match typ {
107            FootnoteType::Number => {
108                let Some(n) = NonZero::new(footnotes.len()) else {
109                    return ONE;
110                };
111                let mut ordered: Vec<_> = footnotes.values().copied().collect();
112                ordered.sort_unstable();
113                ordered
114                    .iter()
115                    .copied()
116                    .zip(1usize..) // https://github.com/rust-lang/rust/pull/127534
117                    .enumerate()
118                    .find_map(|(i, (n1, n2))| (n1.get() != n2).then_some(ONE.saturating_add(i)))
119                    .unwrap_or(n)
120            }
121            FootnoteType::Symbol => {
122                if cfg!(debug_assertions) {
123                    let mut vals: Vec<usize> =
124                        footnotes.values().copied().map(Into::into).collect();
125                    vals.sort_unstable();
126                    assert_eq!(vals, (1..=footnotes.len()).collect::<Vec<_>>());
127                }
128                ONE.saturating_add(footnotes.len())
129            }
130        }
131    }
132}
133
134#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Linearize)]
135enum AutoNumberedFootnoteType {
136    Anomymous,
137    Named,
138}
139
140impl AutoNumberedFootnoteType {
141    fn from_names(names: &[NameToken]) -> Self {
142        if names.is_empty() {
143            Self::Anomymous
144        } else {
145            Self::Named
146        }
147    }
148}
149
150impl Transform for Pass1 {
151    /// Add (auto-)id and running count to “ids” of footnotes
152    fn transform_footnote(&mut self, mut e: e::Footnote) -> impl Iterator<Item = c::BodyElement> {
153        // Get next or stored footnote number
154        let n = match e
155            .extra()
156            .auto
157            .map(|t| self.next_footnote(t))
158            .ok_or(())
159            .or_else::<anyhow::Error, _>(|()| Ok(e.get_label()?.parse()?))
160        {
161            Ok(n) => n,
162            Err(err) => {
163                let t = e::Problematic::with_children(vec![err.to_string().into()]).into();
164                return once(e::Paragraph::with_children(vec![t]).into());
165            }
166        };
167
168        // Get ID from name or create one from the running count
169        let id = if let Some(name) = e.names().first() {
170            name.0.as_str().into()
171        } else {
172            self.n_anon_footnotes += 1;
173            ID(format!("footnote-{}", self.n_anon_footnotes))
174        };
175        e.ids_mut().push(id.clone());
176
177        // Add footnote to the correct mapping
178        self.footnotes[e.footnote_type()].insert(id.clone(), n);
179
180        // Keep track of named vs anonymous footnotes for auto-numbering refs later
181        if matches!(e.extra().auto, Some(FootnoteType::Number)) {
182            let t = AutoNumberedFootnoteType::from_names(e.names());
183            self.auto_numbered_footnotes[t].push(n);
184        }
185
186        // Standard transform
187        self.transform_children(&mut e, Self::transform_sub_footnote);
188        once(e.into())
189    }
190    /// Give each reference an ID. We don’t need to do more.
191    fn transform_footnote_reference(
192        &mut self,
193        mut e: e::FootnoteReference,
194    ) -> impl Iterator<Item = c::TextOrInlineElement> {
195        // Add running count ID
196        self.n_footnote_refs += 1;
197        e.ids_mut()
198            .push(ID(format!("footnote-reference-{}", self.n_footnote_refs)));
199
200        // Standard transform
201        self.transform_children(&mut e, Self::transform_text_or_inline_element);
202        once(e.into())
203    }
204}
205
206#[derive(Clone, Debug, PartialEq)]
207struct Substitution {
208    content: Vec<c::TextOrInlineElement>,
209    /// If true and the sibling before the reference is a text node,
210    /// the text node gets right-trimmed.
211    ltrim: bool,
212    /// Same as `ltrim` with the sibling after the reference.
213    rtrim: bool,
214}
215
216#[derive(Debug)]
217struct Pass2<'p1> {
218    pass1: &'p1 Pass1,
219    named_targets: HashMap<NameToken, NamedTargetType>,
220    substitutions: HashMap<NameToken, Substitution>,
221    normalized_substitutions: HashMap<String, Substitution>,
222    /// Footnote references.
223    /// Multiple numbered ones can point to the same number.
224    footnote_refs: StaticMap<FootnoteType, HashMap<ID, NonZero<usize>>>,
225    /// Number of symbol footnote references.
226    n_symbol_footnote_refs: usize,
227    /// Number of auto-numbered footnote references.
228    n_auto_num_footnote_refs: StaticMap<AutoNumberedFootnoteType, usize>,
229}
230impl<'p1> From<&'p1 Pass1> for Pass2<'p1> {
231    fn from(pass1: &'p1 Pass1) -> Self {
232        Self {
233            pass1,
234            named_targets: HashMap::new(),
235            substitutions: HashMap::new(),
236            normalized_substitutions: HashMap::new(),
237            footnote_refs: StaticMap::default(),
238            n_symbol_footnote_refs: 0,
239            n_auto_num_footnote_refs: StaticMap::default(),
240        }
241    }
242}
243
244/// Pass 2.
245///
246/// - Populate substitution definitions.
247/// - Populate (link) targets.
248/// - Resolve which footnotes are referenced by footnote references.
249impl<'tree> Visit<'tree> for Pass2<'_> {
250    fn visit_substitution_definition(&mut self, e: &'tree e::SubstitutionDefinition) {
251        let subst = Substitution {
252            content: e.children().clone(),
253            ltrim: e.extra().ltrim,
254            rtrim: e.extra().rtrim,
255        };
256        for name in e.names() {
257            if self.substitutions.contains_key(name) {
258                // TODO: Duplicate substitution name (level 3 system message).
259            }
260            // Intentionally overriding any previous values.
261            self.substitutions.insert(name.clone(), subst.clone());
262            self.normalized_substitutions
263                .insert(name.0.to_lowercase(), subst.clone());
264        }
265    }
266    fn visit_target(&mut self, e: &'tree e::Target) {
267        if let Some(uri) = &e.extra().refuri {
268            for name in e.names() {
269                self.named_targets
270                    .insert(name.clone(), NamedTargetType::ExternalLink(uri.clone()));
271            }
272        }
273        // TODO: as is, people can only refer to the target directly containing the URL.
274        // add refid and refnames to some HashMap and follow those later.
275    }
276    fn visit_footnote_reference(&mut self, e: &'tree e::FootnoteReference) {
277        let id = e.ids().first().unwrap();
278        let n = match e.extra().auto {
279            Some(FootnoteType::Symbol) => {
280                self.n_symbol_footnote_refs += 1;
281                NonZero::new(self.n_symbol_footnote_refs).unwrap()
282            }
283            Some(FootnoteType::Number) => {
284                let t = AutoNumberedFootnoteType::from_names(e.names());
285                self.n_auto_num_footnote_refs[t] += 1;
286                self.pass1.auto_numbered_footnotes[t][self.n_auto_num_footnote_refs[t] - 1]
287            }
288            None => e.get_label().unwrap().parse().unwrap(),
289        };
290
291        self.footnote_refs[e.footnote_type()].insert(id.clone(), n);
292
293        for c in e.children() {
294            self.visit_text_or_inline_element(c);
295        }
296    }
297}
298
299#[derive(Debug)]
300struct Pass3<'p2>(&'p2 Pass2<'p2>);
301impl<'p2> Pass3<'p2> {
302    fn target_url<'t>(self: &'t Pass3<'p2>, refname: &[NameToken]) -> Option<&'t Url> {
303        // TODO: Check if the target would expand circularly
304        assert!(
305            refname.len() == 1,
306            "Expected exactly one name in a reference."
307        );
308        let name = refname[0].clone();
309        match self.0.named_targets.get(&name)? {
310            NamedTargetType::ExternalLink(url) => Some(url),
311            _ => unimplemented!(),
312        }
313    }
314
315    fn substitution<'t>(self: &'t Pass3<'p2>, refname: &[NameToken]) -> Option<&'t Substitution> {
316        // TODO: Check if the substitution would expand circularly
317        assert!(
318            refname.len() == 1,
319            "Expected exactly one name in a substitution reference."
320        );
321        let name = refname[0].clone();
322        self.0
323            .substitutions
324            .get(&name)
325            .or_else(|| self.0.normalized_substitutions.get(&name.0.to_lowercase()))
326    }
327}
328
329impl<'p2> From<&'p2 Pass2<'p2>> for Pass3<'p2> {
330    fn from(p: &'p2 Pass2<'p2>) -> Self {
331        Pass3(p)
332    }
333}
334
335/// 3rd pass.
336impl Transform for Pass3<'_> {
337    fn transform_substitution_definition(
338        &mut self,
339        _: e::SubstitutionDefinition,
340    ) -> impl Iterator<Item = c::BodyElement> {
341        None.into_iter()
342    }
343    fn transform_substitution_reference(
344        &mut self,
345        e: e::SubstitutionReference,
346    ) -> impl Iterator<Item = c::TextOrInlineElement> {
347        let r: Box<dyn Iterator<Item = c::TextOrInlineElement>> = if let Some(Substitution {
348            content,
349            ltrim,
350            rtrim,
351        }) =
352            self.substitution(&e.extra().refname)
353        {
354            // (level 3 system message).
355            // TODO: ltrim and rtrim.
356            if *ltrim || *rtrim {
357                dbg!(content, ltrim, rtrim);
358            }
359            Box::new(content.clone().into_iter())
360        } else {
361            // Undefined substitution name (level 3 system message).
362            // TODO: This replaces the reference by a Problematic node.
363            // The corresponding SystemMessage node should go in a generated
364            // section with class "system-messages" at the end of the document.
365            let mut replacement: Box<e::Problematic> = Box::default();
366            replacement
367                .children_mut()
368                .push(c::TextOrInlineElement::String(Box::new(format!(
369                    "|{}|",
370                    e.extra().refname[0].0
371                ))));
372            // TODO: Create an ID for replacement for the system_message to reference.
373            // TODO: replacement.refid pointing to the system_message.
374
375            Box::new(once(c::TextOrInlineElement::Problematic(replacement)))
376        };
377        r
378    }
379    fn transform_reference(
380        &mut self,
381        mut e: e::Reference,
382    ) -> impl Iterator<Item = c::TextOrInlineElement> {
383        if e.extra().refuri.is_none() {
384            if let Some(uri) = self.target_url(&e.extra().refname) {
385                e.extra_mut().refuri = Some(uri.clone());
386            }
387        }
388        once(e.into())
389    }
390    fn transform_footnote(&mut self, mut e: e::Footnote) -> impl Iterator<Item = c::BodyElement> {
391        /* TODO: https://docutils.sourceforge.io/docs/ref/doctree.html#footnote-reference
392        1. see above
393        2. (in resolve_refs) set `footnote_reference[refid]`s, `footnote[backref]`s and `footnote>label`
394        */
395        let id = e.ids().first().unwrap();
396        let num = self.0.pass1.footnotes[e.footnote_type()].get(id).unwrap();
397        if e.get_label().is_err() {
398            e.children_mut().insert(
399                0,
400                e::Label::with_children(vec![num.to_string().into()]).into(),
401            );
402        }
403
404        // backrefs
405        e.extra_mut().backrefs = self.0.footnote_refs[e.footnote_type()]
406            .iter()
407            .filter(|&(_, num2)| num == num2)
408            .map(|(refid, _)| refid.clone())
409            .collect();
410
411        // standard transform
412        self.transform_children(&mut e, Self::transform_sub_footnote);
413        once(e.into())
414    }
415    fn transform_footnote_reference(
416        &mut self,
417        mut e: e::FootnoteReference,
418    ) -> impl Iterator<Item = c::TextOrInlineElement> {
419        // TODO: dedupe
420        // https://docutils.sourceforge.io/docs/ref/doctree.html#footnote-reference
421        let refid = e.ids().first().unwrap();
422        let n = self.0.footnote_refs[e.footnote_type()].get(refid).unwrap();
423
424        // get referenced footnote ID
425        e.extra_mut().refid = self.0.pass1.footnotes[e.footnote_type()]
426            .iter()
427            .find_map(|(k, v)| (v == n).then_some(k.clone()));
428
429        // add label
430        if e.get_label().is_err() {
431            e.children_mut().insert(0, n.to_string().into());
432        }
433
434        // standard transform
435        self.transform_children(&mut e, Self::transform_text_or_inline_element);
436        once(e.into())
437    }
438}