rst_parser/transforms/
standard.rs

1/*! Perform standard transforms.
2 *
3 * Hyperlinks
4 * ----------
5 *
6 * See <https://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets>
7 *
8 * Links can have internal or external targets.
9 * In the source, targets look like:
10 *
11 * ```restructuredtext
12 * .. targetname1:
13 * .. targetname2:
14 *
15 * some paragraph or list item or so
16 * ```
17 *
18 * or:
19 *
20 * ```restructuredtext
21 * .. targetname1:
22 * .. targetname2: https://link
23 * ```
24 *
 * There are also anonymous links and targets without names.
26 *
27 * TODO: continue documenting how it’s done via <https://repo.or.cz/docutils.git/blob/HEAD:/docutils/docutils/transforms/references.py>
28 *
29 * Footnotes
30 * ---------
31 *
32 * See <https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#footnotes>
33 *
34 * Footnotes can be numbered or symbolic.
35 * In the source, they are split into two parts: footnote references and footnotes.
36 *
37 * Their order is defined by the order of the footnotes, not references.
38 */
39
40use std::{collections::HashMap, iter::once, num::NonZero, vec};
41
42use document_tree::{
43    Document, HasChildren, LabelledFootnote as _,
44    attribute_types::{AutoFootnoteType, ID, NameToken},
45    element_categories as c,
46    elements::{self as e, Element},
47    extra_attributes::{ExtraAttributes, FootnoteType},
48    url::Url,
49};
50
51use super::{Transform, Visit};
52
53#[must_use]
54pub fn standard_transform(doc: Document) -> Document {
55    let mut pass1 = Pass1::default();
56    let doc = pass1.transform(doc);
57    let mut pass2 = Pass2::from(&pass1);
58    pass2.visit(&doc);
59    Pass3::from(&pass2).transform(doc)
60}
61
62#[derive(Debug)]
63#[allow(dead_code)]
64enum NamedTargetType {
65    Citation,
66    InternalLink,
67    ExternalLink(Url),
68    IndirectLink(NameToken),
69    SectionTitle,
70}
71impl NamedTargetType {
72    #[allow(dead_code)]
73    /// See <https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#implicit-hyperlink-targets>
74    fn is_implicit_target(&self) -> bool {
75        use NamedTargetType as T;
76        matches!(self, T::SectionTitle | T::Citation)
77    }
78}
79
/// The number `1` as a `NonZero<usize>`: the lowest footnote number, and the base that offsets get added to.
const ONE: NonZero<usize> = NonZero::<usize>::MIN;
81
82/// Pass 1: Number footnotes, and add IDs to footnote references and footnotes.
83///
84/// Needs to be separate pass, since resolving `refid`s for footnote references requires already-assigned footnote numbers.
85/// Therefore, we do that here, then (in pass 2) resolve references, and finally (in pass 3) transform the footnotes.
86#[derive(Default, Debug)]
87struct Pass1 {
88    /// Store numbers for symbolic footnotes. They can only be in order, so `_.values().sort() == 1..=_.len()`
89    footnotes_symbol: HashMap<ID, NonZero<usize>>,
90    /// Store numbers for numbered footnotes. They can have gaps due to explicitly numbered ones.
91    footnotes_number: HashMap<ID, NonZero<usize>>,
92    /// Numbers of anonymous footnotes in order of appearance.
93    auto_numbered_anon_footnotes: Vec<NonZero<usize>>,
94    /// Numbers of named footnotes in order of appearance.
95    auto_numbered_named_footnotes: Vec<NonZero<usize>>,
96    /// Number of encountered anonymous footnotes. Only used for ID generation.
97    n_anon_footnotes: usize,
98    /// Number of encountered footnote references. Only used for ID generation.
99    n_footnote_refs: usize,
100}
101impl Pass1 {
102    /// Get next footnote number for a type.
103    ///
104    /// See <https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#mixed-manual-and-auto-numbered-footnotes>
105    fn next_footnote(&mut self, typ: AutoFootnoteType) -> NonZero<usize> {
106        match typ {
107            AutoFootnoteType::Number => {
108                let Some(n) = NonZero::new(self.footnotes_number.len()) else {
109                    return ONE;
110                };
111                let mut ordered: Vec<_> = self.footnotes_number.values().copied().collect();
112                ordered.sort_unstable();
113                ordered
114                    .iter()
115                    .copied()
116                    .zip(1usize..) // https://github.com/rust-lang/rust/pull/127534
117                    .enumerate()
118                    .find_map(|(i, (n1, n2))| (n1.get() != n2).then_some(ONE.saturating_add(i)))
119                    .unwrap_or(n)
120            }
121            AutoFootnoteType::Symbol => {
122                if cfg!(debug_assertions) {
123                    let mut vals: Vec<usize> = self
124                        .footnotes_symbol
125                        .values()
126                        .copied()
127                        .map(Into::into)
128                        .collect();
129                    vals.sort_unstable();
130                    assert_eq!(vals, (1..=self.footnotes_symbol.len()).collect::<Vec<_>>());
131                }
132                ONE.saturating_add(self.footnotes_symbol.len())
133            }
134        }
135    }
136}
137
138impl Transform for Pass1 {
139    /// Add (auto-)id and running count to “ids” of footnotes
140    fn transform_footnote(&mut self, mut e: e::Footnote) -> impl Iterator<Item = c::BodyElement> {
141        // Get next or stored footnote number
142        let n = match e
143            .extra()
144            .auto
145            .map(|t| self.next_footnote(t))
146            .ok_or(())
147            .or_else::<anyhow::Error, _>(|()| Ok(e.get_label()?.parse()?))
148        {
149            Ok(n) => n,
150            Err(err) => {
151                let t = e::Problematic::with_children(vec![err.to_string().into()]).into();
152                return once(e::Paragraph::with_children(vec![t]).into());
153            }
154        };
155
156        // Get ID from name or create one from the running count
157        let id = if let Some(name) = e.names().first() {
158            name.0.as_str().into()
159        } else {
160            self.n_anon_footnotes += 1;
161            ID(format!("footnote-{}", self.n_anon_footnotes))
162        };
163        e.ids_mut().push(id.clone());
164
165        // Add footnote to the correct mapping
166        if e.is_symbol() {
167            self.footnotes_symbol.insert(id.clone(), n);
168        } else {
169            self.footnotes_number.insert(id.clone(), n);
170        }
171
172        // Keep track of named vs anonymous footnotes for auto-numbering refs later
173        if matches!(e.extra().auto, Some(AutoFootnoteType::Number)) {
174            if e.names().is_empty() {
175                self.auto_numbered_anon_footnotes.push(n);
176            } else {
177                self.auto_numbered_named_footnotes.push(n);
178            }
179        }
180
181        // Standard transform
182        self.transform_children(&mut e, Self::transform_sub_footnote);
183        once(e.into())
184    }
185    /// Give each reference an ID. We don’t need to do more.
186    fn transform_footnote_reference(
187        &mut self,
188        mut e: e::FootnoteReference,
189    ) -> impl Iterator<Item = c::TextOrInlineElement> {
190        // Add running count ID
191        self.n_footnote_refs += 1;
192        e.ids_mut()
193            .push(ID(format!("footnote-reference-{}", self.n_footnote_refs)));
194
195        // Standard transform
196        self.transform_children(&mut e, Self::transform_text_or_inline_element);
197        once(e.into())
198    }
199}
200
/// What a substitution reference gets replaced with, as collected from a substitution definition.
#[derive(Clone, Debug)]
struct Substitution {
    /// Copy of the substitution definition's children; cloned into the tree in place of a reference.
    content: Vec<c::TextOrInlineElement>,
    /// If true and the sibling before the reference is a text node,
    /// the text node gets right-trimmed.
    ltrim: bool,
    /// Same as `ltrim` with the sibling after the reference.
    rtrim: bool,
}
210
211#[derive(Debug)]
212struct Pass2<'p1> {
213    pass1: &'p1 Pass1,
214    named_targets: HashMap<NameToken, NamedTargetType>,
215    substitutions: HashMap<NameToken, Substitution>,
216    normalized_substitutions: HashMap<String, Substitution>,
217    /// Footnote references that reference symbol footnotes.
218    symbol_footnote_refs: HashMap<ID, NonZero<usize>>,
219    /// Footnote references that reference numbered footnotes.
220    /// Multiple can point to the same number.
221    numbered_footnote_refs: HashMap<ID, NonZero<usize>>,
222    /// Number of symbol footnote references.
223    n_symbol_footnote_refs: usize,
224    /// Number of anonymous numbered footnote references.
225    n_numbered_anon_footnote_refs: usize,
226    /// Number of named numbered footnote references.
227    n_numbered_named_footnote_refs: usize,
228}
229impl<'p1> From<&'p1 Pass1> for Pass2<'p1> {
230    fn from(pass1: &'p1 Pass1) -> Self {
231        Self {
232            pass1,
233            named_targets: HashMap::new(),
234            substitutions: HashMap::new(),
235            normalized_substitutions: HashMap::new(),
236            symbol_footnote_refs: HashMap::new(),
237            numbered_footnote_refs: HashMap::new(),
238            n_symbol_footnote_refs: 0,
239            n_numbered_anon_footnote_refs: 0,
240            n_numbered_named_footnote_refs: 0,
241        }
242    }
243}
244
245/// Pass 2.
246///
247/// - Populate substitution definitions.
248/// - Populate (link) targets.
249/// - Resolve which footnotes are referenced by footnote references.
250impl<'tree> Visit<'tree> for Pass2<'_> {
251    fn visit_substitution_definition(&mut self, e: &'tree e::SubstitutionDefinition) {
252        let subst = Substitution {
253            content: e.children().clone(),
254            ltrim: e.extra().ltrim,
255            rtrim: e.extra().rtrim,
256        };
257        for name in e.names() {
258            if self.substitutions.contains_key(name) {
259                // TODO: Duplicate substitution name (level 3 system message).
260            }
261            // Intentionally overriding any previous values.
262            self.substitutions.insert(name.clone(), subst.clone());
263            self.normalized_substitutions
264                .insert(name.0.to_lowercase(), subst.clone());
265        }
266    }
267    fn visit_target(&mut self, e: &'tree e::Target) {
268        if let Some(uri) = &e.extra().refuri {
269            for name in e.names() {
270                self.named_targets
271                    .insert(name.clone(), NamedTargetType::ExternalLink(uri.clone()));
272            }
273        }
274        // TODO: as is, people can only refer to the target directly containing the URL.
275        // add refid and refnames to some HashMap and follow those later.
276    }
277    fn visit_footnote_reference(&mut self, e: &'tree e::FootnoteReference) {
278        let id = e.ids().first().unwrap();
279        let name = e.names().first();
280        let n = match e.extra().auto {
281            Some(AutoFootnoteType::Symbol) => {
282                self.n_symbol_footnote_refs += 1;
283                NonZero::new(self.n_symbol_footnote_refs).unwrap()
284            }
285            Some(AutoFootnoteType::Number) => {
286                if name.is_some() {
287                    self.n_numbered_named_footnote_refs += 1;
288                    self.pass1.auto_numbered_named_footnotes
289                        [self.n_numbered_named_footnote_refs - 1]
290                } else {
291                    self.n_numbered_anon_footnote_refs += 1;
292                    self.pass1.auto_numbered_anon_footnotes[self.n_numbered_anon_footnote_refs - 1]
293                }
294            }
295            None => e.get_label().unwrap().parse().unwrap(),
296        };
297
298        if e.is_symbol() {
299            self.symbol_footnote_refs.insert(id.clone(), n);
300        } else {
301            self.numbered_footnote_refs.insert(id.clone(), n);
302        }
303
304        for c in e.children() {
305            self.visit_text_or_inline_element(c);
306        }
307    }
308}
309
310#[derive(Debug)]
311struct Pass3<'p1, 'p2: 'p1>(&'p2 Pass2<'p1>);
312impl<'p2> Pass3<'_, 'p2> {
313    fn target_url<'t>(self: &'t Pass3<'_, 'p2>, refname: &[NameToken]) -> Option<&'t Url> {
314        // TODO: Check if the target would expand circularly
315        assert!(
316            refname.len() == 1,
317            "Expected exactly one name in a reference."
318        );
319        let name = refname[0].clone();
320        match self.0.named_targets.get(&name)? {
321            NamedTargetType::ExternalLink(url) => Some(url),
322            _ => unimplemented!(),
323        }
324    }
325
326    fn substitution<'t>(
327        self: &'t Pass3<'_, 'p2>,
328        refname: &[NameToken],
329    ) -> Option<&'t Substitution> {
330        // TODO: Check if the substitution would expand circularly
331        assert!(
332            refname.len() == 1,
333            "Expected exactly one name in a substitution reference."
334        );
335        let name = refname[0].clone();
336        self.0
337            .substitutions
338            .get(&name)
339            .or_else(|| self.0.normalized_substitutions.get(&name.0.to_lowercase()))
340    }
341}
342
343impl<'p1, 'p2: 'p1> From<&'p2 Pass2<'p1>> for Pass3<'p1, 'p2> {
344    fn from(p: &'p2 Pass2<'p1>) -> Self {
345        Pass3(p)
346    }
347}
348
349/// 3rd pass.
350impl Transform for Pass3<'_, '_> {
351    fn transform_substitution_definition(
352        &mut self,
353        _: e::SubstitutionDefinition,
354    ) -> impl Iterator<Item = c::BodyElement> {
355        None.into_iter()
356    }
357    fn transform_substitution_reference(
358        &mut self,
359        e: e::SubstitutionReference,
360    ) -> impl Iterator<Item = c::TextOrInlineElement> {
361        let r: Box<dyn Iterator<Item = c::TextOrInlineElement>> = if let Some(Substitution {
362            content,
363            ltrim,
364            rtrim,
365        }) =
366            self.substitution(&e.extra().refname)
367        {
368            // (level 3 system message).
369            // TODO: ltrim and rtrim.
370            if *ltrim || *rtrim {
371                dbg!(content, ltrim, rtrim);
372            }
373            Box::new(content.clone().into_iter())
374        } else {
375            // Undefined substitution name (level 3 system message).
376            // TODO: This replaces the reference by a Problematic node.
377            // The corresponding SystemMessage node should go in a generated
378            // section with class "system-messages" at the end of the document.
379            let mut replacement: Box<e::Problematic> = Box::default();
380            replacement
381                .children_mut()
382                .push(c::TextOrInlineElement::String(Box::new(format!(
383                    "|{}|",
384                    e.extra().refname[0].0
385                ))));
386            // TODO: Create an ID for replacement for the system_message to reference.
387            // TODO: replacement.refid pointing to the system_message.
388
389            Box::new(once(c::TextOrInlineElement::Problematic(replacement)))
390        };
391        r
392    }
393    fn transform_reference(
394        &mut self,
395        mut e: e::Reference,
396    ) -> impl Iterator<Item = c::TextOrInlineElement> {
397        if e.extra().refuri.is_none() {
398            if let Some(uri) = self.target_url(&e.extra().refname) {
399                e.extra_mut().refuri = Some(uri.clone());
400            }
401        }
402        once(e.into())
403    }
404    fn transform_footnote(&mut self, mut e: e::Footnote) -> impl Iterator<Item = c::BodyElement> {
405        /* TODO: https://docutils.sourceforge.io/docs/ref/doctree.html#footnote-reference
406        1. see above
407        2. (in resolve_refs) set `footnote_reference[refid]`s, `footnote[backref]`s and `footnote>label`
408        */
409        let id = e.ids().first().unwrap();
410        let id2num = if e.is_symbol() {
411            &self.0.pass1.footnotes_symbol
412        } else {
413            &self.0.pass1.footnotes_number
414        };
415        let num = id2num.get(id).unwrap();
416        if e.get_label().is_err() {
417            e.children_mut().insert(
418                0,
419                e::Label::with_children(vec![num.to_string().into()]).into(),
420            );
421        }
422
423        // backrefs
424        let refid2num = if e.is_symbol() {
425            &self.0.symbol_footnote_refs
426        } else {
427            &self.0.numbered_footnote_refs
428        };
429        e.extra_mut().backrefs = refid2num
430            .iter()
431            .filter(|&(_, num2)| num == num2)
432            .map(|(refid, _)| refid.clone())
433            .collect();
434
435        // standard transform
436        self.transform_children(&mut e, Self::transform_sub_footnote);
437        once(e.into())
438    }
439    fn transform_footnote_reference(
440        &mut self,
441        mut e: e::FootnoteReference,
442    ) -> impl Iterator<Item = c::TextOrInlineElement> {
443        // TODO: dedupe
444        // https://docutils.sourceforge.io/docs/ref/doctree.html#footnote-reference
445        let refid = e.ids().first().unwrap();
446        let refid2num = if e.is_symbol() {
447            &self.0.symbol_footnote_refs
448        } else {
449            &self.0.numbered_footnote_refs
450        };
451        let n = refid2num.get(refid).unwrap();
452
453        // get referenced footnote ID
454        let footnote2num = if e.is_symbol() {
455            &self.0.pass1.footnotes_symbol
456        } else {
457            &self.0.pass1.footnotes_number
458        };
459        let num2footnote: HashMap<_, _> =
460            footnote2num.iter().map(|(k, v)| (*v, k.clone())).collect();
461        e.extra_mut().refid = num2footnote.get(n).cloned();
462
463        // add label
464        if e.get_label().is_err() {
465            e.children_mut().insert(0, n.to_string().into());
466        }
467
468        // standard transform
469        self.transform_children(&mut e, Self::transform_text_or_inline_element);
470        once(e.into())
471    }
472}