xml/reader/
events.rs

//! Contains the `XmlEvent` data type, instances of which are emitted by the parser.
2
3use crate::attribute::OwnedAttribute;
4use crate::common::XmlVersion;
5use crate::name::OwnedName;
6use crate::namespace::Namespace;
7use std::fmt;
8
9/// An element of an XML input stream.
10///
11/// Items of this enum are emitted by `reader::EventReader`. They correspond to different
12/// elements of an XML document.
13#[derive(PartialEq, Clone)]
14pub enum XmlEvent {
15    /// Corresponds to XML document declaration.
16    ///
17    /// This event is always emitted before any other event. It is emitted
18    /// even if the actual declaration is not present in the document.
19    StartDocument {
20        /// XML version.
21        ///
22        /// If XML declaration is not present, defaults to `Version10`.
23        version: XmlVersion,
24
25        /// XML document encoding.
26        ///
27        /// If XML declaration is not present or does not contain `encoding` attribute,
28        /// defaults to `"UTF-8"`. This field is currently used for no other purpose than
29        /// informational.
30        encoding: String,
31
32        /// XML standalone declaration.
33        ///
34        /// If XML document is not present or does not contain `standalone` attribute,
35        /// defaults to `None`. This field is currently used for no other purpose than
36        /// informational.
37        standalone: Option<bool>,
38    },
39
40    /// Denotes to the end of the document stream.
41    ///
42    /// This event is always emitted after any other event (except `Error`). After it
43    /// is emitted for the first time, it will always be emitted on next event pull attempts.
44    EndDocument,
45
46    /// Denotes an XML processing instruction.
47    ///
48    /// This event contains a processing instruction target (`name`) and opaque `data`. It
49    /// is up to the application to process them.
50    ProcessingInstruction {
51        /// Processing instruction target.
52        name: String,
53
54        /// Processing instruction content.
55        data: Option<String>,
56    },
57
58    /// Denotes a beginning of an XML element.
59    ///
60    /// This event is emitted after parsing opening tags or after parsing bodiless tags. In the
61    /// latter case `EndElement` event immediately follows.
62    StartElement {
63        /// Qualified name of the element.
64        name: OwnedName,
65
66        /// A list of attributes associated with the element.
67        ///
68        /// Currently attributes are not checked for duplicates (TODO)
69        attributes: Vec<OwnedAttribute>,
70
71        /// Contents of the namespace mapping at this point of the document.
72        namespace: Namespace,
73    },
74
75    /// Denotes an end of an XML element.
76    ///
77    /// This event is emitted after parsing closing tags or after parsing bodiless tags. In the
78    /// latter case it is emitted immediately after corresponding `StartElement` event.
79    EndElement {
80        /// Qualified name of the element.
81        name: OwnedName,
82    },
83
84    /// Denotes CDATA content.
85    ///
86    /// This event contains unparsed data. No unescaping will be performed.
87    ///
88    /// It is possible to configure a parser to emit `Characters` event instead of `CData`. See
89    /// `pull::ParserConfiguration` structure for more information.
90    CData(String),
91
92    /// Denotes a comment.
93    ///
94    /// It is possible to configure a parser to ignore comments, so this event will never be emitted.
95    /// See `pull::ParserConfiguration` structure for more information.
96    Comment(String),
97
98    /// Denotes character data outside of tags.
99    ///
100    /// Contents of this event will always be unescaped, so no entities like `&lt;` or `&amp;` or `&#123;`
101    /// will appear in it.
102    ///
103    /// It is possible to configure a parser to trim leading and trailing whitespace for this event.
104    /// See `pull::ParserConfiguration` structure for more information.
105    Characters(String),
106
107    /// Denotes a chunk of whitespace outside of tags.
108    ///
109    /// It is possible to configure a parser to emit `Characters` event instead of `Whitespace`.
110    /// See `pull::ParserConfiguration` structure for more information. When combined with whitespace
111    /// trimming, it will eliminate standalone whitespace from the event stream completely.
112    Whitespace(String),
113}
114
115impl fmt::Debug for XmlEvent {
116    #[cold]
117    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
118        match self {
119            Self::StartDocument { version, encoding, standalone } =>
120                write!(f, "StartDocument({}, {}, {:?})", version, *encoding, standalone),
121            Self::EndDocument =>
122                write!(f, "EndDocument"),
123            Self::ProcessingInstruction { name, data } =>
124                write!(f, "ProcessingInstruction({}{})", *name, match data {
125                    Some(data) => format!(", {data}"),
126                    None       => String::new()
127                }),
128            Self::StartElement { name, attributes, namespace: Namespace(namespace) } =>
129                write!(f, "StartElement({}, {:?}{})", name, namespace, if attributes.is_empty() {
130                    String::new()
131                } else {
132                    let attributes: Vec<String> = attributes.iter().map(
133                        |a| format!("{} -> {}", a.name, a.value)
134                    ).collect();
135                    format!(", [{}]", attributes.join(", "))
136                }),
137            Self::EndElement { name } =>
138                write!(f, "EndElement({name})"),
139            Self::Comment(data) =>
140                write!(f, "Comment({data})"),
141            Self::CData(data) =>
142                write!(f, "CData({data})"),
143            Self::Characters(data) =>
144                write!(f, "Characters({data})"),
145            Self::Whitespace(data) =>
146                write!(f, "Whitespace({data})")
147        }
148    }
149}
150
151impl XmlEvent {
152    /// Obtains a writer event from this reader event.
153    ///
154    /// This method is useful for streaming processing of XML documents where the output
155    /// is also an XML document. With this method it is possible to process some events
156    /// while passing other events through to the writer unchanged:
157    ///
158    /// ```rust
159    /// use std::str;
160    ///
161    /// use xml::reader::XmlEvent as ReaderEvent;
162    /// use xml::writer::XmlEvent as WriterEvent;
163    /// use xml::{EventReader, EventWriter};
164    ///
165    /// let mut input: &[u8] = b"<hello>world</hello>";
166    /// let mut output: Vec<u8> = Vec::new();
167    ///
168    /// {
169    ///     let mut reader = EventReader::new(&mut input);
170    ///     let mut writer = EventWriter::new(&mut output);
171    ///
172    ///     for e in reader {
173    ///         match e.unwrap() {
174    ///             ReaderEvent::Characters(s) =>
175    ///                 writer.write(WriterEvent::characters(&s.to_uppercase())).unwrap(),
176    ///             e => if let Some(e) = e.as_writer_event() {
177    ///                 writer.write(e).unwrap()
178    ///             }
179    ///         }
180    ///     }
181    /// }
182    ///
183    /// assert_eq!(
184    ///     str::from_utf8(&output).unwrap(),
185    ///     r#"<?xml version="1.0" encoding="UTF-8"?><hello>WORLD</hello>"#
186    /// );
187    /// ```
188    ///
189    /// Note that this API may change or get additions in future to improve its ergonomics.
190    #[must_use]
191    pub fn as_writer_event(&self) -> Option<crate::writer::events::XmlEvent<'_>> {
192        match self {
193            Self::StartDocument { version, encoding, standalone } =>
194                Some(crate::writer::events::XmlEvent::StartDocument {
195                    version: *version,
196                    encoding: Some(encoding),
197                    standalone: *standalone
198                }),
199            Self::ProcessingInstruction { name, data } =>
200                Some(crate::writer::events::XmlEvent::ProcessingInstruction {
201                    name,
202                    data: data.as_ref().map(|s| &**s)
203                }),
204            Self::StartElement { name, attributes, namespace } =>
205                Some(crate::writer::events::XmlEvent::StartElement {
206                    name: name.borrow(),
207                    attributes: attributes.iter().map(|a| a.borrow()).collect(),
208                    namespace: namespace.borrow(),
209                }),
210            Self::EndElement { name } =>
211                Some(crate::writer::events::XmlEvent::EndElement { name: Some(name.borrow()) }),
212            Self::Comment(data) => Some(crate::writer::events::XmlEvent::Comment(data)),
213            Self::CData(data) => Some(crate::writer::events::XmlEvent::CData(data)),
214            Self::Characters(data) |
215            Self::Whitespace(data) => Some(crate::writer::events::XmlEvent::Characters(data)),
216            Self::EndDocument => None,
217        }
218    }
219}