// xml/reader/parser/outside_tag.rs
1use crate::common::is_whitespace_char;
2use crate::reader::error::SyntaxError;
3use crate::reader::events::XmlEvent;
4use crate::reader::lexer::Token;
5
6use super::{
7 ClosingTagSubstate, DoctypeSubstate, Encountered, OpeningTagSubstate,
8 ProcessingInstructionSubstate, PullParser, Result, State,
9};
10
11impl PullParser {
12 pub fn outside_tag(&mut self, t: Token) -> Option<Result> {
13 match t {
14 Token::Character(c) => {
15 if is_whitespace_char(c) {
16 if (self.config.c.trim_whitespace && self.buf.is_empty()) ||
18 (self.depth() == 0 && self.config.c.ignore_root_level_whitespace) {
19 return None;
20 }
21 } else {
22 self.inside_whitespace = false;
23 if self.depth() == 0 {
24 return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
25 }
26 }
27
28 if !self.is_valid_xml_char_not_restricted(c) {
29 return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)));
30 }
31
32 if self.buf.is_empty() {
33 self.push_pos();
34 } else if self.buf.len() > self.config.max_data_length {
35 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
36 }
37 self.buf.push(c);
38 None
39 },
40
41 Token::CommentEnd | Token::TagEnd | Token::EqualsSign |
42 Token::DoubleQuote | Token::SingleQuote |
43 Token::ProcessingInstructionEnd | Token::EmptyTagEnd => {
44 if self.depth() == 0 {
45 return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
46 }
47 self.inside_whitespace = false;
48
49 if let Some(s) = t.as_static_str() {
50 if self.buf.is_empty() {
51 self.push_pos();
52 } else if self.buf.len() > self.config.max_data_length {
53 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
54 }
55
56 self.buf.push_str(s);
57 }
58 None
59 },
60
61 Token::ReferenceStart if self.depth() > 0 => {
62 self.state_after_reference = State::OutsideTag;
63 self.into_state_continue(State::InsideReference)
64 },
65
66 Token::ReferenceEnd if self.depth() > 0 => { self.inside_whitespace = false;
68 if self.buf.len() > self.config.max_data_length {
69 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
70 }
71 Token::ReferenceEnd.push_to_string(&mut self.buf);
72 None
73 },
74
75 Token::CommentStart if self.config.c.coalesce_characters && self.config.c.ignore_comments => {
76 let next_event = self.set_encountered(Encountered::Comment);
77 self.into_state(State::InsideComment, next_event)
79 }
80
81 Token::CDataStart if self.depth() > 0 && self.config.c.coalesce_characters && self.config.c.cdata_to_characters => {
82 if self.buf.is_empty() {
83 self.push_pos(); }
85 self.into_state_continue(State::InsideCData)
87 },
88
89 _ => {
90 let mut next_event = if self.buf_has_data() {
93 let buf = self.take_buf();
94 if self.inside_whitespace && self.config.c.trim_whitespace {
95 self.next_pos();
97 None
98 } else if self.inside_whitespace && !self.config.c.whitespace_to_characters {
99 debug_assert!(buf.chars().all(|ch| ch.is_whitespace()), "ws={buf:?}");
100 Some(Ok(XmlEvent::Whitespace(buf)))
101 } else if self.config.c.trim_whitespace {
102 Some(Ok(XmlEvent::Characters(buf.trim_matches(is_whitespace_char).into())))
103 } else {
104 Some(Ok(XmlEvent::Characters(buf)))
105 }
106 } else { None };
107 self.inside_whitespace = true; if t != Token::CommentStart || !self.config.c.ignore_comments {
112 self.push_pos();
113 }
114 match t {
115 Token::OpeningTagStart if self.depth() > 0 || self.encountered < Encountered::Element || self.config.allow_multiple_root_elements => {
116 if let Some(e) = self.set_encountered(Encountered::Element) {
117 next_event = Some(e);
118 }
119 self.nst.push_empty();
120 self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
121 },
122
123 Token::ClosingTagStart if self.depth() > 0 =>
124 self.into_state(State::InsideClosingTag(ClosingTagSubstate::CTInsideName), next_event),
125
126 Token::CommentStart => {
127 if let Some(e) = self.set_encountered(Encountered::Comment) {
128 next_event = Some(e);
129 }
130 self.into_state(State::InsideComment, next_event)
132 },
133
134 Token::DoctypeStart if self.encountered < Encountered::Doctype => {
135 if let Some(e) = self.set_encountered(Encountered::Doctype) {
136 next_event = Some(e);
137 }
138 self.data.doctype = Some(Token::DoctypeStart.to_string());
139
140 self.next_pos();
143 self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
144 },
145
146 Token::ProcessingInstructionStart =>
147 self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName), next_event),
148
149 Token::CDataStart if self.depth() > 0 => {
150 self.into_state(State::InsideCData, next_event)
151 },
152
153 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
154 }
155 },
156 }
157 }
158
159 pub fn document_start(&mut self, t: Token) -> Option<Result> {
160 debug_assert!(self.encountered < Encountered::Declaration);
161
162 match t {
163 Token::Character(c) => {
164 let next_event = self.set_encountered(Encountered::AnyChars);
165
166 if !is_whitespace_char(c) {
167 return Some(self.error(SyntaxError::UnexpectedTokenOutsideRoot(t)));
168 }
169 self.inside_whitespace = true;
170
171 if (self.config.c.trim_whitespace && self.buf.is_empty()) ||
173 (self.depth() == 0 && self.config.c.ignore_root_level_whitespace) {
174 return self.into_state(State::OutsideTag, next_event);
175 }
176
177 self.push_pos();
178 self.buf.push(c);
179 self.into_state(State::OutsideTag, next_event)
180 },
181
182 Token::CommentStart => {
183 let next_event = self.set_encountered(Encountered::Comment);
184 self.into_state(State::InsideComment, next_event)
185 },
186
187 Token::OpeningTagStart => {
188 let next_event = self.set_encountered(Encountered::Element);
189 self.nst.push_empty();
190 self.into_state(State::InsideOpeningTag(OpeningTagSubstate::InsideName), next_event)
191 },
192
193 Token::DoctypeStart => {
194 let next_event = self.set_encountered(Encountered::Doctype);
195 self.data.doctype = Some(Token::DoctypeStart.to_string());
196
197 self.next_pos();
200 self.into_state(State::InsideDoctype(DoctypeSubstate::Outside), next_event)
201 },
202
203 Token::ProcessingInstructionStart => {
204 self.push_pos();
205 self.into_state_continue(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideName))
206 },
207
208 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
209 }
210 }
211}