1use std::fmt::Write;
2
3use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
4use crate::reader::error::SyntaxError;
5use crate::reader::lexer::Token;
6
7use super::{DoctypeSubstate, PullParser, QuoteToken, Result, State};
8
9impl PullParser {
10 pub fn inside_doctype(&mut self, t: Token, substate: DoctypeSubstate) -> Option<Result> {
11 if let Some(ref mut doctype) = self.data.doctype {
12 write!(doctype, "{t}").ok()?;
13 if doctype.len() > self.config.max_data_length {
14 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
15 }
16 }
17
18 match substate {
19 DoctypeSubstate::Outside => match t {
20 Token::TagEnd => self.into_state_continue(State::OutsideTag),
21 Token::MarkupDeclarationStart => {
22 self.buf.clear();
23 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::InsideName))
24 },
25 Token::Character('%') => {
26 self.data.ref_data.clear();
27 self.data.ref_data.push('%');
28 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInDtd))
29 },
30 Token::CommentStart => {
31 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Comment))
32 },
33 Token::SingleQuote | Token::DoubleQuote => {
34 self.data.quote = super::QuoteToken::from_token(t);
36 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::String))
37 },
38 Token::CDataEnd | Token::CDataStart => Some(self.error(SyntaxError::UnexpectedToken(t))),
39 _ => None,
41 },
42 DoctypeSubstate::String => match t {
43 Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => None,
44 Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => None,
45 Token::SingleQuote | Token::DoubleQuote => {
46 self.data.quote = None;
47 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
48 },
49 _ => None,
50 },
51 DoctypeSubstate::Comment => match t {
52 Token::CommentEnd => {
53 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
54 },
55 _ => None,
56 },
57 DoctypeSubstate::InsideName => match t {
58 Token::Character(c @ 'A'..='Z') => {
59 self.buf.push(c);
60 None
61 },
62 Token::Character(c) if is_whitespace_char(c) => {
63 let buf = self.take_buf();
64 match buf.as_str() {
65 "ENTITY" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityName)),
66 "NOTATION" | "ELEMENT" | "ATTLIST" => self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)),
67 _ => Some(self.error(SyntaxError::UnknownMarkupDeclaration(buf.into()))),
68 }
69 },
70 _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
71 },
72 DoctypeSubstate::BeforeEntityName => {
73 self.data.name.clear();
74 match t {
75 Token::Character(c) if is_whitespace_char(c) => None,
76 Token::Character('%') => { self.data.name.push('%');
78 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinitionStart))
79 },
80 Token::Character(c) if is_name_start_char(c) => {
81 if self.data.name.len() > self.config.max_name_length {
82 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
83 }
84 self.data.name.push(c);
85 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityName))
86 },
87 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
88 }
89 },
90 DoctypeSubstate::EntityName => match t {
91 Token::Character(c) if is_whitespace_char(c) => {
92 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue))
93 },
94 Token::Character(c) if is_name_char(c) => {
95 if self.data.name.len() > self.config.max_name_length {
96 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
97 }
98 self.data.name.push(c);
99 None
100 },
101 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
102 },
103 DoctypeSubstate::BeforeEntityValue => {
104 self.buf.clear();
105 match t {
106 Token::Character(c) if is_whitespace_char(c) => None,
107 Token::Character('S' | 'P') => {
109 let name = self.data.take_name();
110 self.entities.entry(name).or_default(); self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration))
113 },
114 Token::SingleQuote | Token::DoubleQuote => {
115 self.data.quote = super::QuoteToken::from_token(t);
116 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
117 },
118 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
119 }
120 },
121 DoctypeSubstate::EntityValue => match t {
122 Token::SingleQuote if self.data.quote != Some(QuoteToken::SingleQuoteToken) => { self.buf.push('\''); None },
123 Token::DoubleQuote if self.data.quote != Some(QuoteToken::DoubleQuoteToken) => { self.buf.push('"'); None },
124 Token::SingleQuote | Token::DoubleQuote => {
125 self.data.quote = None;
126 let name = self.data.take_name();
127 let val = self.take_buf();
128 self.entities.entry(name).or_insert(val); self.into_state_continue(State::InsideDoctype(DoctypeSubstate::SkipDeclaration)) },
131 Token::ReferenceStart | Token::Character('&') => {
132 self.data.ref_data.clear();
133 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReferenceStart))
134 },
135 Token::Character('%') => {
136 self.data.ref_data.clear();
137 self.data.ref_data.push('%'); self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceInValue))
139 },
140 Token::Character(c) if !self.is_valid_xml_char(c) => {
141 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
142 },
143 Token::Character(c) => {
144 self.buf.push(c);
145 None
146 },
147 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
148 },
149 DoctypeSubstate::PEReferenceDefinitionStart => match t {
150 Token::Character(c) if is_whitespace_char(c) => None,
151 Token::Character(c) if is_name_start_char(c) => {
152 debug_assert_eq!(self.data.name, "%");
153 self.data.name.push(c);
154 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::PEReferenceDefinition))
155 },
156 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
157 },
158 DoctypeSubstate::PEReferenceDefinition => match t {
159 Token::Character(c) if is_name_char(c) => {
160 if self.data.name.len() > self.config.max_name_length {
161 return Some(self.error(SyntaxError::ExceededConfiguredLimit));
162 }
163 self.data.name.push(c);
164 None
165 },
166 Token::Character(c) if is_whitespace_char(c) => {
167 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::BeforeEntityValue))
168 },
169 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
170 },
171 DoctypeSubstate::PEReferenceInDtd => match t {
172 Token::Character(c) if is_name_char(c) => {
173 self.data.ref_data.push(c);
174 None
175 },
176 Token::ReferenceEnd | Token::Character(';') => {
177 let name = self.data.take_ref_data();
178 match self.entities.get(&name) {
179 Some(ent) => {
180 if let Err(e) = self.lexer.reparse(ent) {
181 return Some(Err(e));
182 }
183 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
184 },
185 None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))),
186 }
187 },
188 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
189 },
190 DoctypeSubstate::PEReferenceInValue => match t {
191 Token::Character(c) if is_name_char(c) => {
192 self.data.ref_data.push(c);
193 None
194 },
195 Token::ReferenceEnd | Token::Character(';') => {
196 let name = self.data.take_ref_data();
197 match self.entities.get(&name) {
198 Some(ent) => {
199 self.buf.push_str(ent);
200 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
201 },
202 None => Some(self.error(SyntaxError::UndefinedEntity(name.into()))),
203 }
204 },
205 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
206 },
207 DoctypeSubstate::NumericReferenceStart => match t {
208 Token::Character('#') => {
209 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::NumericReference))
210 },
211 Token::Character(c) if !self.is_valid_xml_char(c) => {
212 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
213 },
214 Token::Character(c) => {
215 self.buf.push('&');
216 self.buf.push(c);
217 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
219 },
220 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
221 },
222 DoctypeSubstate::NumericReference => match t {
223 Token::ReferenceEnd | Token::Character(';') => {
224 let r = self.data.take_ref_data();
225 match self.numeric_reference_from_str(&r) {
227 Ok(c) => {
228 self.buf.push(c);
229 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::EntityValue))
230 },
231 Err(e) => Some(self.error(e)),
232 }
233 },
234 Token::Character(c) if !self.is_valid_xml_char(c) => {
235 Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
236 },
237 Token::Character(c) => {
238 self.data.ref_data.push(c);
239 None
240 },
241 _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
242 },
243 DoctypeSubstate::SkipDeclaration => match t {
244 Token::TagEnd => {
245 self.into_state_continue(State::InsideDoctype(DoctypeSubstate::Outside))
246 },
247 _ => None,
248 },
249 }
250 }
251}