xml/reader/parser/
inside_declaration.rs

1use crate::common::{is_whitespace_char, XmlVersion};
2use crate::reader::error::SyntaxError;
3use crate::reader::events::XmlEvent;
4use crate::reader::lexer::Token;
5use crate::util::Encoding;
6
7use super::{
8    DeclarationSubstate, Encountered, PullParser, QualifiedNameTarget, Result, State,
9    DEFAULT_VERSION,
10};
11
12impl PullParser {
13    #[inline(never)]
14    fn emit_start_document(&mut self) -> Option<Result> {
15        debug_assert!(self.encountered == Encountered::None);
16        self.encountered = Encountered::Declaration;
17
18        let version = self.data.version;
19        let encoding = self.data.take_encoding();
20        let standalone = self.data.standalone;
21
22        if let Some(new_encoding) = encoding.as_deref() {
23            let new_encoding = match new_encoding.parse() {
24                Ok(e) => e,
25                Err(_) if self.config.ignore_invalid_encoding_declarations => Encoding::Latin1,
26                Err(_) => return Some(self.error(SyntaxError::UnsupportedEncoding(new_encoding.into()))),
27            };
28            let current_encoding = self.lexer.encoding();
29            if current_encoding != new_encoding {
30                let set = match (current_encoding, new_encoding) {
31                    (Encoding::Unknown | Encoding::Default, new) if new != Encoding::Utf16 => new,
32                    (Encoding::Utf16Be | Encoding::Utf16Le, Encoding::Utf16) => current_encoding,
33                    _ if self.config.ignore_invalid_encoding_declarations => current_encoding,
34                    _ => return Some(self.error(SyntaxError::ConflictingEncoding(new_encoding, current_encoding))),
35                };
36                self.lexer.set_encoding(set);
37            }
38        }
39
40        let current_encoding = self.lexer.encoding();
41        self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartDocument {
42            version: version.unwrap_or(DEFAULT_VERSION),
43            encoding: encoding.unwrap_or_else(move || current_encoding.to_string()),
44            standalone
45        }))
46    }
47
48    // TODO: remove redundancy via macros or extra methods
49    pub fn inside_declaration(&mut self, t: Token, s: DeclarationSubstate) -> Option<Result> {
50
51        match s {
52            DeclarationSubstate::BeforeVersion => match t {
53                Token::Character('v') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersion)),
54                Token::Character(c) if is_whitespace_char(c) => None,  // continue
55                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
56            },
57
58            DeclarationSubstate::InsideVersion => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
59                match &*name.local_name {
60                    "ersion" if name.namespace.is_none() =>
61                        this.into_state_continue(State::InsideDeclaration(
62                            if token == Token::EqualsSign {
63                                DeclarationSubstate::InsideVersionValue
64                            } else {
65                                DeclarationSubstate::AfterVersion
66                            }
67                        )),
68                    _ => Some(this.error(SyntaxError::UnexpectedNameInsideXml(name.to_string().into()))),
69                }
70            }),
71
72            DeclarationSubstate::AfterVersion => match t {
73                Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideVersionValue)),
74                Token::Character(c) if is_whitespace_char(c) => None,
75                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
76            },
77
78            DeclarationSubstate::InsideVersionValue => self.read_attribute_value(t, |this, value| {
79                this.data.version = match &*value {
80                    "1.0" => Some(XmlVersion::Version10),
81                    "1.1" => Some(XmlVersion::Version11),
82                    _     => None
83                };
84                if this.data.version.is_some() {
85                    this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterVersionValue))
86                } else {
87                    Some(this.error(SyntaxError::UnexpectedXmlVersion(value.into())))
88                }
89            }),
90
91            DeclarationSubstate::AfterVersionValue => match t {
92                Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeEncoding)),
93                Token::ProcessingInstructionEnd => self.emit_start_document(),
94                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
95            },
96
97            DeclarationSubstate::BeforeEncoding => match t {
98                Token::Character('e') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncoding)),
99                Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
100                Token::ProcessingInstructionEnd => self.emit_start_document(),
101                Token::Character(c) if is_whitespace_char(c) => None,  // skip whitespace
102                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
103            },
104
105            DeclarationSubstate::InsideEncoding => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
106                match &*name.local_name {
107                    "ncoding" if name.namespace.is_none() =>
108                        this.into_state_continue(State::InsideDeclaration(
109                            if token == Token::EqualsSign { DeclarationSubstate::InsideEncodingValue } else { DeclarationSubstate::AfterEncoding }
110                        )),
111                    _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into())))
112                }
113            }),
114
115            DeclarationSubstate::AfterEncoding => match t {
116                Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideEncodingValue)),
117                Token::Character(c) if is_whitespace_char(c) => None,
118                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
119            },
120
121            DeclarationSubstate::InsideEncodingValue => self.read_attribute_value(t, |this, value| {
122                this.data.encoding = Some(value);
123                this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterEncodingValue))
124            }),
125
126            DeclarationSubstate::AfterEncodingValue => match t {
127                Token::Character(c) if is_whitespace_char(c) => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeStandaloneDecl)),
128                Token::ProcessingInstructionEnd => self.emit_start_document(),
129                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
130            },
131
132            DeclarationSubstate::BeforeStandaloneDecl => match t {
133                Token::Character('s') => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDecl)),
134                Token::ProcessingInstructionEnd => self.emit_start_document(),
135                Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
136                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
137            },
138
139            DeclarationSubstate::InsideStandaloneDecl => self.read_qualified_name(t, QualifiedNameTarget::AttributeNameTarget, |this, token, name| {
140                match &*name.local_name {
141                    "tandalone" if name.namespace.is_none() =>
142                        this.into_state_continue(State::InsideDeclaration(
143                            if token == Token::EqualsSign {
144                                DeclarationSubstate::InsideStandaloneDeclValue
145                            } else {
146                                DeclarationSubstate::AfterStandaloneDecl
147                            }
148                        )),
149                    _ => Some(this.error(SyntaxError::UnexpectedName(name.to_string().into()))),
150                }
151            }),
152
153            DeclarationSubstate::AfterStandaloneDecl => match t {
154                Token::EqualsSign => self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::InsideStandaloneDeclValue)),
155                Token::Character(c) if is_whitespace_char(c) => None,
156                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
157            },
158
159            DeclarationSubstate::InsideStandaloneDeclValue => self.read_attribute_value(t, |this, value| {
160                let standalone = match &*value {
161                    "yes" => Some(true),
162                    "no"  => Some(false),
163                    _     => None
164                };
165                if standalone.is_some() {
166                    this.data.standalone = standalone;
167                    this.into_state_continue(State::InsideDeclaration(DeclarationSubstate::AfterStandaloneDeclValue))
168                } else {
169                    Some(this.error(SyntaxError::InvalidStandaloneDeclaration(value.into())))
170                }
171            }),
172
173            DeclarationSubstate::AfterStandaloneDeclValue => match t {
174                Token::ProcessingInstructionEnd => self.emit_start_document(),
175                Token::Character(c) if is_whitespace_char(c) => None, // skip whitespace
176                _ => Some(self.error(SyntaxError::UnexpectedToken(t))),
177            },
178        }
179    }
180}