xml/reader/parser/
inside_processing_instruction.rs

1use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
2use crate::reader::error::SyntaxError;
3
4use crate::reader::events::XmlEvent;
5use crate::reader::lexer::Token;
6
7use super::{DeclarationSubstate, Encountered, ProcessingInstructionSubstate, PullParser, Result, State};
8
9impl PullParser {
10    pub fn inside_processing_instruction(&mut self, t: Token, s: ProcessingInstructionSubstate) -> Option<Result> {
11        match s {
12            ProcessingInstructionSubstate::PIInsideName => match t {
13                Token::Character(c) if self.buf.is_empty() && is_name_start_char(c) ||
14                                 self.buf_has_data() && is_name_char(c) => {
15                    if self.buf.len() > self.config.max_name_length {
16                        return Some(self.error(SyntaxError::ExceededConfiguredLimit));
17                    }
18                    self.buf.push(c);
19                    None
20                },
21
22                Token::ProcessingInstructionEnd => {
23                    // self.buf contains PI name
24                    let name = self.take_buf();
25
26                    // Don't need to check for declaration because it has mandatory attributes
27                    // but there is none
28                    match &*name {
29                        // Name is empty, it is an error
30                        "" => Some(self.error(SyntaxError::ProcessingInstructionWithoutName)),
31
32                        // Found <?xml-like PI not at the beginning of a document,
33                        // it is an error - see section 2.6 of XML 1.1 spec
34                        n if "xml".eq_ignore_ascii_case(n) =>
35                            Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))),
36
37                        // All is ok, emitting event
38                        _ => {
39                            debug_assert!(self.next_event.is_none(), "{:?}", self.next_event);
40                            // can't have a PI before `<?xml`
41                            let event1 = self.set_encountered(Encountered::Declaration);
42                            let event2 = Some(Ok(XmlEvent::ProcessingInstruction {
43                                name,
44                                data: None
45                            }));
46                            // emitting two events at once is cumbersome
47                            let event1 = if event1.is_some() {
48                                self.next_event = event2;
49                                event1
50                            } else {
51                                event2
52                            };
53                            self.into_state(State::OutsideTag, event1)
54                        },
55                    }
56                },
57
58                Token::Character(c) if is_whitespace_char(c) => {
59                    // self.buf contains PI name
60                    let name = self.take_buf();
61
62                    match &*name {
63                        // We have not ever encountered an element and have not parsed XML declaration
64                        "xml" if self.encountered == Encountered::None =>
65                            self.into_state_continue(State::InsideDeclaration(DeclarationSubstate::BeforeVersion)),
66
67                        // Found <?xml-like PI after the beginning of a document,
68                        // it is an error - see section 2.6 of XML 1.1 spec
69                        n if "xml".eq_ignore_ascii_case(n) =>
70                            Some(self.error(SyntaxError::InvalidXmlProcessingInstruction(name.into()))),
71
72                        // All is ok, starting parsing PI data
73                        _ => {
74                            self.data.name = name;
75                            // can't have a PI before `<?xml`
76                            let next_event = self.set_encountered(Encountered::Declaration);
77                            self.into_state(State::InsideProcessingInstruction(ProcessingInstructionSubstate::PIInsideData), next_event)
78                        },
79                    }
80                },
81
82                _ => {
83                    let buf = self.take_buf();
84                    Some(self.error(SyntaxError::UnexpectedProcessingInstruction(buf.into(), t)))
85                },
86            },
87
88            ProcessingInstructionSubstate::PIInsideData => match t {
89                Token::ProcessingInstructionEnd => {
90                    let name = self.data.take_name();
91                    let data = self.take_buf();
92                    self.into_state_emit(
93                        State::OutsideTag,
94                        Ok(XmlEvent::ProcessingInstruction { name, data: Some(data) }),
95                    )
96                },
97
98                Token::Character(c) if !self.is_valid_xml_char(c) => {
99                    Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32)))
100                },
101
102                // Any other token should be treated as plain characters
103                _ => {
104                    if self.buf.len() > self.config.max_data_length {
105                        return Some(self.error(SyntaxError::ExceededConfiguredLimit));
106                    }
107                    t.push_to_string(&mut self.buf);
108                    None
109                },
110            },
111        }
112    }
113}