xml/reader/parser/
inside_reference.rs

1use super::{PullParser, Result, State};
2use crate::common::{is_name_char, is_name_start_char, is_whitespace_char};
3use crate::reader::error::SyntaxError;
4use crate::reader::lexer::Token;
5use std::char;
6
7impl PullParser {
8    pub fn inside_reference(&mut self, t: Token) -> Option<Result> {
9        match t {
10            Token::Character(c) if !self.data.ref_data.is_empty() && is_name_char(c) ||
11                             self.data.ref_data.is_empty() && (is_name_start_char(c) || c == '#') => {
12                self.data.ref_data.push(c);
13                None
14            },
15
16            Token::ReferenceEnd => {
17                let name = self.data.take_ref_data();
18                if name.is_empty() {
19                    return Some(self.error(SyntaxError::EmptyEntity));
20                }
21
22                let c = match &*name {
23                    "lt"   => Some('<'),
24                    "gt"   => Some('>'),
25                    "amp"  => Some('&'),
26                    "apos" => Some('\''),
27                    "quot" => Some('"'),
28                    _ if name.starts_with('#') => match self.numeric_reference_from_str(&name[1..]) {
29                        Ok(c) => Some(c),
30                        Err(e) => return Some(self.error(e)),
31                    },
32                    _ => None,
33                };
34                if let Some(c) = c {
35                    self.buf.push(c);
36                } else if let Some(v) = self.config.c.extra_entities.get(&name) {
37                    self.buf.push_str(v);
38                } else if let Some(v) = self.entities.get(&name) {
39                    if self.state_after_reference == State::OutsideTag {
40                        // an entity can expand to *elements*, so outside of a tag it needs a full reparse
41                        if let Err(e) = self.lexer.reparse(v) {
42                            return Some(Err(e));
43                        }
44                    } else {
45                        // however, inside attributes it's not allowed to affect attribute quoting,
46                        // so it can't be fed to the lexer
47                        self.buf.push_str(v);
48                    }
49                } else {
50                    return Some(self.error(SyntaxError::UnexpectedEntity(name.into())));
51                }
52                let prev_st = self.state_after_reference;
53                if prev_st == State::OutsideTag && !is_whitespace_char(self.buf.chars().last().unwrap_or('\0')) {
54                    self.inside_whitespace = false;
55                }
56                self.into_state_continue(prev_st)
57            },
58
59            _ => Some(self.error(SyntaxError::UnexpectedTokenInEntity(t))),
60        }
61    }
62
63    pub(crate) fn numeric_reference_from_str(&self, num_str: &str) -> std::result::Result<char, SyntaxError> {
64        let val = if let Some(hex) = num_str.strip_prefix('x') {
65            u32::from_str_radix(hex, 16).map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))?
66        } else {
67            num_str.parse::<u32>().map_err(move |_| SyntaxError::InvalidNumericEntity(num_str.into()))?
68        };
69        match char::from_u32(val) {
70            Some(c) if self.is_valid_xml_char(c) => Ok(c),
71            Some(_) if self.config.c.replace_unknown_entity_references => Ok('\u{fffd}'),
72            None if self.config.c.replace_unknown_entity_references => Ok('\u{fffd}'),
73            _ => Err(SyntaxError::InvalidCharacterEntity(val)),
74        }
75    }
76}