1use crate::{ParseError, err::{perr, ParseErrorKind::*}, parse::{hex_digit_value, check_suffix}};
2
3
4pub(crate) fn unescape<E: Escapee>(
18 input: &str,
19 offset: usize,
20 unicode: bool,
21 byte_escapes: bool,
22) -> Result<(E, usize), ParseError> {
23 let first = input.as_bytes().get(1)
24 .ok_or(perr(offset, UnterminatedEscape))?;
25 let out = match first {
26 b'\'' => (E::from_byte(b'\''), 2),
28 b'"' => (E::from_byte(b'"'), 2),
29
30 b'n' => (E::from_byte(b'\n'), 2),
32 b'r' => (E::from_byte(b'\r'), 2),
33 b't' => (E::from_byte(b'\t'), 2),
34 b'\\' => (E::from_byte(b'\\'), 2),
35 b'0' => (E::from_byte(b'\0'), 2),
36 b'x' => {
37 let hex_string = input.get(2..4)
38 .ok_or(perr(offset..offset + input.len(), UnterminatedEscape))?
39 .as_bytes();
40 let first = hex_digit_value(hex_string[0])
41 .ok_or(perr(offset..offset + 4, InvalidXEscape))?;
42 let second = hex_digit_value(hex_string[1])
43 .ok_or(perr(offset..offset + 4, InvalidXEscape))?;
44 let value = second + 16 * first;
45
46 if !byte_escapes && value > 0x7F {
47 return Err(perr(offset..offset + 4, NonAsciiXEscape));
48 }
49
50 (E::from_byte(value), 4)
51 },
52
53 b'u' => {
55 if !unicode {
56 return Err(perr(offset..offset + 2, UnicodeEscapeInByteLiteral));
57 }
58
59 if input.as_bytes().get(2) != Some(&b'{') {
60 return Err(perr(offset..offset + 2, UnicodeEscapeWithoutBrace));
61 }
62
63 let closing_pos = input.bytes().position(|b| b == b'}')
64 .ok_or(perr(offset..offset + input.len(), UnterminatedUnicodeEscape))?;
65
66 let inner = &input[3..closing_pos];
67 if inner.as_bytes().first() == Some(&b'_') {
68 return Err(perr(4, InvalidStartOfUnicodeEscape));
69 }
70
71 let mut v: u32 = 0;
72 let mut digit_count = 0;
73 for (i, b) in inner.bytes().enumerate() {
74 if b == b'_'{
75 continue;
76 }
77
78 let digit = hex_digit_value(b)
79 .ok_or(perr(offset + 3 + i, NonHexDigitInUnicodeEscape))?;
80
81 if digit_count == 6 {
82 return Err(perr(offset + 3 + i, TooManyDigitInUnicodeEscape));
83 }
84 digit_count += 1;
85 v = 16 * v + digit as u32;
86 }
87
88 let c = std::char::from_u32(v)
89 .ok_or(perr(offset..offset + closing_pos + 1, InvalidUnicodeEscapeChar))?;
90
91 (E::from_char(c), closing_pos + 1)
92 }
93
94 _ => return Err(perr(offset..offset + 2, UnknownEscape)),
95 };
96
97 Ok(out)
98}
99
100pub(crate) trait Escapee: Sized {
101 type Container: EscapeeContainer<Self>;
102 fn from_byte(b: u8) -> Self;
103 fn from_char(c: char) -> Self;
104}
105
106impl Escapee for u8 {
107 type Container = Vec<u8>;
108 fn from_byte(b: u8) -> Self {
109 b
110 }
111 fn from_char(_: char) -> Self {
112 panic!("bug: `<u8 as Escapee>::from_char` was called");
113 }
114}
115
116impl Escapee for char {
117 type Container = String;
118 fn from_byte(b: u8) -> Self {
119 b.into()
120 }
121 fn from_char(c: char) -> Self {
122 c
123 }
124}
125
126pub(crate) trait EscapeeContainer<E: Escapee> {
127 fn new() -> Self;
128 fn is_empty(&self) -> bool;
129 fn push(&mut self, v: E);
130 fn push_str(&mut self, s: &str);
131}
132
133impl EscapeeContainer<u8> for Vec<u8> {
134 fn new() -> Self { Self::new() }
135 fn is_empty(&self) -> bool { self.is_empty() }
136 fn push(&mut self, v: u8) { self.push(v); }
137 fn push_str(&mut self, s: &str) { self.extend_from_slice(s.as_bytes()); }
138}
139
140impl EscapeeContainer<char> for String {
141 fn new() -> Self { Self::new() }
142 fn is_empty(&self) -> bool { self.is_empty() }
143 fn push(&mut self, v: char) { self.push(v); }
144 fn push_str(&mut self, s: &str) { self.push_str(s); }
145}
146
147
/// Returns `true` for the bytes that are skipped after a "string continue"
/// (a backslash immediately followed by a line feed): space, tab and line feed.
fn is_string_continue_skipable_whitespace(b: u8) -> bool {
    matches!(b, b' ' | b'\t' | b'\n')
}
153
154#[inline(never)]
156pub(crate) fn unescape_string<E: Escapee>(
157 input: &str,
158 offset: usize,
159 unicode: bool,
160 byte_escapes: bool,
161) -> Result<(Option<E::Container>, usize), ParseError> {
162 let mut closing_quote_pos = None;
163 let mut i = offset;
164 let mut end_last_escape = offset;
165 let mut value = <E::Container>::new();
166 while i < input.len() {
167 match input.as_bytes()[i] {
168 b'\\' if input.as_bytes().get(i + 1) == Some(&b'\n') => {
170 value.push_str(&input[end_last_escape..i]);
171
172 let end_escape = input[i + 2..].bytes()
174 .position(|b| !is_string_continue_skipable_whitespace(b))
175 .ok_or(perr(None, UnterminatedString))?;
176
177 i += 2 + end_escape;
178 end_last_escape = i;
179 }
180 b'\\' => {
181 let rest = &input[i..input.len() - 1];
182 let (c, len) = unescape::<E>(rest, i, unicode, byte_escapes)?;
183 value.push_str(&input[end_last_escape..i]);
184 value.push(c);
185 i += len;
186 end_last_escape = i;
187 }
188 b'\r' => return Err(perr(i, CarriageReturn)),
189 b'"' => {
190 closing_quote_pos = Some(i);
191 break;
192 },
193 b if !unicode && !b.is_ascii() => return Err(perr(i, NonAsciiInByteLiteral)),
194 _ => i += 1,
195 }
196 }
197
198 let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedString))?;
199
200 let start_suffix = closing_quote_pos + 1;
201 let suffix = &input[start_suffix..];
202 check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;
203
204 let value = if value.is_empty() {
208 None
209 } else {
210 value.push_str(&input[end_last_escape..closing_quote_pos]);
213 Some(value)
214 };
215
216 Ok((value, start_suffix))
217}
218
219#[inline(never)]
222pub(crate) fn scan_raw_string<E: Escapee>(
223 input: &str,
224 offset: usize,
225 unicode: bool,
226) -> Result<(u32, usize), ParseError> {
227 let num_hashes = input[offset..].bytes().position(|b| b != b'#')
229 .ok_or(perr(None, InvalidLiteral))?;
230
231 if input.as_bytes().get(offset + num_hashes) != Some(&b'"') {
232 return Err(perr(None, InvalidLiteral));
233 }
234 let start_inner = offset + num_hashes + 1;
235 let hashes = &input[offset..num_hashes + offset];
236
237 let mut closing_quote_pos = None;
238 let mut i = start_inner;
239 while i < input.len() {
240 let b = input.as_bytes()[i];
241 if b == b'"' && input[i + 1..].starts_with(hashes) {
242 closing_quote_pos = Some(i);
243 break;
244 }
245
246 if b == b'\r' {
250 return Err(perr(i, CarriageReturn));
251 }
252
253 if !unicode {
254 if !b.is_ascii() {
255 return Err(perr(i, NonAsciiInByteLiteral));
256 }
257 }
258
259 i += 1;
260 }
261
262 let closing_quote_pos = closing_quote_pos.ok_or(perr(None, UnterminatedRawString))?;
263
264 let start_suffix = closing_quote_pos + num_hashes + 1;
265 let suffix = &input[start_suffix..];
266 check_suffix(suffix).map_err(|kind| perr(start_suffix, kind))?;
267
268 Ok((num_hashes as u32, start_suffix))
269}