litrs/integer/
mod.rs

1use std::fmt;
2
3use crate::{
4    Buffer, ParseError,
5    err::{perr, ParseErrorKind::*},
6    parse::{first_byte_or_empty, hex_digit_value},
7};
8
9
/// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`.
///
/// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`),
/// the main part (digits and underscores), and an optional type suffix
/// (e.g. `u64` or `i8`). See [the reference][ref] for more information.
///
/// Note that integer literals are never negative: the grammar does not contain
/// the minus sign at all. The minus sign is just the unary negate operator,
/// not part of the literal. This is interesting for cases like `- 128i8`:
/// here, the literal itself would overflow the specified type (`i8` cannot
/// represent 128). That's why in rustc, the literal overflow check is
/// performed as a lint after parsing, not during the lexing stage. Similarly,
/// [`IntegerLit::parse`] does not perform an overflow check.
///
/// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub struct IntegerLit<B: Buffer> {
    /// Base selected by the literal's prefix (decimal if there is no prefix).
    base: IntegerBase,
    /// Only the digits and underscores: neither the base prefix nor the type
    /// suffix is part of this buffer.
    main_part: B,
    /// The parsed type suffix, or `None` if the literal has no suffix.
    type_suffix: Option<IntegerType>,
}
32
/// The bases in which an integer can be specified.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerBase {
    /// Base 2, indicated by the prefix `0b`.
    Binary,
    /// Base 8, indicated by the prefix `0o`.
    Octal,
    /// Base 10, used when the literal has no base prefix.
    Decimal,
    /// Base 16, indicated by the prefix `0x`.
    Hexadecimal,
}
41
/// All possible integer type suffixes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IntegerType {
    /// The `u8` suffix.
    U8,
    /// The `u16` suffix.
    U16,
    /// The `u32` suffix.
    U32,
    /// The `u64` suffix.
    U64,
    /// The `u128` suffix.
    U128,
    /// The `usize` suffix.
    Usize,
    /// The `i8` suffix.
    I8,
    /// The `i16` suffix.
    I16,
    /// The `i32` suffix.
    I32,
    /// The `i64` suffix.
    I64,
    /// The `i128` suffix.
    I128,
    /// The `isize` suffix.
    Isize,
}
58
59impl IntegerBase {
60    /// Returns the literal prefix that indicates this base, i.e. `"0b"`,
61    /// `"0o"`, `""` and `"0x"`.
62    pub fn prefix(self) -> &'static str {
63        match self {
64            Self::Binary => "0b",
65            Self::Octal => "0o",
66            Self::Decimal => "",
67            Self::Hexadecimal => "0x",
68        }
69    }
70}
71
72impl<B: Buffer> IntegerLit<B> {
73    /// Parses the input as an integer literal. Returns an error if the input is
74    /// invalid or represents a different kind of literal.
75    pub fn parse(input: B) -> Result<Self, ParseError> {
76        match first_byte_or_empty(&input)? {
77            digit @ b'0'..=b'9' => Self::parse_impl(input, digit),
78            _ => Err(perr(0, DoesNotStartWithDigit)),
79        }
80    }
81
82    /// Performs the actual string to int conversion to obtain the integer
83    /// value. The optional type suffix of the literal **is ignored by this
84    /// method**. This means `N` does not need to match the type suffix!
85    ///
86    /// Returns `None` if the literal overflows `N`.
87    pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> {
88        let base = match self.base {
89            IntegerBase::Binary => N::from_small_number(2),
90            IntegerBase::Octal => N::from_small_number(8),
91            IntegerBase::Decimal => N::from_small_number(10),
92            IntegerBase::Hexadecimal => N::from_small_number(16),
93        };
94
95        let mut acc = N::from_small_number(0);
96        for digit in self.main_part.bytes() {
97            if digit == b'_' {
98                continue;
99            }
100
101            // We don't actually need the base here: we already know this main
102            // part only contains digits valid for the specified base.
103            let digit = hex_digit_value(digit)
104                .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit"));
105
106            acc = acc.checked_mul(base)?;
107            acc = acc.checked_add(N::from_small_number(digit))?;
108        }
109
110        Some(acc)
111    }
112
113    /// The base of this integer literal.
114    pub fn base(&self) -> IntegerBase {
115        self.base
116    }
117
118    /// The main part containing the digits and potentially `_`. Do not try to
119    /// parse this directly as that would ignore the base!
120    pub fn raw_main_part(&self) -> &str {
121        &self.main_part
122    }
123
124    /// The type suffix, if specified.
125    pub fn type_suffix(&self) -> Option<IntegerType> {
126        self.type_suffix
127    }
128
129    /// Precondition: first byte of string has to be in `b'0'..=b'9'`.
130    pub(crate) fn parse_impl(input: B, first: u8) -> Result<Self, ParseError> {
131        // Figure out base and strip prefix base, if it exists.
132        let (end_prefix, base) = match (first, input.as_bytes().get(1)) {
133            (b'0', Some(b'b')) => (2, IntegerBase::Binary),
134            (b'0', Some(b'o')) => (2, IntegerBase::Octal),
135            (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal),
136
137            // Everything else is treated as decimal. Several cases are caught
138            // by this:
139            // - "123"
140            // - "0"
141            // - "0u8"
142            // - "0r" -> this will error later
143            _ => (0, IntegerBase::Decimal),
144        };
145        let without_prefix = &input[end_prefix..];
146
147        // Find end of main part.
148        let end_main = without_prefix.bytes()
149                .position(|b| !matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'))
150                .unwrap_or(without_prefix.len());
151        let (main_part, type_suffix) = without_prefix.split_at(end_main);
152
153        // Check for invalid digits and make sure there is at least one valid digit.
154        let invalid_digit_pos = match base {
155            IntegerBase::Binary => main_part.bytes()
156                .position(|b| !matches!(b, b'0' | b'1' | b'_')),
157            IntegerBase::Octal => main_part.bytes()
158                .position(|b| !matches!(b, b'0'..=b'7' | b'_')),
159            IntegerBase::Decimal => main_part.bytes()
160                .position(|b| !matches!(b, b'0'..=b'9' | b'_')),
161            IntegerBase::Hexadecimal => None,
162        };
163
164        if let Some(pos) = invalid_digit_pos {
165            return Err(perr(end_prefix + pos, InvalidDigit));
166        }
167
168        if main_part.bytes().filter(|&b| b != b'_').count() == 0 {
169            return Err(perr(end_prefix..end_prefix + end_main, NoDigits));
170        }
171
172
173        // Parse type suffix
174        let type_suffix = match type_suffix {
175            "" => None,
176            "u8" => Some(IntegerType::U8),
177            "u16" => Some(IntegerType::U16),
178            "u32" => Some(IntegerType::U32),
179            "u64" => Some(IntegerType::U64),
180            "u128" => Some(IntegerType::U128),
181            "usize" => Some(IntegerType::Usize),
182            "i8" => Some(IntegerType::I8),
183            "i16" => Some(IntegerType::I16),
184            "i32" => Some(IntegerType::I32),
185            "i64" => Some(IntegerType::I64),
186            "i128" => Some(IntegerType::I128),
187            "isize" => Some(IntegerType::Isize),
188            _ => return Err(perr(end_main + end_prefix..input.len(), InvalidIntegerTypeSuffix)),
189        };
190
191        Ok(Self {
192            base,
193            main_part: input.cut(end_prefix..end_main + end_prefix),
194            type_suffix,
195        })
196    }
197}
198
199impl IntegerLit<&str> {
200    /// Makes a copy of the underlying buffer and returns the owned version of
201    /// `Self`.
202    pub fn to_owned(&self) -> IntegerLit<String> {
203        IntegerLit {
204            base: self.base,
205            main_part: self.main_part.to_owned(),
206            type_suffix: self.type_suffix,
207        }
208    }
209}
210
211impl<B: Buffer> fmt::Display for IntegerLit<B> {
212    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
213        let suffix = match self.type_suffix {
214            None => "",
215            Some(IntegerType::U8) => "u8",
216            Some(IntegerType::U16) => "u16",
217            Some(IntegerType::U32) => "u32",
218            Some(IntegerType::U64) => "u64",
219            Some(IntegerType::U128) => "u128",
220            Some(IntegerType::Usize) => "usize",
221            Some(IntegerType::I8) => "i8",
222            Some(IntegerType::I16) => "i16",
223            Some(IntegerType::I32) => "i32",
224            Some(IntegerType::I64) => "i64",
225            Some(IntegerType::I128) => "i128",
226            Some(IntegerType::Isize) => "isize",
227        };
228        write!(f, "{}{}{}", self.base.prefix(), &*self.main_part, suffix)
229    }
230}
231
/// Integer literal types. *Implementation detail*.
///
/// Implemented for all integer literal types. This trait is sealed and cannot
/// be implemented outside of this crate. The trait's methods are an
/// implementation detail of this library and are not subject to semver.
pub trait FromIntegerLiteral: self::sealed::Sealed + Copy {
    /// Creates itself from the given number. `n` is guaranteed to be `<= 16`.
    #[doc(hidden)]
    fn from_small_number(n: u8) -> Self;

    /// Adds `rhs` to `self`, returning `None` if the result overflows.
    #[doc(hidden)]
    fn checked_add(self, rhs: Self) -> Option<Self>;

    /// Multiplies `self` by `rhs`, returning `None` if the result overflows.
    #[doc(hidden)]
    fn checked_mul(self, rhs: Self) -> Option<Self>;

    /// Returns the [`IntegerType`] variant that corresponds to the
    /// implementing type.
    #[doc(hidden)]
    fn ty() -> IntegerType;
}
251
252macro_rules! impl_from_int_literal {
253    ($( $ty:ty => $variant:ident ,)* ) => {
254        $(
255            impl self::sealed::Sealed for $ty {}
256            impl FromIntegerLiteral for $ty {
257                fn from_small_number(n: u8) -> Self {
258                    n as Self
259                }
260                fn checked_add(self, rhs: Self) -> Option<Self> {
261                    self.checked_add(rhs)
262                }
263                fn checked_mul(self, rhs: Self) -> Option<Self> {
264                    self.checked_mul(rhs)
265                }
266                fn ty() -> IntegerType {
267                    IntegerType::$variant
268                }
269            }
270        )*
271    };
272}
273
274impl_from_int_literal!(
275    u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize,
276    i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize,
277);
278
// Private module holding the supertrait of `FromIntegerLiteral`. As `Sealed`
// cannot be named from outside this crate, other crates are unable to
// implement `FromIntegerLiteral` for their own types.
mod sealed {
    pub trait Sealed {}
}
282
283
// The unit tests are kept in a separate `tests` module, which is only
// compiled for test builds.
#[cfg(test)]
mod tests;