memchr/arch/all/
mod.rs

1/*!
2Contains architecture independent routines.
3
4These routines are often used as a "fallback" implementation when the more
5specialized architecture dependent routines are unavailable.
6*/
7
8pub mod memchr;
9pub mod packedpair;
10pub mod rabinkarp;
11#[cfg(feature = "alloc")]
12pub mod shiftor;
13pub mod twoway;
14
15/// Returns true if and only if `needle` is a prefix of `haystack`.
16///
17/// This uses a latency optimized variant of `memcmp` internally which *might*
18/// make this faster for very short strings.
19///
20/// # Inlining
21///
22/// This routine is marked `inline(always)`. If you want to call this function
23/// in a way that is not always inlined, you'll need to wrap a call to it in
24/// another function that is marked as `inline(never)` or just `inline`.
25#[inline(always)]
26pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool {
27    needle.len() <= haystack.len()
28        && is_equal(&haystack[..needle.len()], needle)
29}
30
31/// Returns true if and only if `needle` is a suffix of `haystack`.
32///
33/// This uses a latency optimized variant of `memcmp` internally which *might*
34/// make this faster for very short strings.
35///
36/// # Inlining
37///
38/// This routine is marked `inline(always)`. If you want to call this function
39/// in a way that is not always inlined, you'll need to wrap a call to it in
40/// another function that is marked as `inline(never)` or just `inline`.
41#[inline(always)]
42pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool {
43    needle.len() <= haystack.len()
44        && is_equal(&haystack[haystack.len() - needle.len()..], needle)
45}
46
47/// Compare corresponding bytes in `x` and `y` for equality.
48///
49/// That is, this returns true if and only if `x.len() == y.len()` and
50/// `x[i] == y[i]` for all `0 <= i < x.len()`.
51///
52/// # Inlining
53///
54/// This routine is marked `inline(always)`. If you want to call this function
55/// in a way that is not always inlined, you'll need to wrap a call to it in
56/// another function that is marked as `inline(never)` or just `inline`.
57///
58/// # Motivation
59///
60/// Why not use slice equality instead? Well, slice equality usually results in
61/// a call out to the current platform's `libc` which might not be inlineable
62/// or have other overhead. This routine isn't guaranteed to be a win, but it
63/// might be in some cases.
64#[inline(always)]
65pub fn is_equal(x: &[u8], y: &[u8]) -> bool {
66    if x.len() != y.len() {
67        return false;
68    }
69    // SAFETY: Our pointers are derived directly from borrowed slices which
70    // uphold all of our safety guarantees except for length. We account for
71    // length with the check above.
72    unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) }
73}
74
/// Compare `n` bytes at the given pointers for equality.
///
/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
/// `0 <= i < n`.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
///
/// # Motivation
///
/// Why not use slice equality instead? Well, slice equality usually results in
/// a call out to the current platform's `libc` which might not be inlineable
/// or have other overhead. This routine isn't guaranteed to be a win, but it
/// might be in some cases.
///
/// # Safety
///
/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
/// * Both `x` and `y` must point to an initialized value.
/// * Both `x` and `y` must each point to an allocated object and
/// must either be in bounds or at most one byte past the end of the
/// allocated object. `x` and `y` do not need to point to the same allocated
/// object, but they may.
/// * Both `x` and `y` must be _derived from_ a pointer to their respective
/// allocated objects.
/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
/// for `y` and `y+n`.
/// * The distance being in bounds must not rely on "wrapping around" the
/// address space.
#[inline(always)]
pub unsafe fn is_equal_raw(
    x: *const u8,
    y: *const u8,
    n: usize,
) -> bool {
    // Lengths below 4 can't use the 4-bytes-at-a-time strategy below, so
    // each one gets a bespoke comparison. Note that a byte-at-a-time loop
    // was tried here previously and it turned out to be quite a bit slower
    // for the memmem/pathological/defeat-simple-vector-alphabet benchmark.
    match n {
        0 => return true,
        1 => return x.read() == y.read(),
        2 => {
            return x.cast::<u16>().read_unaligned()
                == y.cast::<u16>().read_unaligned();
        }
        // `copy_nonoverlapping` was also tried here and the codegen looked
        // identical.
        3 => {
            return x.cast::<[u8; 3]>().read() == y.cast::<[u8; 3]>().read();
        }
        _ => {}
    }
    // With at least 4 bytes to compare, proceed in chunks of 4 at a time
    // using unaligned loads.
    //
    // Why 4 byte loads instead of, say, 8 byte loads? The reason is that
    // this particular version of memcmp is likely to be called with tiny
    // needles. That means that if we did 8 byte loads, a higher proportion
    // of memcmp calls would use the slower variant above. With that said,
    // this is a hypothesis and is only loosely supported by benchmarks.
    // There's likely some improvement that could be made here. The main
    // thing though is to optimize for latency, not throughput.

    // SAFETY: The caller is responsible for ensuring the pointers we get are
    // valid and readable for at least `n` bytes. We also do unaligned loads,
    // so there's no need to ensure we're aligned. (This is justified by this
    // routine being specifically for short strings.)

    // `last` is the offset of the final 4-byte chunk; it cannot underflow
    // because the `match` above handled every `n < 4`.
    let last = n - 4;
    let mut i = 0;
    while i < last {
        let cx = x.add(i).cast::<u32>().read_unaligned();
        let cy = y.add(i).cast::<u32>().read_unaligned();
        if cx != cy {
            return false;
        }
        i += 4;
    }
    // The final chunk may overlap the previous one when `n` is not a
    // multiple of 4, but comparing some bytes twice is harmless.
    let cx = x.add(last).cast::<u32>().read_unaligned();
    let cy = y.add(last).cast::<u32>().read_unaligned();
    cx == cy
}
161
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn equals_different_lengths() {
        // Any length mismatch must report inequality, in either order.
        let pairs: &[(&[u8], &[u8])] =
            &[(b"", b"a"), (b"a", b""), (b"ab", b"a"), (b"a", b"ab")];
        for &(x, y) in pairs {
            assert!(!is_equal(x, y));
        }
    }

    #[test]
    fn equals_mismatch() {
        // For each length 1..=14, compare a prefix of `base` against a copy
        // whose final byte has been swapped for 'x', in both orders. This
        // exercises a mismatch at every position relative to chunk size.
        let base = b"abcdefghijklmn";
        let mut altered = [0u8; 14];
        for len in 1..=base.len() {
            altered[..len].copy_from_slice(&base[..len]);
            altered[len - 1] = b'x';
            let (x, y) = (&base[..len], &altered[..len]);
            assert_eq!(x.len(), y.len(), "lengths should match");
            assert!(!is_equal(x, y));
            assert!(!is_equal(y, x));
        }
    }

    #[test]
    fn equals_yes() {
        // Identical slices of every length 0..=9 compare equal.
        let base = b"abcdefghi";
        for len in 0..=base.len() {
            assert!(is_equal(&base[..len], &base[..len]));
        }
    }

    #[test]
    fn prefix() {
        let yes: &[(&[u8], &[u8])] = &[
            (b"", b""),
            (b"a", b""),
            (b"ab", b""),
            (b"foo", b"foo"),
            (b"foobar", b"foo"),
        ];
        for &(haystack, needle) in yes {
            assert!(is_prefix(haystack, needle));
        }
        let no: &[(&[u8], &[u8])] =
            &[(b"foo", b"fob"), (b"foobar", b"fob")];
        for &(haystack, needle) in no {
            assert!(!is_prefix(haystack, needle));
        }
    }

    #[test]
    fn suffix() {
        let yes: &[(&[u8], &[u8])] = &[
            (b"", b""),
            (b"a", b""),
            (b"ab", b""),
            (b"foo", b"foo"),
            (b"foobar", b"bar"),
        ];
        for &(haystack, needle) in yes {
            assert!(is_suffix(haystack, needle));
        }
        let no: &[(&[u8], &[u8])] =
            &[(b"foo", b"goo"), (b"foobar", b"gar")];
        for &(haystack, needle) in no {
            assert!(!is_suffix(haystack, needle));
        }
    }
}