xml/reader/
indexset.rs

1use crate::attribute::OwnedAttribute;
2use crate::name::OwnedName;
3
4use std::collections::hash_map::RandomState;
5use std::collections::HashSet;
6use std::hash::{BuildHasher, Hash, Hasher};
7
8/// An ordered set
9pub struct AttributesSet {
10    vec: Vec<OwnedAttribute>,
11    /// Uses a no-op hasher, because these u64s are hashes already
12    may_contain: HashSet<u64, U64HasherBuilder>,
13    /// This is real hasher for the `OwnedName`
14    hasher: RandomState,
15}
16
/// Number of attributes up to which lookups are done by linear search only.
///
/// For a handful of attributes, allocating a `HashSet` costs more than simply
/// comparing against each name, so the hash index is not built before this size.
const HASH_THRESHOLD: usize = 8;
20
21impl AttributesSet {
22    pub fn new() -> Self {
23        Self {
24            vec: Vec::new(),
25            hasher: RandomState::new(),
26            may_contain: HashSet::default(),
27        }
28    }
29
30    fn hash(&self, val: &OwnedName) -> u64 {
31        let mut h = self.hasher.build_hasher();
32        val.hash(&mut h);
33        h.finish()
34    }
35
36    pub fn len(&self) -> usize {
37        self.vec.len()
38    }
39
40    pub fn contains(&self, name: &OwnedName) -> bool {
41        // fall back to linear search only on duplicate or hash collision
42        (self.vec.len() < HASH_THRESHOLD || self.may_contain.contains(&self.hash(name))) &&
43            self.vec.iter().any(move |a| &a.name == name)
44    }
45
46    pub fn push(&mut self, attr: OwnedAttribute) {
47        if self.vec.len() >= HASH_THRESHOLD {
48            if self.vec.len() == HASH_THRESHOLD {
49                self.may_contain.reserve(HASH_THRESHOLD * 2);
50                for attr in &self.vec {
51                    self.may_contain.insert(self.hash(&attr.name));
52                }
53            }
54            self.may_contain.insert(self.hash(&attr.name));
55        }
56        self.vec.push(attr);
57    }
58
59    pub fn into_vec(self) -> Vec<OwnedAttribute> {
60        self.vec
61    }
62}
63
#[test]
fn indexset() {
    /// Builds a name that has neither a namespace nor a prefix.
    fn plain(local_name: String) -> OwnedName {
        OwnedName { local_name, namespace: None, prefix: None }
    }

    let mut set = AttributesSet::new();
    // Shares its local name with an inserted attribute but differs in namespace,
    // so it must never be reported as present.
    let absent = OwnedName {
        local_name: "attr1000".into(),
        namespace: Some("test".into()),
        prefix: None,
    };

    // If `contains()` were linear this loop would be quadratic and take a long time.
    for i in 0..50000 {
        let fresh = plain(format!("attr{i}"));
        assert!(!set.contains(&fresh));

        set.push(OwnedAttribute { name: fresh, value: String::new() });
        assert!(!set.contains(&absent));
    }

    // Spot-check an element from the middle, the very first, and the very last.
    for present in ["attr1234", "attr0", "attr49999"] {
        assert!(set.contains(&plain(present.into())));
    }
}
94
/// Hasher that does no real hashing: it XORs whatever is written into its state.
///
/// Starting from a zero state, writing a single `u64` yields that same `u64`
/// from `finish`, so values that are already hashes pass through unchanged.
struct U64Hasher(u64);

impl Hasher for U64Hasher {
    fn finish(&self) -> u64 {
        self.0
    }

    // Required by the trait, but unused in practice.
    fn write(&mut self, slice: &[u8]) {
        self.0 = slice.iter().fold(self.0, |acc, &byte| acc ^ u64::from(byte));
    }

    fn write_u64(&mut self, i: u64) {
        self.0 ^= i;
    }
}
107
108#[derive(Default)]
109struct U64HasherBuilder;
110
111impl BuildHasher for U64HasherBuilder {
112    type Hasher = U64Hasher;
113    fn build_hasher(&self) -> U64Hasher { U64Hasher(0) }
114}