places/
hash.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5// This is MAX_CHARS_TO_HASH in places, but I've renamed it because it's in bytes.
6// Note that the indices for slicing a Rust `str` are in bytes, so this is what
7// we want anyway.
8const MAX_BYTES_TO_HASH: usize = 1500;
9
10/// This should be identical to the "real" `mozilla::places::HashURL` with no prefix arg
11/// (see also `hash_url_prefix` for the version with one).
12///
13/// This returns a u64, but only the lower 48 bits should ever be set, so casting to
14/// an i64 is totally safe and lossless. If the string has no ':' in it, then the
15/// returned hash will be a 32 bit hash.
16pub fn hash_url(spec: &str) -> u64 {
17    let max_len_to_hash = spec.len().min(MAX_BYTES_TO_HASH);
18    let str_hash = u64::from(hash_string(&spec[..max_len_to_hash]));
19    let str_head = &spec[..spec.len().min(50)];
20    // We should be using memchr -- there's almost no chance we aren't
21    // already pulling it in transitively and it's supposedly *way* faster.
22    if let Some(pos) = str_head.as_bytes().iter().position(|&b| b == b':') {
23        let prefix_hash = u64::from(hash_string(&spec[..pos]) & 0x0000_ffff);
24        (prefix_hash << 32).wrapping_add(str_hash)
25    } else {
26        str_hash
27    }
28}
29
30#[derive(Clone, Copy, Debug, PartialEq, Eq)]
31pub enum PrefixMode {
32    /// Equivalent to `"prefix_lo"` in mozilla::places::HashURL
33    Lo,
34    /// Equivalent to `"prefix_hi"` in mozilla::places::HashURL
35    Hi,
36}
37
38/// This should be identical to the "real" `mozilla::places::HashURL` when given
39/// a prefix arg. Specifically:
40///
41/// - `hash_url_prefix(spec, PrefixMode::Lo)` is identical to
42/// - `hash_url_prefix(spec, PrefixMode::Hi)` is identical to
43///
44/// As with `hash_url`, it returns a u64, but only the lower 48 bits should ever be set, so
45/// casting to e.g. an i64 is lossless.
46pub fn hash_url_prefix(spec_prefix: &str, mode: PrefixMode) -> u64 {
47    let to_hash = &spec_prefix[..spec_prefix.len().min(MAX_BYTES_TO_HASH)];
48
49    // Keep 16 bits
50    let unshifted_hash = hash_string(to_hash) & 0x0000_ffff;
51    let hash = u64::from(unshifted_hash) << 32;
52    if mode == PrefixMode::Hi {
53        hash.wrapping_add(0xffff_ffffu64)
54    } else {
55        hash
56    }
57}
58
59// mozilla::kGoldenRatioU32
60const GOLDEN_RATIO: u32 = 0x9E37_79B9;
61
62// mozilla::AddU32ToHash
63#[inline]
64fn add_u32_to_hash(hash: u32, new_value: u32) -> u32 {
65    (hash.rotate_left(5) ^ new_value).wrapping_mul(GOLDEN_RATIO)
66}
67
68/// This should return identical results to `mozilla::HashString`!
69#[inline]
70pub fn hash_string(string: &str) -> u32 {
71    string
72        .as_bytes()
73        .iter()
74        .fold(0u32, |hash, &cur| add_u32_to_hash(hash, u32::from(cur)))
75}
76
77#[cfg(test)]
78mod tests {
79    use super::*;
80
81    #[test]
82    fn test_prefixes() {
83        // These are the unique 16 bits of the prefix. You can generate these with:
84        // `PlacesUtils.history.hashURL(val, "prefix_lo").toString(16).slice(0, 4)`.
85        let test_values = &[
86            ("http", 0x7226u16),
87            ("https", 0x2b12),
88            ("blob", 0x2612),
89            ("data", 0x9736),
90            ("chrome", 0x75fc),
91            ("resource", 0x37f8),
92            ("file", 0xc7c9),
93            ("place", 0xf434),
94        ];
95        for &(prefix, top16bits) in test_values {
96            let expected_lo = u64::from(top16bits) << 32;
97            let expected_hi = expected_lo | 0xffff_ffffu64;
98            assert_eq!(
99                hash_url_prefix(prefix, PrefixMode::Lo),
100                expected_lo,
101                "wrong value for hash_url_prefix({:?}, PrefixMode::Lo)",
102                prefix
103            );
104            assert_eq!(
105                hash_url_prefix(prefix, PrefixMode::Hi),
106                expected_hi,
107                "wrong value for hash_url_prefix({:?}, PrefixMode::Hi)",
108                prefix
109            );
110        }
111    }
112
113    #[test]
114    fn test_hash_url() {
115        // not actually a valid png, but whatever.
116        let data_url = "data:image/png;base64,".to_owned() + &"iVBORw0KGgoAAA".repeat(500);
117        let test_values = &[
118            ("http://www.example.com", 0x7226_2c1a_3496u64),
119            ("http://user:pass@foo:21/bar;par?b#c", 0x7226_61d2_18a7u64),
120            (
121                "https://github.com/mozilla/application-services/",
122                0x2b12_e7bd_7fcdu64,
123            ),
124            ("place:transition=7&sort=4", 0xf434_ac2b_2dafu64),
125            (
126                "blob:36c6ded1-6190-45f4-8fcd-355d1b6c9f48",
127                0x2612_0a43_1050u64,
128            ),
129            ("www.example.com", 0x8b14_9337u64), // URLs without a prefix are hashed to 32 bits
130            (&data_url[..], 0x9736_d65d_86d9u64),
131        ];
132
133        for &(url_str, hash) in test_values {
134            assert_eq!(hash_url(url_str), hash, "Wrong value for url {:?}", url_str);
135        }
136    }
137}