sync_guid/
lib.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5#![allow(unknown_lints)]
6#![warn(rust_2018_idioms)]
7// (It's tempting to avoid the utf8 checks, but they're easy to get wrong, so)
8#![deny(unsafe_code)]
9#[cfg(feature = "serde_support")]
10mod serde_support;
11
12#[cfg(feature = "rusqlite_support")]
13mod rusqlite_support;
14
15use std::{
16    cmp::Ordering,
17    fmt,
18    hash::{Hash, Hasher},
19    ops, str,
20};
21
22#[cfg(feature = "random")]
23use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine};
24
/// This is a type intended to be used to represent the guids used by sync. It
/// has several benefits over using a `String`:
///
/// 1. It's more explicit about what is being stored, and could prevent bugs
///    where a Guid is passed to a function expecting text.
///
/// 2. Guids are guaranteed to be immutable.
///
/// 3. It's optimized for the guids commonly used by sync. In particular, short
///    guids (at most `MAX_FAST_GUID_LEN` bytes, which includes the 12-byte
///    guids that would meet `PlacesUtils.isValidGuid`) do not incur any heap
///    allocation, and are stored inline.
///
/// The representation is deliberately private: the only field is the
/// crate-internal `Repr` enum, so callers cannot observe or depend on whether
/// a given guid is stored inline or on the heap.
#[derive(Clone)]
pub struct Guid(Repr);
38
// The internal representation of a GUID. Most Sync GUIDs are 12 bytes,
// and contain only base64url characters; we can store them on the stack
// without a heap allocation. However, arbitrary ASCII guids of up to length 64
// are possible, in which case we fall back to a heap-allocated string.
//
// This is a separate type only because making `Guid` itself an enum would
// expose its variants, and therefore the internals, to users of the crate.
#[derive(Clone)]
enum Repr {
    // Inline storage; see `FastGuid` for invariants.
    Fast(FastGuid),

    // Heap-allocated fallback for guids that do not fit inline.
    //
    // Invariants:
    // - _0.len() > MAX_FAST_GUID_LEN
    Slow(String),
}
55
/// Inline storage for guids of at most `MAX_FAST_GUID_LEN` bytes.
///
/// Invariants:
///
/// - `len <= MAX_FAST_GUID_LEN`.
/// - `data[0..len]` encodes valid utf8.
/// - `data[len..].iter().all(|&b| b == b'\0')`
///
/// Note: None of these are required for memory safety, just correctness.
#[derive(Clone)]
struct FastGuid {
    // Number of meaningful bytes at the start of `data`.
    len: u8,
    // The guid's bytes, followed by zero padding up to the array length.
    data: [u8; MAX_FAST_GUID_LEN],
}
68
// This is the maximum length (experimentally determined) we can make it before
// `Repr::Fast` is larger than `Repr::Slow` on 32 bit systems. The important
// thing is really that it's not too big, and is above 12 bytes (the length of
// the guids sync most commonly uses).
const MAX_FAST_GUID_LEN: usize = 14;
73
74impl FastGuid {
75    #[inline]
76    fn from_slice(bytes: &[u8]) -> Self {
77        // Checked by the caller, so debug_assert is fine.
78        debug_assert!(
79            can_use_fast(bytes),
80            "Bug: Caller failed to check can_use_fast: {:?}",
81            bytes
82        );
83        let mut data = [0u8; MAX_FAST_GUID_LEN];
84        data[0..bytes.len()].copy_from_slice(bytes);
85        FastGuid {
86            len: bytes.len() as u8,
87            data,
88        }
89    }
90
91    #[inline]
92    fn as_str(&self) -> &str {
93        // Note: we only use debug_assert! to ensure valid utf8-ness, so this need
94        str::from_utf8(self.bytes()).expect("Invalid fast guid bytes!")
95    }
96
97    #[inline]
98    fn len(&self) -> usize {
99        self.len as usize
100    }
101
102    #[inline]
103    fn bytes(&self) -> &[u8] {
104        &self.data[0..self.len()]
105    }
106}
107
108// Returns:
109// - true to use Repr::Fast
110// - false to use Repr::Slow
111#[inline]
112fn can_use_fast<T: ?Sized + AsRef<[u8]>>(bytes: &T) -> bool {
113    let bytes = bytes.as_ref();
114    // This is fine as a debug_assert since we'll still panic if it's ever used
115    // in such a way where it would matter.
116    debug_assert!(str::from_utf8(bytes).is_ok());
117    bytes.len() <= MAX_FAST_GUID_LEN
118}
119
120impl Guid {
121    /// Create a guid from a `str`.
122    #[inline]
123    pub fn new(s: &str) -> Self {
124        Guid::from_slice(s.as_ref())
125    }
126
127    /// Create an empty guid. Usable as a constant.
128    #[inline]
129    pub const fn empty() -> Self {
130        Guid(Repr::Fast(FastGuid {
131            len: 0,
132            data: [0u8; MAX_FAST_GUID_LEN],
133        }))
134    }
135
136    /// Create a random guid (of 12 base64url characters). Requires the `random`
137    /// feature.
138    #[cfg(feature = "random")]
139    pub fn random() -> Self {
140        let bytes: [u8; 9] = rand::random();
141
142        // Note: only first 12 bytes are used, but remaining are required to
143        // build the FastGuid
144        let mut output = [0u8; MAX_FAST_GUID_LEN];
145
146        let bytes_written = URL_SAFE_NO_PAD
147            .encode_slice(bytes, &mut output[..12])
148            .expect("Output buffer too small");
149
150        debug_assert!(bytes_written == 12);
151
152        Guid(Repr::Fast(FastGuid {
153            len: 12,
154            data: output,
155        }))
156    }
157
158    /// Convert `b` into a `Guid`.
159    #[inline]
160    pub fn from_string(s: String) -> Self {
161        Guid::from_vec(s.into_bytes())
162    }
163
164    /// Convert `b` into a `Guid`.
165    #[inline]
166    pub fn from_slice(b: &[u8]) -> Self {
167        if can_use_fast(b) {
168            Guid(Repr::Fast(FastGuid::from_slice(b)))
169        } else {
170            Guid::new_slow(b.into())
171        }
172    }
173
174    /// Convert `v` to a `Guid`, consuming it.
175    #[inline]
176    pub fn from_vec(v: Vec<u8>) -> Self {
177        if can_use_fast(&v) {
178            Guid(Repr::Fast(FastGuid::from_slice(&v)))
179        } else {
180            Guid::new_slow(v)
181        }
182    }
183
184    /// Get the data backing this `Guid` as a `&[u8]`.
185    #[inline]
186    pub fn as_bytes(&self) -> &[u8] {
187        match &self.0 {
188            Repr::Fast(rep) => rep.bytes(),
189            Repr::Slow(rep) => rep.as_ref(),
190        }
191    }
192
193    /// Get the data backing this `Guid` as a `&str`.
194    #[inline]
195    pub fn as_str(&self) -> &str {
196        match &self.0 {
197            Repr::Fast(rep) => rep.as_str(),
198            Repr::Slow(rep) => rep.as_ref(),
199        }
200    }
201
202    /// Convert this `Guid` into a `String`, consuming it in the process.
203    #[inline]
204    pub fn into_string(self) -> String {
205        match self.0 {
206            Repr::Fast(rep) => rep.as_str().into(),
207            Repr::Slow(rep) => rep,
208        }
209    }
210
211    /// Returns true for Guids that are deemed valid by the sync server.
212    /// See https://github.com/mozilla-services/server-syncstorage/blob/d92ef07877aebd05b92f87f6ade341d6a55bffc8/syncstorage/bso.py#L24
213    pub fn is_valid_for_sync_server(&self) -> bool {
214        !self.is_empty()
215            && self.len() <= 64
216            && self
217                .bytes()
218                .all(|b| (b' '..=b'~').contains(&b) && b != b',')
219    }
220
221    /// Returns true for Guids that are valid places guids, and false for all others.
222    pub fn is_valid_for_places(&self) -> bool {
223        self.len() == 12 && self.bytes().all(Guid::is_valid_places_byte)
224    }
225
226    /// Returns true if the byte `b` is a valid base64url byte.
227    #[inline]
228    pub fn is_valid_places_byte(b: u8) -> bool {
229        BASE64URL_BYTES[b as usize] == 1
230    }
231
232    #[cold]
233    fn new_slow(v: Vec<u8>) -> Self {
234        assert!(
235            !can_use_fast(&v),
236            "Could use fast for guid (len = {})",
237            v.len()
238        );
239        Guid(Repr::Slow(
240            String::from_utf8(v).expect("Invalid slow guid bytes!"),
241        ))
242    }
243}
244
// Lookup table used to implement the places validity tests: entry `b` is 1 if
// the byte `b` is a base64url character (`0-9`, `a-z`, `A-Z`, `-` or `_`) and
// 0 otherwise. Built at compile time so the table cannot drift from the rule.
const BASE64URL_BYTES: [u8; 256] = {
    let mut table = [0u8; 256];
    let mut index = 0usize;
    while index < 256 {
        let byte = index as u8;
        if byte.is_ascii_alphanumeric() || byte == b'-' || byte == b'_' {
            table[index] = 1;
        }
        index += 1;
    }
    table
};
256
257impl Ord for Guid {
258    fn cmp(&self, other: &Self) -> Ordering {
259        self.as_bytes().cmp(other.as_bytes())
260    }
261}
262
263impl PartialOrd for Guid {
264    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
265        Some(self.cmp(other))
266    }
267}
268
269impl PartialEq for Guid {
270    fn eq(&self, other: &Self) -> bool {
271        self.as_bytes() == other.as_bytes()
272    }
273}
274
275impl Eq for Guid {}
276
277impl Hash for Guid {
278    fn hash<H: Hasher>(&self, state: &mut H) {
279        self.as_bytes().hash(state);
280    }
281}
282
283impl<'a> From<&'a str> for Guid {
284    #[inline]
285    fn from(s: &'a str) -> Guid {
286        Guid::from_slice(s.as_ref())
287    }
288}
289impl<'a> From<&'a &str> for Guid {
290    #[inline]
291    fn from(s: &'a &str) -> Guid {
292        Guid::from_slice(s.as_ref())
293    }
294}
295
296impl<'a> From<&'a [u8]> for Guid {
297    #[inline]
298    fn from(s: &'a [u8]) -> Guid {
299        Guid::from_slice(s)
300    }
301}
302
303impl From<String> for Guid {
304    #[inline]
305    fn from(s: String) -> Guid {
306        Guid::from_string(s)
307    }
308}
309
310impl From<Vec<u8>> for Guid {
311    #[inline]
312    fn from(v: Vec<u8>) -> Guid {
313        Guid::from_vec(v)
314    }
315}
316
317impl From<Guid> for String {
318    #[inline]
319    fn from(guid: Guid) -> String {
320        guid.into_string()
321    }
322}
323
324impl From<Guid> for Vec<u8> {
325    #[inline]
326    fn from(guid: Guid) -> Vec<u8> {
327        guid.into_string().into_bytes()
328    }
329}
330
331impl AsRef<str> for Guid {
332    #[inline]
333    fn as_ref(&self) -> &str {
334        self.as_str()
335    }
336}
337
338impl AsRef<[u8]> for Guid {
339    #[inline]
340    fn as_ref(&self) -> &[u8] {
341        self.as_bytes()
342    }
343}
344
345impl ops::Deref for Guid {
346    type Target = str;
347    #[inline]
348    fn deref(&self) -> &str {
349        self.as_str()
350    }
351}
352
353// The default Debug impl is pretty unhelpful here.
354impl fmt::Debug for Guid {
355    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
356        write!(f, "Guid({:?})", self.as_str())
357    }
358}
359
360impl fmt::Display for Guid {
361    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
362        fmt::Display::fmt(self.as_str(), f)
363    }
364}
365
366impl std::default::Default for Guid {
367    /// Create a default guid by calling `Guid::empty()`
368    #[inline]
369    fn default() -> Self {
370        Guid::empty()
371    }
372}
373
// Generates `PartialEq` impls in both directions (`Guid == $other` and
// `$other == Guid`) for every type listed in the invocation. Each impl views
// both operands as byte slices through `AsRef<[u8]>` and compares those.
macro_rules! impl_guid_eq {
    ($($other: ty),+) => {$(
        // This macro is used for items with and without lifetimes. Types that
        // need one (e.g. `&'a str`) name the `'a` declared on the impl below;
        // for the others `'a` goes unused, hence the lint allowance.
        #[allow(clippy::extra_unused_lifetimes)]
        impl<'a> PartialEq<$other> for Guid {
            #[inline]
            fn eq(&self, other: &$other) -> bool {
                PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other))
            }
        }

        // The mirrored impl, so the guid may appear on either side of `==`.
        #[allow(clippy::extra_unused_lifetimes)]
        impl<'a> PartialEq<Guid> for $other {
            #[inline]
            fn eq(&self, other: &Guid) -> bool {
                PartialEq::eq(AsRef::<[u8]>::as_ref(self), AsRef::<[u8]>::as_ref(other))
            }
        }
    )+}
}

// Implement direct comparison with some common types from the stdlib.
impl_guid_eq![str, &'a str, String, [u8], &'a [u8], Vec<u8>];
397
#[cfg(test)]
mod test {
    use super::*;

    // Rebuilds the expected lookup table from the definition of base64url and
    // checks that `BASE64URL_BYTES` matches it exactly.
    #[test]
    fn test_base64url_bytes() {
        let mut expect = [0u8; 256];
        for b in b'0'..=b'9' {
            expect[b as usize] = 1;
        }
        for b in b'a'..=b'z' {
            expect[b as usize] = 1;
        }
        for b in b'A'..=b'Z' {
            expect[b as usize] = 1;
        }
        expect[b'_' as usize] = 1;
        expect[b'-' as usize] = 1;
        assert_eq!(&BASE64URL_BYTES[..], &expect[..]);
    }

    #[test]
    fn test_valid_for_places() {
        assert!(Guid::from("aaaabbbbcccc").is_valid_for_places());
        assert!(Guid::from_slice(b"09_az-AZ_09-").is_valid_for_places());
        assert!(!Guid::from("aaaabbbbccccd").is_valid_for_places()); // too long
        assert!(!Guid::from("aaaabbbbccc").is_valid_for_places()); // too short
        assert!(!Guid::from("aaaabbbbccc=").is_valid_for_places()); // right length, bad character
        assert!(!Guid::empty().is_valid_for_places()); // empty isn't valid to insert.
    }

    // Covers each clause of the server rule: non-empty, at most 64 bytes, and
    // only printable ASCII other than a comma.
    #[test]
    fn test_valid_for_sync_server() {
        assert!(!Guid::empty().is_valid_for_sync_server()); // empty isn't valid remotely.
        assert!(Guid::from("aaaabbbbcccc").is_valid_for_sync_server());
        assert!(Guid::from(" !~").is_valid_for_sync_server()); // both ends of the printable range.
        assert!(!Guid::from("aaaa,bbbb").is_valid_for_sync_server()); // commas are rejected.
        assert!(!Guid::from("aaaa\tbbbb").is_valid_for_sync_server()); // control character.
        assert!(!Guid::from("aaaa\u{7f}bbbb").is_valid_for_sync_server()); // DEL is just past `~`.
        assert!(!Guid::from("caf\u{e9}").is_valid_for_sync_server()); // non-ASCII.
        assert!(Guid::from("a".repeat(64)).is_valid_for_sync_server()); // longest allowed.
        assert!(!Guid::from("a".repeat(65)).is_valid_for_sync_server()); // too long.
    }

    #[allow(clippy::cmp_owned)] // See clippy note below.
    #[test]
    fn test_comparison() {
        assert_eq!(Guid::from("abcdabcdabcd"), "abcdabcdabcd");
        assert_ne!(Guid::from("abcdabcdabcd".to_string()), "ABCDabcdabcd");

        assert_eq!(Guid::from("abcdabcdabcd"), &b"abcdabcdabcd"[..]); // b"abcdabcdabcd" has type &[u8; 12]...
        assert_ne!(Guid::from(&b"abcdabcdabcd"[..]), &b"ABCDabcdabcd"[..]);

        assert_eq!(
            Guid::from(b"abcdabcdabcd"[..].to_owned()),
            "abcdabcdabcd".to_string()
        );
        assert_ne!(Guid::from("abcdabcdabcd"), "ABCDabcdabcd".to_string());

        // These are longer than MAX_FAST_GUID_LEN, so they exercise the
        // heap-backed representation.
        assert_eq!(
            Guid::from("abcdabcdabcd1234"),
            Vec::from(b"abcdabcdabcd1234".as_ref())
        );
        assert_ne!(
            Guid::from("abcdabcdabcd4321"),
            Vec::from(b"ABCDabcdabcd4321".as_ref())
        );

        // order by data instead of length
        // hrmph - clippy in 1.54-nightly complains about the below:
        // 'error: this creates an owned instance just for comparison'
        // '... help: try: `*"aaaaaa"`'
        // and suggests a change that's wrong - so we've ignored the lint above.
        assert!(Guid::from("zzz") > Guid::from("aaaaaa"));
        assert!(Guid::from("ThisIsASolowGuid") < Guid::from("zzz"));
        assert!(Guid::from("ThisIsASolowGuid") > Guid::from("AnotherSlowGuid"));
    }

    #[cfg(feature = "random")]
    #[test]
    fn test_random() {
        use std::collections::HashSet;
        // Used to verify uniqueness within our sample of 1000. Could cause
        // random failures, but desktop has the same test, and it's never caused
        // a problem AFAIK.
        let mut seen: HashSet<String> = HashSet::new();
        for _ in 0..1000 {
            let g = Guid::random();
            assert_eq!(g.len(), 12);
            assert!(g.is_valid_for_places());
            let decoded = URL_SAFE_NO_PAD.decode(&g).unwrap();
            assert_eq!(decoded.len(), 9);
            let no_collision = seen.insert(g.clone().into_string());
            assert!(no_collision, "{}", g);
        }
    }
}