webext_storage/
migration.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5use crate::error::*;
6use rusqlite::{named_params, Connection, OpenFlags, Transaction};
7use serde_json::{Map, Value};
8use sql_support::ConnExt;
9use std::collections::HashSet;
10use std::path::Path;
11
12// Simple migration from the "old" kinto-with-sqlite-backing implementation
13// to ours.
14// Could almost be trivially done in JS using the regular public API if not
15// for:
16// * We don't want to enforce the same quotas when migrating.
17// * We'd rather do the entire migration in a single transaction for perf
18//   reasons.
19
20// The sqlite database we migrate from has a very simple structure:
21// * table collection_data with columns collection_name, record_id and record
22// * `collection_name` is a string of form "default/{extension_id}"
23// * `record_id` is `key-{key}`
24// * `record` is a string with json, of form: {
25//     id: {the record id repeated},
26//     key: {the key},
27//     data: {the actual data},
28//     _status: {sync status},
29//     last_modified: {timestamp},
30//  }
31// So the info we need is stored somewhat redundantly.
32// Further:
33// * There's a special collection_name "default/storage-sync-crypto" that
34//   we don't want to migrate. Its record_id is 'keys' and its json has no
35//   `data`
36
37// Note we don't enforce a quota - we migrate everything - even if this means
38// it's too big for the server to store. This is a policy decision - it's better
39// to not lose data than to try and work out what data can be disposed of, as
40// the addon has the ability to determine this.
41
// Our error strategy is "ignore read errors, propagate write errors", on the
// assumption that a read error tends to mean a damaged DB or file-system and
// is unlikely to succeed if we try again later (eg, replacing the disk isn't
// likely to uncorrupt the DB), whereas a write error is likely to be a
// disk-space or file-system problem for which a retry might work (eg,
// replacing the disk then trying again might make the writes work).
48
/// One row as read from the legacy `collection_data` table, before any
/// validation or parsing has been applied.
struct LegacyRow {
    /// Raw value of the `collection_name` column.
    col_name: String,
    /// Raw value of the `record` column (expected to hold JSON text).
    record: String,
}
54
55impl LegacyRow {
56    // Parse the 2 columns from the DB into the data we need to insert into
57    // our target database.
58    fn parse(&self) -> Option<Parsed<'_>> {
59        if self.col_name.len() < 8 {
60            trace!("collection_name of '{}' is too short", self.col_name);
61            return None;
62        }
63        if &self.col_name[..8] != "default/" {
64            trace!("collection_name of '{}' isn't ours", self.col_name);
65            return None;
66        }
67        let ext_id = &self.col_name[8..];
68        let mut record_map = match serde_json::from_str(&self.record) {
69            Ok(Value::Object(m)) => m,
70            Ok(o) => {
71                info!("skipping non-json-object 'record' column");
72                trace!("record value is json, but not an object: {}", o);
73                return None;
74            }
75            Err(e) => {
76                info!("skipping non-json 'record' column");
77                trace!("record value isn't json: {}", e);
78                return None;
79            }
80        };
81
82        let key = match record_map.remove("key") {
83            Some(Value::String(s)) if !s.is_empty() => s,
84            Some(o) => {
85                trace!("key is json but not a string: {}", o);
86                return None;
87            }
88            _ => {
89                trace!("key doesn't exist in the map");
90                return None;
91            }
92        };
93        let data = match record_map.remove("data") {
94            Some(d) => d,
95            _ => {
96                trace!("data doesn't exist in the map");
97                return None;
98            }
99        };
100        Some(Parsed { ext_id, key, data })
101    }
102}
103
104// The info we parse from the raw DB strings.
105struct Parsed<'a> {
106    ext_id: &'a str,
107    key: String,
108    data: serde_json::Value,
109}
110
111pub fn migrate(tx: &Transaction<'_>, filename: &Path) -> Result<MigrationInfo> {
112    // We do the grouping manually, collecting string values as we go.
113    let mut last_ext_id = "".to_string();
114    let mut curr_values: Vec<(String, serde_json::Value)> = Vec::new();
115    let (rows, mut mi) = read_rows(filename);
116    for row in rows {
117        trace!("processing '{}' - '{}'", row.col_name, row.record);
118        let parsed = match row.parse() {
119            Some(p) => p,
120            None => continue,
121        };
122        // Do our "grouping"
123        if parsed.ext_id != last_ext_id {
124            if !last_ext_id.is_empty() && !curr_values.is_empty() {
125                // a different extension id - write what we have to the DB.
126                let entries = do_insert(tx, &last_ext_id, curr_values)?;
127                mi.extensions_successful += 1;
128                mi.entries_successful += entries;
129            }
130            last_ext_id = parsed.ext_id.to_string();
131            curr_values = Vec::new();
132        }
133        // no 'else' here - must also enter this block on ext_id change.
134        if parsed.ext_id == last_ext_id {
135            curr_values.push((parsed.key.to_string(), parsed.data));
136            trace!(
137                "extension {} now has {} keys",
138                parsed.ext_id,
139                curr_values.len()
140            );
141        }
142    }
143    // and the last one
144    if !last_ext_id.is_empty() && !curr_values.is_empty() {
145        // a different extension id - write what we have to the DB.
146        let entries = do_insert(tx, &last_ext_id, curr_values)?;
147        mi.extensions_successful += 1;
148        mi.entries_successful += entries;
149    }
150    info!("migrated {} extensions: {:?}", mi.extensions_successful, mi);
151    Ok(mi)
152}
153
154fn read_rows(filename: &Path) -> (Vec<LegacyRow>, MigrationInfo) {
155    let flags = OpenFlags::SQLITE_OPEN_NO_MUTEX | OpenFlags::SQLITE_OPEN_READ_ONLY;
156    let src_conn = match Connection::open_with_flags(filename, flags) {
157        Ok(conn) => conn,
158        Err(e) => {
159            warn!("Failed to open the source DB: {}", e);
160            return (Vec::new(), MigrationInfo::open_failure());
161        }
162    };
163    // Failure to prepare the statement probably just means the source DB is
164    // damaged.
165    let mut stmt = match src_conn.prepare(
166        "SELECT collection_name, record FROM collection_data
167         WHERE collection_name != 'default/storage-sync-crypto'
168         ORDER BY collection_name",
169    ) {
170        Ok(stmt) => stmt,
171        Err(e) => {
172            warn!("Failed to prepare the statement: {}", e);
173            return (Vec::new(), MigrationInfo::open_failure());
174        }
175    };
176    let rows = match stmt.query_and_then([], |row| -> Result<LegacyRow> {
177        Ok(LegacyRow {
178            col_name: row.get(0)?,
179            record: row.get(1)?,
180        })
181    }) {
182        Ok(r) => r,
183        Err(e) => {
184            warn!("Failed to read any rows from the source DB: {}", e);
185            return (Vec::new(), MigrationInfo::open_failure());
186        }
187    };
188    let all_rows: Vec<Result<LegacyRow>> = rows.collect();
189    let entries = all_rows.len();
190    let successful_rows: Vec<LegacyRow> = all_rows.into_iter().filter_map(Result::ok).collect();
191    let distinct_extensions: HashSet<_> = successful_rows.iter().map(|c| &c.col_name).collect();
192
193    let mi = MigrationInfo {
194        entries,
195        extensions: distinct_extensions.len(),
196        // Populated later.
197        extensions_successful: 0,
198        entries_successful: 0,
199        open_failure: false,
200    };
201
202    (successful_rows, mi)
203}
204
205/// Insert the extension and values. If there are multiple values with the same
206/// key (which shouldn't be possible but who knows, database corruption causes
207/// strange things), chooses an arbitrary one. Returns the number of entries
208/// inserted, which could be different from `vals.len()` if multiple entries in
209/// `vals` have the same key.
210fn do_insert(tx: &Transaction<'_>, ext_id: &str, vals: Vec<(String, Value)>) -> Result<usize> {
211    let mut map = Map::with_capacity(vals.len());
212    for (key, val) in vals {
213        map.insert(key, val);
214    }
215    let num_entries = map.len();
216    tx.execute_cached(
217        "INSERT OR REPLACE INTO storage_sync_data(ext_id, data, sync_change_counter)
218         VALUES (:ext_id, :data, 1)",
219        rusqlite::named_params! {
220            ":ext_id": &ext_id,
221            ":data": &Value::Object(map),
222        },
223    )?;
224    Ok(num_entries)
225}
226
/// Counters describing the outcome of one migration from the legacy
/// database. Serializable so that it can be stored in, and later read back
/// from, the `meta` table.
#[derive(Debug, Clone, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct MigrationInfo {
    /// The number of entries (rows in the original table) we attempted to
    /// migrate. Zero if there was some error in computing this number.
    ///
    /// Note that for the original table, a single row stores a single
    /// preference for one extension. That is, if you view the set of
    /// preferences for a given extension as a HashMap (as we do), it would be a
    /// single entry/key-value-pair in the map.
    pub entries: usize,
    /// The number of records we successfully migrated (equal to `entries` for
    /// entirely successful migrations).
    pub entries_successful: usize,
    /// The number of extensions in the original table, counted as the number
    /// of distinct `collection_name` values among the rows that could be read
    /// (the crypto collection is excluded by the query).
    pub extensions: usize,
    /// The number of extensions we successfully migrated
    pub extensions_successful: usize,
    /// True iff we failed to read anything from the source DB: opening it,
    /// preparing the statement, or starting the query failed.
    pub open_failure: bool,
}
248
249impl MigrationInfo {
250    /// Returns a MigrationInfo indicating that we failed to read any rows due
251    /// to some error case (e.g. the database open failed, or some other very
252    /// early read error).
253    fn open_failure() -> Self {
254        Self {
255            open_failure: true,
256            ..Self::default()
257        }
258    }
259
260    const META_KEY: &'static str = "migration_info";
261
262    /// Store `self` in the provided database under `Self::META_KEY`.
263    pub(crate) fn store(&self, conn: &Connection) -> Result<()> {
264        let json = serde_json::to_string(self)?;
265        conn.execute(
266            "INSERT OR REPLACE INTO meta(key, value) VALUES (:k, :v)",
267            named_params! {
268                ":k": Self::META_KEY,
269                ":v": &json
270            },
271        )?;
272        Ok(())
273    }
274
275    /// Get the MigrationInfo stored under `Self::META_KEY` (if any) out of the
276    /// DB, and delete it.
277    pub(crate) fn take(tx: &Transaction<'_>) -> Result<Option<Self>> {
278        let s = tx.try_query_one::<String, _>(
279            "SELECT value FROM meta WHERE key = :k",
280            named_params! {
281                ":k": Self::META_KEY,
282            },
283            false,
284        )?;
285        tx.execute(
286            "DELETE FROM meta WHERE key = :k",
287            named_params! {
288                ":k": Self::META_KEY,
289            },
290        )?;
291        if let Some(s) = s {
292            match serde_json::from_str(&s) {
293                Ok(v) => Ok(Some(v)),
294                Err(e) => {
295                    // Force test failure, but just log an error otherwise so that
296                    // we commit the transaction that wil.
297                    debug_assert!(false, "Failed to read migration JSON: {:?}", e);
298                    error_support::report_error!(
299                        "webext-storage-migration-json",
300                        "Failed to read migration JSON: {}",
301                        e
302                    );
303                    Ok(None)
304                }
305            }
306        } else {
307            Ok(None)
308        }
309    }
310}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::api;
    use crate::db::{test::new_mem_db, StorageDb};
    use serde_json::json;
    use tempfile::tempdir;

    // Create a legacy-style source database at `path` containing an empty
    // `collection_data` table, then let the callback `f` populate it inside
    // the same transaction before committing and closing the connection.
    fn init_source_db(path: impl AsRef<Path>, f: impl FnOnce(&Connection)) {
        let flags = OpenFlags::SQLITE_OPEN_NO_MUTEX
            | OpenFlags::SQLITE_OPEN_CREATE
            | OpenFlags::SQLITE_OPEN_READ_WRITE;
        let mut conn = Connection::open_with_flags(path, flags).expect("open should work");
        let tx = conn.transaction().expect("should be able to get a tx");
        tx.execute_batch(
            "CREATE TABLE collection_data (
                collection_name TEXT,
                record_id TEXT,
                record TEXT
            );",
        )
        .expect("create should work");
        f(&tx);
        tx.commit().expect("should commit");
        conn.close().expect("close should work");
    }

    // Create a test database, populate it via the callback, migrate it, and
    // return a connection to the new, migrated DB for further checking.
    // Also asserts that the MigrationInfo produced equals `expect_mi`.
    fn do_migrate<F>(expect_mi: MigrationInfo, f: F) -> StorageDb
    where
        F: FnOnce(&Connection),
    {
        let tmpdir = tempdir().unwrap();
        let path = tmpdir.path().join("source.db");
        init_source_db(path, f);

        // now migrate
        let db = new_mem_db();
        let conn = db.get_connection().expect("should retrieve connection");
        let tx = conn.unchecked_transaction().expect("tx should work");

        // `path` was moved into `init_source_db`, so the same path is rebuilt.
        let mi = migrate(&tx, &tmpdir.path().join("source.db")).expect("migrate should work");
        tx.commit().expect("should work");
        assert_eq!(mi, expect_mi);
        db
    }

    // Assert that fetching everything for `ext_id` (a `null` key selector)
    // yields exactly `expect`.
    fn assert_has(c: &Connection, ext_id: &str, expect: Value) {
        assert_eq!(
            api::get(c, ext_id, json!(null)).expect("should get"),
            expect
        );
    }

    // Fixture: two rows for one extension, three rows for another, plus a
    // `default/storage-sync-crypto` row that must not be migrated.
    const HAPPY_PATH_SQL: &str = r#"
        INSERT INTO collection_data(collection_name, record)
        VALUES
            ('default/{e7fefcf3-b39c-4f17-5215-ebfe120a7031}', '{"id":"key-userWelcomed","key":"userWelcomed","data":1570659224457,"_status":"synced","last_modified":1579755940527}'),
            ('default/{e7fefcf3-b39c-4f17-5215-ebfe120a7031}', '{"id":"key-isWho","key":"isWho","data":"4ec8109f","_status":"synced","last_modified":1579755940497}'),
            ('default/storage-sync-crypto', '{"id":"keys","keys":{"default":["rQ=","lR="],"collections":{"extension@redux.devtools":["Bd=","ju="]}}}'),
            ('default/https-everywhere@eff.org', '{"id":"key-userRules","key":"userRules","data":[],"_status":"synced","last_modified":1570079920045}'),
            ('default/https-everywhere@eff.org', '{"id":"key-ruleActiveStates","key":"ruleActiveStates","data":{},"_status":"synced","last_modified":1570079919993}'),
            ('default/https-everywhere@eff.org', '{"id":"key-migration_5F_version","key":"migration_version","data":2,"_status":"synced","last_modified":1570079919966}')
    "#;
    // The MigrationInfo expected after migrating `HAPPY_PATH_SQL`: the crypto
    // row is not counted, leaving 5 entries across 2 extensions.
    const HAPPY_PATH_MIGRATION_INFO: MigrationInfo = MigrationInfo {
        entries: 5,
        entries_successful: 5,
        extensions: 2,
        extensions_successful: 2,
        open_failure: false,
    };

    // Migrating well-formed data yields one JSON object per extension.
    // NOTE(review): the clippy allow is presumably for the long timestamp
    // literal asserted below.
    #[allow(clippy::unreadable_literal)]
    #[test]
    fn test_happy_paths() {
        // some real data.
        let db = do_migrate(HAPPY_PATH_MIGRATION_INFO, |c| {
            c.execute_batch(HAPPY_PATH_SQL).expect("should populate")
        });
        let conn = db.get_connection().expect("should retrieve connection");

        assert_has(
            conn,
            "{e7fefcf3-b39c-4f17-5215-ebfe120a7031}",
            json!({"userWelcomed": 1570659224457u64, "isWho": "4ec8109f"}),
        );
        assert_has(
            conn,
            "https-everywhere@eff.org",
            json!({"userRules": [], "ruleActiveStates": {}, "migration_version": 2}),
        );
    }

    // Every row below is malformed in some way, so all 10 rows (6 distinct
    // collection names) are read but nothing is migrated.
    #[test]
    fn test_sad_paths() {
        do_migrate(
            MigrationInfo {
                entries: 10,
                entries_successful: 0,
                extensions: 6,
                extensions_successful: 0,
                open_failure: false,
            },
            |c| {
                c.execute_batch(
                    r#"INSERT INTO collection_data(collection_name, record)
                    VALUES
                    ('default/test', '{"key":2,"data":1}'), -- key not a string
                    ('default/test', '{"key":"","data":1}'), -- key empty string
                    ('default/test', '{"xey":"k","data":1}'), -- key missing
                    ('default/test', '{"key":"k","xata":1}'), -- data missing
                    ('default/test', '{"key":"k","data":1'), -- invalid json
                    ('xx/test', '{"key":"k","data":1}'), -- bad key format
                    ('default', '{"key":"k","data":1}'), -- bad key format 2
                    ('default/', '{"key":"k","data":1}'), -- bad key format 3
                    ('defaultx/test', '{"key":"k","data":1}'), -- bad key format 4
                    ('', '') -- empty strings
                    "#,
                )
                .expect("should populate");
            },
        );
    }

    // After a store-level migration the info can be taken exactly once; a
    // second take finds nothing.
    #[test]
    fn test_migration_info_storage() {
        let tmpdir = tempdir().unwrap();
        let path = tmpdir.path().join("source.db");
        init_source_db(&path, |c| {
            c.execute_batch(HAPPY_PATH_SQL).expect("should populate")
        });

        // now migrate
        let db = crate::store::test::new_mem_store();
        db.migrate(&path).expect("migration should work");
        let mi = db
            .take_migration_info()
            .expect("take failed with info present");
        assert_eq!(mi, Some(HAPPY_PATH_MIGRATION_INFO));
        let mi2 = db
            .take_migration_info()
            .expect("take failed with info missing");
        assert_eq!(mi2, None);
    }
}