suggest/
schema.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 */
5
6use crate::{db::Sqlite3Extension, util::i18n_cmp};
7use rusqlite::{Connection, Transaction};
8use sql_support::{
9    open_database::{self, ConnectionInitializer},
10    ConnExt,
11};
12
13/// The current database schema version.
14///
15/// For any changes to the schema [`SQL`], please make sure to:
16///
17///  1. Bump this version.
18///  2. Add a migration from the old version to the new version in
19///     [`SuggestConnectionInitializer::upgrade_from`].
20///     a. If suggestions should be re-ingested after the migration, call `clear_database()` inside
21///     the migration.
22///  3. If the migration adds any tables, delete their their rows in
23///     `clear_database()` by adding their names to `conditional_tables`, unless
24///     they are cleared via a deletion trigger or there's some other good
25///     reason not to do so.
26pub const VERSION: u32 = 44;
27
28/// The current Suggest database schema.
29pub const SQL: &str = "
30CREATE TABLE meta(
31    key TEXT PRIMARY KEY,
32    value NOT NULL
33) WITHOUT ROWID;
34
35CREATE TABLE ingested_records(
36    id TEXT,
37    collection TEXT,
38    type TEXT NOT NULL,
39    last_modified INTEGER NOT NULL,
40    PRIMARY KEY (id, collection)
41) WITHOUT ROWID;
42
43CREATE TABLE keywords(
44    keyword TEXT NOT NULL,
45    suggestion_id INTEGER NOT NULL,
46    full_keyword_id INTEGER NULL,
47    rank INTEGER NOT NULL,
48    PRIMARY KEY (keyword, suggestion_id)
49) WITHOUT ROWID;
50
51CREATE TABLE keywords_i18n(
52    keyword TEXT NOT NULL COLLATE i18n_collate,
53    suggestion_id INTEGER NOT NULL,
54    full_keyword_id INTEGER NULL,
55    rank INTEGER NOT NULL,
56    PRIMARY KEY (keyword, suggestion_id)
57) WITHOUT ROWID;
58
59-- Keywords metrics per record ID and type. Currently we only record metrics for
60-- a small number of record types.
61CREATE TABLE keywords_metrics(
62    record_id TEXT NOT NULL PRIMARY KEY,
63    record_type TEXT NOT NULL,
64    max_len INTEGER NOT NULL,
65    max_word_count INTEGER NOT NULL
66) WITHOUT ROWID;
67
68CREATE INDEX keywords_metrics_record_type ON keywords_metrics(record_type);
69
70-- full keywords are what we display to the user when a (partial) keyword matches
71CREATE TABLE full_keywords(
72    id INTEGER PRIMARY KEY,
73    suggestion_id INTEGER NOT NULL,
74    full_keyword TEXT NOT NULL
75);
76
77CREATE TABLE prefix_keywords(
78    keyword_prefix TEXT NOT NULL,
79    keyword_suffix TEXT NOT NULL DEFAULT '',
80    confidence INTEGER NOT NULL DEFAULT 0,
81    rank INTEGER NOT NULL,
82    suggestion_id INTEGER NOT NULL,
83    PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
84) WITHOUT ROWID;
85
86CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
87
88CREATE TABLE serp_categories(
89    suggestion_id INTEGER NOT NULL,
90    category INTEGER NOT NULL,
91    PRIMARY KEY (suggestion_id, category)
92) WITHOUT ROWID;
93
94CREATE TABLE suggestions(
95    id INTEGER PRIMARY KEY,
96    record_id TEXT NOT NULL,
97    provider INTEGER NOT NULL,
98    title TEXT NOT NULL,
99    url TEXT NOT NULL,
100    score REAL NOT NULL
101);
102
103CREATE TABLE amp_custom_details(
104    suggestion_id INTEGER PRIMARY KEY,
105    advertiser TEXT NOT NULL,
106    block_id INTEGER NOT NULL,
107    iab_category TEXT NOT NULL,
108    impression_url TEXT NOT NULL,
109    click_url TEXT NOT NULL,
110    icon_id TEXT NOT NULL,
111    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
112);
113
114CREATE TABLE wikipedia_custom_details(
115    suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
116    icon_id TEXT NOT NULL
117);
118
119CREATE TABLE amo_custom_details(
120    suggestion_id INTEGER PRIMARY KEY,
121    description TEXT NOT NULL,
122    guid TEXT NOT NULL,
123    icon_url TEXT NOT NULL,
124    rating TEXT,
125    number_of_ratings INTEGER NOT NULL,
126    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
127);
128
129CREATE TABLE fakespot_custom_details(
130    suggestion_id INTEGER PRIMARY KEY,
131    fakespot_grade TEXT NOT NULL,
132    product_id TEXT NOT NULL,
133    keywords TEXT NOT NULL,
134    product_type TEXT NOT NULL,
135    rating REAL NOT NULL,
136    total_reviews INTEGER NOT NULL,
137    icon_id TEXT,
138    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
139);
140
141CREATE VIRTUAL TABLE IF NOT EXISTS fakespot_fts USING FTS5(
142  title,
143  content='',
144  contentless_delete=1,
145  tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
146);
147
148CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
149  INSERT INTO fakespot_fts(rowid, title)
150    SELECT id, title
151    FROM suggestions
152    WHERE id = new.suggestion_id;
153END;
154
155CREATE VIRTUAL TABLE IF NOT EXISTS amp_fts USING FTS5(
156  full_keywords,
157  title,
158  content='',
159  contentless_delete=1,
160  tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
161);
162
163-- DELETE/UPDATE triggers are difficult to implement, since the FTS contents are split between the fakespot_custom_details and suggestions tables.
164-- If you use an AFTER trigger, then the data from the other table has already been deleted.
165-- BEFORE triggers are discouraged by the SQLite docs.
166-- Instead, the drop_suggestions function handles updating the FTS data.
167
168CREATE INDEX suggestions_record_id ON suggestions(record_id);
169
170CREATE TABLE icons(
171    id TEXT PRIMARY KEY,
172    data BLOB NOT NULL,
173    mimetype TEXT NOT NULL
174) WITHOUT ROWID;
175
176CREATE TABLE yelp_subjects(
177    keyword TEXT PRIMARY KEY,
178    subject_type INTEGER NOT NULL DEFAULT 0,
179    record_id TEXT NOT NULL
180) WITHOUT ROWID;
181
182CREATE TABLE yelp_modifiers(
183    type INTEGER NOT NULL,
184    keyword TEXT NOT NULL,
185    record_id TEXT NOT NULL,
186    PRIMARY KEY (type, keyword)
187) WITHOUT ROWID;
188
189CREATE TABLE yelp_custom_details(
190    icon_id TEXT PRIMARY KEY,
191    score REAL NOT NULL,
192    record_id TEXT NOT NULL
193) WITHOUT ROWID;
194
195CREATE TABLE mdn_custom_details(
196    suggestion_id INTEGER PRIMARY KEY,
197    description TEXT NOT NULL,
198    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
199);
200
201CREATE TABLE dynamic_custom_details(
202    suggestion_id INTEGER PRIMARY KEY,
203    suggestion_type TEXT NOT NULL,
204    json_data TEXT,
205    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
206);
207CREATE INDEX dynamic_custom_details_suggestion_type ON dynamic_custom_details(suggestion_type);
208
209CREATE TABLE geonames(
210    id INTEGER PRIMARY KEY,
211    record_id TEXT NOT NULL,
212    name TEXT NOT NULL,
213    feature_class TEXT NOT NULL,
214    feature_code TEXT NOT NULL,
215    country_code TEXT NOT NULL,
216    admin1_code TEXT,
217    admin2_code TEXT,
218    admin3_code TEXT,
219    admin4_code TEXT,
220    population INTEGER,
221    latitude TEXT,
222    longitude TEXT
223);
224
225-- `language` is a lowercase ISO 639 code: 'en', 'de', 'fr', etc. It can also be
226-- a geonames pseudo-language like 'abbr' for abbreviations and 'iata' for
227-- airport codes. It will be null for names derived from a geoname's primary
228-- name (see `Geoname::name` and `Geoname::ascii_name`).
229-- `geoname_id` is not defined as a foreign key because the main geonames
230-- records are not guaranteed to be ingested before alternates records.
231CREATE TABLE geonames_alternates(
232    id INTEGER PRIMARY KEY,
233    record_id TEXT NOT NULL,
234    geoname_id INTEGER NOT NULL,
235    language TEXT,
236    name TEXT NOT NULL COLLATE geonames_collate,
237    is_preferred INTEGER,
238    is_short INTEGER
239);
240
241CREATE INDEX geonames_alternates_geoname_id_language
242    ON geonames_alternates(geoname_id, language);
243
244CREATE INDEX geonames_alternates_name
245    ON geonames_alternates(name);
246
247CREATE TRIGGER geonames_alternates_delete AFTER DELETE ON geonames BEGIN
248    DELETE FROM geonames_alternates
249    WHERE geoname_id = old.id;
250END;
251
252CREATE TABLE geonames_metrics(
253    record_id TEXT NOT NULL PRIMARY KEY,
254    max_name_length INTEGER NOT NULL,
255    max_name_word_count INTEGER NOT NULL
256) WITHOUT ROWID;
257
258-- `url` may be an opaque dismissal key rather than a URL depending on the
259-- suggestion type.
260CREATE TABLE dismissed_suggestions (
261    url TEXT PRIMARY KEY
262) WITHOUT ROWID;
263
264CREATE TABLE dismissed_dynamic_suggestions (
265    suggestion_type TEXT,
266    dismissal_key TEXT NOT NULL,
267    PRIMARY KEY(suggestion_type, dismissal_key)
268) WITHOUT ROWID;
269";
270
271/// Initializes an SQLite connection to the Suggest database, performing
272/// migrations as needed.
273#[derive(Default)]
274pub struct SuggestConnectionInitializer<'a> {
275    extensions_to_load: &'a [Sqlite3Extension],
276}
277
278impl<'a> SuggestConnectionInitializer<'a> {
279    pub fn new(extensions_to_load: &'a [Sqlite3Extension]) -> Self {
280        Self { extensions_to_load }
281    }
282
283    pub fn load_extensions(&self, conn: &Connection) -> open_database::Result<()> {
284        // Safety: this relies on the extensions we're loading to operate correctly, for the
285        // entry point to be correct, etc.
286        unsafe {
287            let _guard = rusqlite::LoadExtensionGuard::new(conn)?;
288            for ext in self.extensions_to_load {
289                conn.load_extension(&ext.library, ext.entry_point.as_deref())?;
290            }
291        }
292        Ok(())
293    }
294
295    fn create_custom_functions(&self, conn: &Connection) -> open_database::Result<()> {
296        // `geonames_collate` is deprecated, use `i18n_collate` instead. The
297        // collations are the same and ideally we'd remove `geonames_collate`,
298        // but then we'd need to recreate the geonames table in a migration, and
299        // it doesn't seem worth it.
300        conn.create_collation("geonames_collate", i18n_cmp)?;
301        conn.create_collation("i18n_collate", i18n_cmp)?;
302        Ok(())
303    }
304}
305
306impl ConnectionInitializer for SuggestConnectionInitializer<'_> {
307    const NAME: &'static str = "suggest db";
308    const END_VERSION: u32 = VERSION;
309
310    fn prepare(&self, conn: &Connection, _db_empty: bool) -> open_database::Result<()> {
311        self.load_extensions(conn)?;
312        sql_support::setup_sqlite_defaults(conn)?;
313        conn.execute("PRAGMA foreign_keys = ON", ())?;
314        sql_support::debug_tools::define_debug_functions(conn)?;
315        self.create_custom_functions(conn)?;
316        Ok(())
317    }
318
319    fn init(&self, db: &Transaction<'_>) -> open_database::Result<()> {
320        db.execute_batch(SQL)?;
321        Ok(())
322    }
323
324    fn upgrade_from(&self, tx: &Transaction<'_>, version: u32) -> open_database::Result<()> {
325        // Custom functions are per connection. `prepare` usually handles
326        // creating them but on upgrade it's not called before this method is.
327        self.create_custom_functions(tx)?;
328
329        match version {
330            1..=15 => {
331                // Treat databases with these older schema versions as corrupt,
332                // so that they'll be replaced by a fresh, empty database with
333                // the current schema.
334                Err(open_database::Error::Corrupt)
335            }
336            16 => {
337                tx.execute(
338                    "
339                    CREATE TABLE dismissed_suggestions (
340                        url_hash INTEGER PRIMARY KEY
341                    ) WITHOUT ROWID;",
342                    (),
343                )?;
344                Ok(())
345            }
346            17 => {
347                tx.execute_batch(
348                    "
349                    DROP TABLE dismissed_suggestions;
350                    CREATE TABLE dismissed_suggestions (
351                        url TEXT PRIMARY KEY
352                    ) WITHOUT ROWID;",
353                )?;
354                Ok(())
355            }
356            18 => {
357                tx.execute_batch(
358                    "
359                    CREATE TABLE IF NOT EXISTS dismissed_suggestions (
360                        url TEXT PRIMARY KEY
361                    ) WITHOUT ROWID;",
362                )?;
363                Ok(())
364            }
365            19 => {
366                // Clear the database since we're going to be dropping the keywords table and
367                // re-creating it
368                clear_database(tx)?;
369                tx.execute_batch(
370                    "
371                    -- Recreate the various keywords table to drop the foreign keys.
372                    DROP TABLE keywords;
373                    DROP TABLE full_keywords;
374                    DROP TABLE prefix_keywords;
375                    CREATE TABLE keywords(
376                        keyword TEXT NOT NULL,
377                        suggestion_id INTEGER NOT NULL,
378                        full_keyword_id INTEGER NULL,
379                        rank INTEGER NOT NULL,
380                        PRIMARY KEY (keyword, suggestion_id)
381                    ) WITHOUT ROWID;
382                    CREATE TABLE full_keywords(
383                        id INTEGER PRIMARY KEY,
384                        suggestion_id INTEGER NOT NULL,
385                        full_keyword TEXT NOT NULL
386                    );
387                    CREATE TABLE prefix_keywords(
388                        keyword_prefix TEXT NOT NULL,
389                        keyword_suffix TEXT NOT NULL DEFAULT '',
390                        confidence INTEGER NOT NULL DEFAULT 0,
391                        rank INTEGER NOT NULL,
392                        suggestion_id INTEGER NOT NULL,
393                        PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
394                    ) WITHOUT ROWID;
395                    CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
396                    ",
397                )?;
398                Ok(())
399            }
400
401            // Migration for the fakespot data.  This is not currently active for any users, it's
402            // only used for the tests.  It's safe to alter the fakespot_custom_detail schema and
403            // update this migration as the project moves forward.
404            //
405            // Note: if we want to add a regular migration while the fakespot code is still behind
406            // a feature flag, insert it before this one and make fakespot the last migration.
407            20 => {
408                tx.execute_batch(
409                    "
410                CREATE TABLE fakespot_custom_details(
411                    suggestion_id INTEGER PRIMARY KEY,
412                    fakespot_grade TEXT NOT NULL,
413                    product_id TEXT NOT NULL,
414                    rating REAL NOT NULL,
415                    total_reviews INTEGER NOT NULL,
416                    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
417                );
418                -- Create the Fakespot FTS table.
419                -- The `tokenize` param is hard to read.  The effect is that dashes and apostrophes are
420                -- considered valid tokens in a word, rather than separators.
421                CREATE VIRTUAL TABLE IF NOT EXISTS fakespot_fts USING FTS5(
422                  title,
423                  prefix='4 5 6 7 8 9 10 11',
424                  content='',
425                  contentless_delete=1,
426                  tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
427                );
428                CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
429                  INSERT INTO fakespot_fts(rowid, title)
430                    SELECT id, title
431                    FROM suggestions
432                    WHERE id = new.suggestion_id;
433                END;
434                ",
435                )?;
436                Ok(())
437            }
438            21 => {
439                // Drop and re-create the fakespot_custom_details to add the icon_id column.
440                tx.execute_batch(
441                    "
442                    DROP TABLE fakespot_custom_details;
443                    CREATE TABLE fakespot_custom_details(
444                        suggestion_id INTEGER PRIMARY KEY,
445                        fakespot_grade TEXT NOT NULL,
446                        product_id TEXT NOT NULL,
447                        rating REAL NOT NULL,
448                        total_reviews INTEGER NOT NULL,
449                        icon_id TEXT,
450                        FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
451                    );
452                    CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
453                      INSERT INTO fakespot_fts(rowid, title)
454                        SELECT id, title
455                        FROM suggestions
456                        WHERE id = new.suggestion_id;
457                    END;
458                    ",
459                )?;
460                Ok(())
461            }
462            22 => {
463                // Drop and re-create the fakespot_fts table to remove the prefix index param
464                tx.execute_batch(
465                    "
466                    DROP TABLE fakespot_fts;
467                    CREATE VIRTUAL TABLE fakespot_fts USING FTS5(
468                      title,
469                      content='',
470                      contentless_delete=1,
471                      tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
472                    );
473                    ",
474                )?;
475                Ok(())
476            }
477            23 => {
478                // Drop all suggestions, then recreate the fakespot_custom_details table to add the
479                // `keywords` and `product_type` fields.
480                clear_database(tx)?;
481                tx.execute_batch(
482                    "
483                    DROP TABLE fakespot_custom_details;
484                    CREATE TABLE fakespot_custom_details(
485                        suggestion_id INTEGER PRIMARY KEY,
486                        fakespot_grade TEXT NOT NULL,
487                        product_id TEXT NOT NULL,
488                        keywords TEXT NOT NULL,
489                        product_type TEXT NOT NULL,
490                        rating REAL NOT NULL,
491                        total_reviews INTEGER NOT NULL,
492                        icon_id TEXT,
493                        FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
494                    );
495                    CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
496                      INSERT INTO fakespot_fts(rowid, title)
497                        SELECT id, title
498                        FROM suggestions
499                        WHERE id = new.suggestion_id;
500                    END;
501                    ",
502                )?;
503                Ok(())
504            }
505            24 => {
506                // Clear the database so that we re-ingest and populate the ingested_records table.
507                clear_database(tx)?;
508                tx.execute_batch(
509                    "
510                    CREATE TABLE rs_cache(
511                        collection TEXT PRIMARY KEY,
512                        data TEXT NOT NULL
513                    ) WITHOUT ROWID;
514                    CREATE TABLE ingested_records(
515                        id TEXT,
516                        collection TEXT,
517                        type TEXT NOT NULL,
518                        last_modified INTEGER NOT NULL,
519                        PRIMARY KEY (id, collection)
520                    ) WITHOUT ROWID;
521                    ",
522                )?;
523                Ok(())
524            }
525            25 => {
526                // Create the exposure suggestions table and index.
527                tx.execute_batch(
528                    "
529                    CREATE TABLE exposure_custom_details(
530                        suggestion_id INTEGER PRIMARY KEY,
531                        type TEXT NOT NULL,
532                        FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
533                    );
534                    CREATE INDEX exposure_custom_details_type ON exposure_custom_details(type);
535                    ",
536                )?;
537                Ok(())
538            }
539            26 => {
540                // Create tables related to city-based weather.
541                tx.execute_batch(
542                    "
543                    CREATE TABLE keywords_metrics(
544                        record_id TEXT NOT NULL PRIMARY KEY,
545                        provider INTEGER NOT NULL,
546                        max_length INTEGER NOT NULL,
547                        max_word_count INTEGER NOT NULL
548                    ) WITHOUT ROWID;
549
550                    CREATE TABLE geonames(
551                        id INTEGER PRIMARY KEY,
552                        record_id TEXT NOT NULL,
553                        name TEXT NOT NULL,
554                        feature_class TEXT NOT NULL,
555                        feature_code TEXT NOT NULL,
556                        country_code TEXT NOT NULL,
557                        admin1_code TEXT NOT NULL,
558                        population INTEGER
559                    );
560                    CREATE INDEX geonames_feature_class ON geonames(feature_class);
561                    CREATE INDEX geonames_feature_code ON geonames(feature_code);
562
563                    CREATE TABLE geonames_alternates(
564                        name TEXT NOT NULL,
565                        geoname_id INTEGER NOT NULL,
566                        PRIMARY KEY (name, geoname_id),
567                        FOREIGN KEY(geoname_id) REFERENCES geonames(id) ON DELETE CASCADE
568                    ) WITHOUT ROWID;
569
570                    CREATE TABLE geonames_metrics(
571                        record_id TEXT NOT NULL PRIMARY KEY,
572                        max_name_length INTEGER NOT NULL,
573                        max_name_word_count INTEGER NOT NULL
574                    ) WITHOUT ROWID;
575                    ",
576                )?;
577                Ok(())
578            }
579            27 => {
580                // Add latitude and longitude to the geonames table. Clear the
581                // database so geonames are reingested.
582                clear_database(tx)?;
583                tx.execute_batch(
584                    "
585                    DROP INDEX geonames_feature_class;
586                    DROP INDEX geonames_feature_code;
587                    DROP TABLE geonames;
588                    CREATE TABLE geonames(
589                        id INTEGER PRIMARY KEY,
590                        record_id TEXT NOT NULL,
591                        name TEXT NOT NULL,
592                        latitude REAL NOT NULL,
593                        longitude REAL NOT NULL,
594                        feature_class TEXT NOT NULL,
595                        feature_code TEXT NOT NULL,
596                        country_code TEXT NOT NULL,
597                        admin1_code TEXT NOT NULL,
598                        population INTEGER NOT NULL
599                    );
600                    CREATE INDEX geonames_feature_class ON geonames(feature_class);
601                    CREATE INDEX geonames_feature_code ON geonames(feature_code);
602                    ",
603                )?;
604                Ok(())
605            }
606            28 => {
607                // Add `iso_language` column to `geonames_alternates`. Clear the
608                // database so geonames are reingested.
609                clear_database(tx)?;
610                tx.execute_batch(
611                    "
612                    DROP TABLE geonames_alternates;
613                    CREATE TABLE geonames_alternates(
614                        name TEXT NOT NULL,
615                        geoname_id INTEGER NOT NULL,
616                        -- The value of the `iso_language` field for the alternate. This will be
617                        -- null for the alternate we artificially create for the `name` in the
618                        -- corresponding geoname record.
619                        iso_language TEXT,
620                        PRIMARY KEY (name, geoname_id),
621                        FOREIGN KEY(geoname_id) REFERENCES geonames(id) ON DELETE CASCADE
622                    ) WITHOUT ROWID;
623                    ",
624                )?;
625                Ok(())
626            }
627            29 => {
628                // This migration only clears the database because some tables
629                // that should have been cleared in previous migrations were
630                // not.
631                clear_database(tx)?;
632                Ok(())
633            }
634            30 => {
635                // Add the `geonames_alternates_geoname_id` index.
636                clear_database(tx)?;
637                tx.execute_batch(
638                    "
639                    CREATE INDEX geonames_alternates_geoname_id ON geonames_alternates(geoname_id);
640                    ",
641                )?;
642                Ok(())
643            }
644            31 => {
645                // Need to clear the database so that the FTS index will get filled.
646                clear_database(tx)?;
647                tx.execute_batch(
648                    "
649                    CREATE VIRTUAL TABLE IF NOT EXISTS amp_fts USING FTS5(
650                      full_keywords,
651                      title,
652                      content='',
653                      contentless_delete=1,
654                      tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
655                    );
656
657                    ",
658                )?;
659                Ok(())
660            }
661            32 => {
662                // Drop rs_cache since it's no longer needed.
663                tx.execute_batch("DROP TABLE rs_cache;")?;
664                Ok(())
665            }
666            33 => {
667                // This migration is due to the replacement of the
668                // `quicksuggest` collection with `quicksuggest-amp` and
669                // `quicksuggest-other`. Clear the DB so that records from the
670                // old collection don't stick around. See bug 1953945.
671                clear_database(tx)?;
672                Ok(())
673            }
674            34 => {
675                // Replace the exposure suggestions table and index with the
676                // dynamic suggestions table and index.
677                tx.execute_batch(
678                    r#"
679                    DROP INDEX exposure_custom_details_type;
680                    DROP TABLE exposure_custom_details;
681                    CREATE TABLE dynamic_custom_details(
682                        suggestion_id INTEGER PRIMARY KEY,
683                        suggestion_type TEXT NOT NULL,
684                        json_data TEXT,
685                        FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
686                    );
687                    CREATE INDEX dynamic_custom_details_suggestion_type ON dynamic_custom_details(suggestion_type);
688                    "#,
689                )?;
690                Ok(())
691            }
692            35 => {
693                // The commit that added this migration was reverted.
694                Ok(())
695            }
696            36 => {
697                tx.execute_batch("DROP TABLE IF EXISTS yelp_location_signs;")?;
698                Ok(())
699            }
700            37 => {
701                clear_database(tx)?;
702                tx.execute_batch(
703                    "
704                    DROP TABLE yelp_subjects;
705                    CREATE TABLE yelp_subjects(
706                        keyword TEXT PRIMARY KEY,
707                        subject_type INTEGER NOT NULL DEFAULT 0,
708                        record_id TEXT NOT NULL
709                    ) WITHOUT ROWID;
710                    ",
711                )?;
712                Ok(())
713            }
714            38 => {
715                // This migration makes changes to geonames.
716                tx.execute_batch(
717                    r#"
718                    DROP INDEX geonames_alternates_geoname_id;
719                    DROP TABLE geonames_alternates;
720
721                    DROP INDEX geonames_feature_class;
722                    DROP INDEX geonames_feature_code;
723                    DROP TABLE geonames;
724
725                    CREATE TABLE geonames(
726                        id INTEGER PRIMARY KEY,
727                        record_id TEXT NOT NULL,
728                        name TEXT NOT NULL,
729                        feature_class TEXT NOT NULL,
730                        feature_code TEXT NOT NULL,
731                        country_code TEXT NOT NULL,
732                        admin1_code TEXT,
733                        admin2_code TEXT,
734                        admin3_code TEXT,
735                        admin4_code TEXT,
736                        population INTEGER,
737                        latitude TEXT,
738                        longitude TEXT
739                    );
740
741                    CREATE TABLE geonames_alternates(
742                        record_id TEXT NOT NULL,
743                        geoname_id INTEGER NOT NULL,
744                        language TEXT,
745                        name TEXT NOT NULL COLLATE geonames_collate,
746                        PRIMARY KEY(geoname_id, language, name)
747                    );
748                    CREATE INDEX geonames_alternates_geoname_id ON geonames_alternates(geoname_id);
749                    CREATE INDEX geonames_alternates_name ON geonames_alternates(name);
750
751                    CREATE TRIGGER geonames_alternates_delete AFTER DELETE ON geonames BEGIN
752                        DELETE FROM geonames_alternates
753                        WHERE geoname_id = old.id;
754                    END;
755                    "#,
756                )?;
757                Ok(())
758            }
759            39 => {
760                // This migration makes changes to keywords metrics.
761                clear_database(tx)?;
762                tx.execute_batch(
763                    r#"
764                    DROP TABLE keywords_metrics;
765                    CREATE TABLE keywords_metrics(
766                        record_id TEXT NOT NULL PRIMARY KEY,
767                        record_type TEXT NOT NULL,
768                        max_len INTEGER NOT NULL,
769                        max_word_count INTEGER NOT NULL
770                    ) WITHOUT ROWID;
771                    CREATE INDEX keywords_metrics_record_type ON keywords_metrics(record_type);
772                    "#,
773                )?;
774                Ok(())
775            }
776            40 => {
777                // This migration makes changes to geonames.
778                clear_database(tx)?;
779                tx.execute_batch(
780                    r#"
781                    DROP INDEX geonames_alternates_geoname_id;
782                    DROP INDEX geonames_alternates_name;
783                    DROP TABLE geonames_alternates;
784
785                    CREATE TABLE geonames_alternates(
786                        id INTEGER PRIMARY KEY,
787                        record_id TEXT NOT NULL,
788                        geoname_id INTEGER NOT NULL,
789                        language TEXT,
790                        name TEXT NOT NULL COLLATE geonames_collate,
791                        is_preferred INTEGER,
792                        is_short INTEGER
793                    );
794                    CREATE INDEX geonames_alternates_geoname_id_language
795                        ON geonames_alternates(geoname_id, language);
796                    CREATE INDEX geonames_alternates_name
797                        ON geonames_alternates(name);
798                    "#,
799                )?;
800                Ok(())
801            }
802            41 => {
803                // This migration introduces the `keywords_i18n` table and makes
804                // changes to how keywords metrics are calculated. Clear the DB
805                // so that weather and geonames names are added to the new table
806                // and also so that keywords metrics are recalculated.
807                clear_database(tx)?;
808                tx.execute_batch(
809                    r#"
810                    CREATE TABLE keywords_i18n(
811                        keyword TEXT NOT NULL COLLATE i18n_collate,
812                        suggestion_id INTEGER NOT NULL,
813                        full_keyword_id INTEGER NULL,
814                        rank INTEGER NOT NULL,
815                        PRIMARY KEY (keyword, suggestion_id)
816                    ) WITHOUT ROWID;
817                    "#,
818                )?;
819                Ok(())
820            }
821            42 => {
822                clear_database(tx)?;
823                tx.execute_batch(
824                    r#"
825                    CREATE TABLE serp_categories(
826                        suggestion_id INTEGER NOT NULL,
827                        category INTEGER NOT NULL,
828                        PRIMARY KEY (suggestion_id, category)
829                    ) WITHOUT ROWID;
830                    "#,
831                )?;
832                Ok(())
833            }
834            43 => {
835                clear_database(tx)?;
836                tx.execute_batch(
837                    r#"
838                    CREATE TABLE dismissed_dynamic_suggestions (
839                        suggestion_type TEXT,
840                        dismissal_key TEXT NOT NULL,
841                        PRIMARY KEY(suggestion_type, dismissal_key)
842                    ) WITHOUT ROWID;
843
844                    INSERT INTO dismissed_dynamic_suggestions
845                    SELECT "vpn", url FROM dismissed_suggestions WHERE url = "vpn-suggestions";
846                    "#,
847                )?;
848                Ok(())
849            }
850
851            _ => Err(open_database::Error::IncompatibleVersion(version)),
852        }
853    }
854}
855
856/// Clears the database, removing all suggestions, icons, and metadata.
857pub fn clear_database(db: &Connection) -> rusqlite::Result<()> {
858    // If you update this, you probably need to update
859    // `SuggestDao::drop_suggestions` too!
860
861    db.execute_batch(
862        "
863        DELETE FROM meta;
864        DELETE FROM keywords;
865        DELETE FROM full_keywords;
866        DELETE FROM prefix_keywords;
867        DELETE FROM suggestions;
868        DELETE FROM icons;
869        DELETE FROM yelp_subjects;
870        DELETE FROM yelp_modifiers;
871        DELETE FROM yelp_custom_details;
872        ",
873    )?;
874    let conditional_tables = [
875        "fakespot_fts",
876        "geonames",
877        "geonames_metrics",
878        "ingested_records",
879        "keywords_i18n",
880        "keywords_metrics",
881        "serp_categories",
882    ];
883    for t in conditional_tables {
884        let table_exists = db.exists("SELECT 1 FROM sqlite_master WHERE name = ?", [t])?;
885        if table_exists {
886            db.execute(&format!("DELETE FROM {t}"), ())?;
887        }
888    }
889    Ok(())
890}
891
892#[cfg(test)]
893mod test {
894    use super::*;
895    use sql_support::open_database::test_utils::MigratedDatabaseFile;
896
897    // Snapshot of the v16 schema.  We use this to test that we can migrate from there to the
898    // current schema.
899    const V16_SCHEMA: &str = r#"
900CREATE TABLE meta(
901    key TEXT PRIMARY KEY,
902    value NOT NULL
903) WITHOUT ROWID;
904
905CREATE TABLE keywords(
906    keyword TEXT NOT NULL,
907    suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
908    full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL,
909    rank INTEGER NOT NULL,
910    PRIMARY KEY (keyword, suggestion_id)
911) WITHOUT ROWID;
912
913-- full keywords are what we display to the user when a (partial) keyword matches
914-- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted.
915CREATE TABLE full_keywords(
916    id INTEGER PRIMARY KEY,
917    suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
918    full_keyword TEXT NOT NULL
919);
920
921CREATE TABLE prefix_keywords(
922    keyword_prefix TEXT NOT NULL,
923    keyword_suffix TEXT NOT NULL DEFAULT '',
924    confidence INTEGER NOT NULL DEFAULT 0,
925    rank INTEGER NOT NULL,
926    suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
927    PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
928) WITHOUT ROWID;
929
930CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
931
932CREATE TABLE suggestions(
933    id INTEGER PRIMARY KEY,
934    record_id TEXT NOT NULL,
935    provider INTEGER NOT NULL,
936    title TEXT NOT NULL,
937    url TEXT NOT NULL,
938    score REAL NOT NULL
939);
940
941CREATE TABLE amp_custom_details(
942    suggestion_id INTEGER PRIMARY KEY,
943    advertiser TEXT NOT NULL,
944    block_id INTEGER NOT NULL,
945    iab_category TEXT NOT NULL,
946    impression_url TEXT NOT NULL,
947    click_url TEXT NOT NULL,
948    icon_id TEXT NOT NULL,
949    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
950);
951
952CREATE TABLE wikipedia_custom_details(
953    suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
954    icon_id TEXT NOT NULL
955);
956
957CREATE TABLE amo_custom_details(
958    suggestion_id INTEGER PRIMARY KEY,
959    description TEXT NOT NULL,
960    guid TEXT NOT NULL,
961    icon_url TEXT NOT NULL,
962    rating TEXT,
963    number_of_ratings INTEGER NOT NULL,
964    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
965);
966
967CREATE INDEX suggestions_record_id ON suggestions(record_id);
968
969CREATE TABLE icons(
970    id TEXT PRIMARY KEY,
971    data BLOB NOT NULL,
972    mimetype TEXT NOT NULL
973) WITHOUT ROWID;
974
975CREATE TABLE yelp_subjects(
976    keyword TEXT PRIMARY KEY,
977    record_id TEXT NOT NULL
978) WITHOUT ROWID;
979
980CREATE TABLE yelp_modifiers(
981    type INTEGER NOT NULL,
982    keyword TEXT NOT NULL,
983    record_id TEXT NOT NULL,
984    PRIMARY KEY (type, keyword)
985) WITHOUT ROWID;
986
987CREATE TABLE yelp_location_signs(
988    keyword TEXT PRIMARY KEY,
989    need_location INTEGER NOT NULL,
990    record_id TEXT NOT NULL
991) WITHOUT ROWID;
992
993CREATE TABLE yelp_custom_details(
994    icon_id TEXT PRIMARY KEY,
995    score REAL NOT NULL,
996    record_id TEXT NOT NULL
997) WITHOUT ROWID;
998
999CREATE TABLE mdn_custom_details(
1000    suggestion_id INTEGER PRIMARY KEY,
1001    description TEXT NOT NULL,
1002    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
1003);
1004
1005PRAGMA user_version=16;
1006"#;
1007
1008    /// Test running all schema upgrades from V16, which was the first schema with a "real"
1009    /// migration.
1010    ///
1011    /// If an upgrade fails, then this test will fail with a panic.
1012    #[test]
1013    fn test_all_upgrades() {
1014        let db_file =
1015            MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
1016        db_file.run_all_upgrades();
1017        db_file.assert_schema_matches_new_database();
1018    }
1019
1020    /// Test that `clear_database()` works correctly during migrations.
1021    ///
1022    /// TODO: This only checks `ingested_records` for now since it's very
1023    /// important, but ideally this would test all tables.
1024    #[test]
1025    fn test_clear_database() -> anyhow::Result<()> {
1026        // Start with the v16 schema.
1027        let db_file =
1028            MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
1029
1030        // Upgrade to v25, the first version with with `ingested_records` tables.
1031        db_file.upgrade_to(25);
1032
1033        // Insert some ingested records.
1034        let conn = db_file.open();
1035        conn.execute(
1036            "INSERT INTO ingested_records(id, collection, type, last_modified) VALUES(?, ?, ?, ?)",
1037            ("record-id", "quicksuggest", "record-type", 1),
1038        )?;
1039        conn.close().expect("Connection should be closed");
1040
1041        // Finish upgrading to the current version.
1042        db_file.upgrade_to(VERSION);
1043        db_file.assert_schema_matches_new_database();
1044
1045        // `ingested_records` should be empty.
1046        let conn = db_file.open();
1047        assert_eq!(
1048            conn.conn_ext_query_one::<i32>("SELECT count(*) FROM ingested_records")?,
1049            0,
1050            "ingested_records should be empty"
1051        );
1052        conn.close().expect("Connection should be closed");
1053
1054        Ok(())
1055    }
1056
1057    /// Test that yelp_location_signs table could be removed correctly.
1058    #[test]
1059    fn test_remove_yelp_location_signs_table() -> anyhow::Result<()> {
1060        // Start with the v16 schema.
1061        let db_file =
1062            MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
1063
1064        // Upgrade to v36.
1065        db_file.upgrade_to(36);
1066
1067        // Drop the table to simulate old 35 > 36 migration.
1068        let conn = db_file.open();
1069        conn.execute("DROP table yelp_location_signs", ())?;
1070        conn.close().expect("Connection should be closed");
1071
1072        // Finish upgrading to the current version.
1073        db_file.upgrade_to(VERSION);
1074        db_file.assert_schema_matches_new_database();
1075
1076        Ok(())
1077    }
1078
1079    /// Test if dynamic vpn suggestions are migrated into the
1080    /// dismissed_dynamic_suggestions table during the migration to schema 44.
1081    #[test]
1082    fn test_migrate_vpn_dismissal() -> anyhow::Result<()> {
1083        let db_file =
1084            MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
1085
1086        // Upgrade to v43 (just before the migration) and insert the suggestion that should be migrated.
1087        db_file.upgrade_to(43);
1088        let conn = db_file.open();
1089        conn.execute(
1090            "INSERT INTO dismissed_suggestions VALUES (?)",
1091            ("vpn-suggestions",),
1092        )?;
1093        conn.close().expect("Connection should be closed");
1094        // Finish upgrading to the current version.
1095        db_file.upgrade_to(VERSION);
1096        db_file.assert_schema_matches_new_database();
1097
1098        // Check if the vpn suggestion was migrated.
1099        let conn = db_file.open();
1100        let mut stmt = conn.prepare("SELECT * FROM dismissed_dynamic_suggestions")?;
1101        let mut rows = stmt.query([])?;
1102        let row = rows.next()?.expect("Should have one row");
1103        let suggestion_type: String = row.get(0)?;
1104        assert_eq!(suggestion_type, "vpn", "Has correct suggestion_type");
1105        let dismissal_key: String = row.get(1)?;
1106        assert_eq!(
1107            dismissal_key, "vpn-suggestions",
1108            "Has correct suggestion_type"
1109        );
1110        assert!(rows.next()?.is_none(), "Should not have other rows");
1111        Ok(())
1112    }
1113}