suggest/
schema.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 */
5
6use crate::{db::Sqlite3Extension, util::i18n_cmp};
7use rusqlite::{Connection, Transaction};
8use sql_support::{
9    open_database::{self, ConnectionInitializer},
10    ConnExt,
11};
12
13/// The current database schema version.
14///
15/// For any changes to the schema [`SQL`], please make sure to:
16///
17///  1. Bump this version.
18///  2. Add a migration from the old version to the new version in
19///     [`SuggestConnectionInitializer::upgrade_from`].
20///     a. If suggestions should be re-ingested after the migration, call `clear_database()` inside
21///     the migration.
22///  3. If the migration adds any tables, delete their their rows in
23///     `clear_database()` by adding their names to `conditional_tables`, unless
24///     they are cleared via a deletion trigger or there's some other good
25///     reason not to do so.
26pub const VERSION: u32 = 42;
27
28/// The current Suggest database schema.
29pub const SQL: &str = "
30CREATE TABLE meta(
31    key TEXT PRIMARY KEY,
32    value NOT NULL
33) WITHOUT ROWID;
34
35CREATE TABLE ingested_records(
36    id TEXT,
37    collection TEXT,
38    type TEXT NOT NULL,
39    last_modified INTEGER NOT NULL,
40    PRIMARY KEY (id, collection)
41) WITHOUT ROWID;
42
43CREATE TABLE keywords(
44    keyword TEXT NOT NULL,
45    suggestion_id INTEGER NOT NULL,
46    full_keyword_id INTEGER NULL,
47    rank INTEGER NOT NULL,
48    PRIMARY KEY (keyword, suggestion_id)
49) WITHOUT ROWID;
50
51CREATE TABLE keywords_i18n(
52    keyword TEXT NOT NULL COLLATE i18n_collate,
53    suggestion_id INTEGER NOT NULL,
54    full_keyword_id INTEGER NULL,
55    rank INTEGER NOT NULL,
56    PRIMARY KEY (keyword, suggestion_id)
57) WITHOUT ROWID;
58
59-- Keywords metrics per record ID and type. Currently we only record metrics for
60-- a small number of record types.
61CREATE TABLE keywords_metrics(
62    record_id TEXT NOT NULL PRIMARY KEY,
63    record_type TEXT NOT NULL,
64    max_len INTEGER NOT NULL,
65    max_word_count INTEGER NOT NULL
66) WITHOUT ROWID;
67
68CREATE INDEX keywords_metrics_record_type ON keywords_metrics(record_type);
69
70-- full keywords are what we display to the user when a (partial) keyword matches
71CREATE TABLE full_keywords(
72    id INTEGER PRIMARY KEY,
73    suggestion_id INTEGER NOT NULL,
74    full_keyword TEXT NOT NULL
75);
76
77CREATE TABLE prefix_keywords(
78    keyword_prefix TEXT NOT NULL,
79    keyword_suffix TEXT NOT NULL DEFAULT '',
80    confidence INTEGER NOT NULL DEFAULT 0,
81    rank INTEGER NOT NULL,
82    suggestion_id INTEGER NOT NULL,
83    PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
84) WITHOUT ROWID;
85
86CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
87
88CREATE TABLE suggestions(
89    id INTEGER PRIMARY KEY,
90    record_id TEXT NOT NULL,
91    provider INTEGER NOT NULL,
92    title TEXT NOT NULL,
93    url TEXT NOT NULL,
94    score REAL NOT NULL
95);
96
97CREATE TABLE amp_custom_details(
98    suggestion_id INTEGER PRIMARY KEY,
99    advertiser TEXT NOT NULL,
100    block_id INTEGER NOT NULL,
101    iab_category TEXT NOT NULL,
102    impression_url TEXT NOT NULL,
103    click_url TEXT NOT NULL,
104    icon_id TEXT NOT NULL,
105    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
106);
107
108CREATE TABLE wikipedia_custom_details(
109    suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
110    icon_id TEXT NOT NULL
111);
112
113CREATE TABLE amo_custom_details(
114    suggestion_id INTEGER PRIMARY KEY,
115    description TEXT NOT NULL,
116    guid TEXT NOT NULL,
117    icon_url TEXT NOT NULL,
118    rating TEXT,
119    number_of_ratings INTEGER NOT NULL,
120    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
121);
122
123CREATE TABLE fakespot_custom_details(
124    suggestion_id INTEGER PRIMARY KEY,
125    fakespot_grade TEXT NOT NULL,
126    product_id TEXT NOT NULL,
127    keywords TEXT NOT NULL,
128    product_type TEXT NOT NULL,
129    rating REAL NOT NULL,
130    total_reviews INTEGER NOT NULL,
131    icon_id TEXT,
132    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
133);
134
135CREATE VIRTUAL TABLE IF NOT EXISTS fakespot_fts USING FTS5(
136  title,
137  content='',
138  contentless_delete=1,
139  tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
140);
141
142CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
143  INSERT INTO fakespot_fts(rowid, title)
144    SELECT id, title
145    FROM suggestions
146    WHERE id = new.suggestion_id;
147END;
148
149CREATE VIRTUAL TABLE IF NOT EXISTS amp_fts USING FTS5(
150  full_keywords,
151  title,
152  content='',
153  contentless_delete=1,
154  tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
155);
156
157-- DELETE/UPDATE triggers are difficult to implement, since the FTS contents are split between the fakespot_custom_details and suggestions tables.
158-- If you use an AFTER trigger, then the data from the other table has already been deleted.
159-- BEFORE triggers are discouraged by the SQLite docs.
160-- Instead, the drop_suggestions function handles updating the FTS data.
161
162CREATE INDEX suggestions_record_id ON suggestions(record_id);
163
164CREATE TABLE icons(
165    id TEXT PRIMARY KEY,
166    data BLOB NOT NULL,
167    mimetype TEXT NOT NULL
168) WITHOUT ROWID;
169
170CREATE TABLE yelp_subjects(
171    keyword TEXT PRIMARY KEY,
172    subject_type INTEGER NOT NULL DEFAULT 0,
173    record_id TEXT NOT NULL
174) WITHOUT ROWID;
175
176CREATE TABLE yelp_modifiers(
177    type INTEGER NOT NULL,
178    keyword TEXT NOT NULL,
179    record_id TEXT NOT NULL,
180    PRIMARY KEY (type, keyword)
181) WITHOUT ROWID;
182
183CREATE TABLE yelp_custom_details(
184    icon_id TEXT PRIMARY KEY,
185    score REAL NOT NULL,
186    record_id TEXT NOT NULL
187) WITHOUT ROWID;
188
189CREATE TABLE mdn_custom_details(
190    suggestion_id INTEGER PRIMARY KEY,
191    description TEXT NOT NULL,
192    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
193);
194
195CREATE TABLE dynamic_custom_details(
196    suggestion_id INTEGER PRIMARY KEY,
197    suggestion_type TEXT NOT NULL,
198    json_data TEXT,
199    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
200);
201CREATE INDEX dynamic_custom_details_suggestion_type ON dynamic_custom_details(suggestion_type);
202
203CREATE TABLE geonames(
204    id INTEGER PRIMARY KEY,
205    record_id TEXT NOT NULL,
206    name TEXT NOT NULL,
207    feature_class TEXT NOT NULL,
208    feature_code TEXT NOT NULL,
209    country_code TEXT NOT NULL,
210    admin1_code TEXT,
211    admin2_code TEXT,
212    admin3_code TEXT,
213    admin4_code TEXT,
214    population INTEGER,
215    latitude TEXT,
216    longitude TEXT
217);
218
219-- `language` is a lowercase ISO 639 code: 'en', 'de', 'fr', etc. It can also be
220-- a geonames pseudo-language like 'abbr' for abbreviations and 'iata' for
221-- airport codes. It will be null for names derived from a geoname's primary
222-- name (see `Geoname::name` and `Geoname::ascii_name`).
223-- `geoname_id` is not defined as a foreign key because the main geonames
224-- records are not guaranteed to be ingested before alternates records.
225CREATE TABLE geonames_alternates(
226    id INTEGER PRIMARY KEY,
227    record_id TEXT NOT NULL,
228    geoname_id INTEGER NOT NULL,
229    language TEXT,
230    name TEXT NOT NULL COLLATE geonames_collate,
231    is_preferred INTEGER,
232    is_short INTEGER
233);
234
235CREATE INDEX geonames_alternates_geoname_id_language
236    ON geonames_alternates(geoname_id, language);
237
238CREATE INDEX geonames_alternates_name
239    ON geonames_alternates(name);
240
241CREATE TRIGGER geonames_alternates_delete AFTER DELETE ON geonames BEGIN
242    DELETE FROM geonames_alternates
243    WHERE geoname_id = old.id;
244END;
245
246CREATE TABLE geonames_metrics(
247    record_id TEXT NOT NULL PRIMARY KEY,
248    max_name_length INTEGER NOT NULL,
249    max_name_word_count INTEGER NOT NULL
250) WITHOUT ROWID;
251
252-- `url` may be an opaque dismissal key rather than a URL depending on the
253-- suggestion type.
254CREATE TABLE dismissed_suggestions (
255    url TEXT PRIMARY KEY
256) WITHOUT ROWID;
257";
258
259/// Initializes an SQLite connection to the Suggest database, performing
260/// migrations as needed.
261#[derive(Default)]
262pub struct SuggestConnectionInitializer<'a> {
263    extensions_to_load: &'a [Sqlite3Extension],
264}
265
266impl<'a> SuggestConnectionInitializer<'a> {
267    pub fn new(extensions_to_load: &'a [Sqlite3Extension]) -> Self {
268        Self { extensions_to_load }
269    }
270
271    pub fn load_extensions(&self, conn: &Connection) -> open_database::Result<()> {
272        // Safety: this relies on the extensions we're loading to operate correctly, for the
273        // entry point to be correct, etc.
274        unsafe {
275            let _guard = rusqlite::LoadExtensionGuard::new(conn)?;
276            for ext in self.extensions_to_load {
277                conn.load_extension(&ext.library, ext.entry_point.as_deref())?;
278            }
279        }
280        Ok(())
281    }
282
283    fn create_custom_functions(&self, conn: &Connection) -> open_database::Result<()> {
284        // `geonames_collate` is deprecated, use `i18n_collate` instead. The
285        // collations are the same and ideally we'd remove `geonames_collate`,
286        // but then we'd need to recreate the geonames table in a migration, and
287        // it doesn't seem worth it.
288        conn.create_collation("geonames_collate", i18n_cmp)?;
289        conn.create_collation("i18n_collate", i18n_cmp)?;
290        Ok(())
291    }
292}
293
294impl ConnectionInitializer for SuggestConnectionInitializer<'_> {
295    const NAME: &'static str = "suggest db";
296    const END_VERSION: u32 = VERSION;
297
298    fn prepare(&self, conn: &Connection, _db_empty: bool) -> open_database::Result<()> {
299        self.load_extensions(conn)?;
300        sql_support::setup_sqlite_defaults(conn)?;
301        conn.execute("PRAGMA foreign_keys = ON", ())?;
302        sql_support::debug_tools::define_debug_functions(conn)?;
303        self.create_custom_functions(conn)?;
304        Ok(())
305    }
306
307    fn init(&self, db: &Transaction<'_>) -> open_database::Result<()> {
308        db.execute_batch(SQL)?;
309        Ok(())
310    }
311
312    fn upgrade_from(&self, tx: &Transaction<'_>, version: u32) -> open_database::Result<()> {
313        // Custom functions are per connection. `prepare` usually handles
314        // creating them but on upgrade it's not called before this method is.
315        self.create_custom_functions(tx)?;
316
317        match version {
318            1..=15 => {
319                // Treat databases with these older schema versions as corrupt,
320                // so that they'll be replaced by a fresh, empty database with
321                // the current schema.
322                Err(open_database::Error::Corrupt)
323            }
324            16 => {
325                tx.execute(
326                    "
327                    CREATE TABLE dismissed_suggestions (
328                        url_hash INTEGER PRIMARY KEY
329                    ) WITHOUT ROWID;",
330                    (),
331                )?;
332                Ok(())
333            }
334            17 => {
335                tx.execute(
336                    "
337                    DROP TABLE dismissed_suggestions;
338                    CREATE TABLE dismissed_suggestions (
339                        url TEXT PRIMARY KEY
340                    ) WITHOUT ROWID;",
341                    (),
342                )?;
343                Ok(())
344            }
345            18 => {
346                tx.execute_batch(
347                    "
348                    CREATE TABLE IF NOT EXISTS dismissed_suggestions (
349                        url TEXT PRIMARY KEY
350                    ) WITHOUT ROWID;",
351                )?;
352                Ok(())
353            }
354            19 => {
355                // Clear the database since we're going to be dropping the keywords table and
356                // re-creating it
357                clear_database(tx)?;
358                tx.execute_batch(
359                    "
360                    -- Recreate the various keywords table to drop the foreign keys.
361                    DROP TABLE keywords;
362                    DROP TABLE full_keywords;
363                    DROP TABLE prefix_keywords;
364                    CREATE TABLE keywords(
365                        keyword TEXT NOT NULL,
366                        suggestion_id INTEGER NOT NULL,
367                        full_keyword_id INTEGER NULL,
368                        rank INTEGER NOT NULL,
369                        PRIMARY KEY (keyword, suggestion_id)
370                    ) WITHOUT ROWID;
371                    CREATE TABLE full_keywords(
372                        id INTEGER PRIMARY KEY,
373                        suggestion_id INTEGER NOT NULL,
374                        full_keyword TEXT NOT NULL
375                    );
376                    CREATE TABLE prefix_keywords(
377                        keyword_prefix TEXT NOT NULL,
378                        keyword_suffix TEXT NOT NULL DEFAULT '',
379                        confidence INTEGER NOT NULL DEFAULT 0,
380                        rank INTEGER NOT NULL,
381                        suggestion_id INTEGER NOT NULL,
382                        PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
383                    ) WITHOUT ROWID;
384                    CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
385                    ",
386                )?;
387                Ok(())
388            }
389
390            // Migration for the fakespot data.  This is not currently active for any users, it's
391            // only used for the tests.  It's safe to alter the fakespot_custom_detail schema and
392            // update this migration as the project moves forward.
393            //
394            // Note: if we want to add a regular migration while the fakespot code is still behind
395            // a feature flag, insert it before this one and make fakespot the last migration.
396            20 => {
397                tx.execute_batch(
398                    "
399                CREATE TABLE fakespot_custom_details(
400                    suggestion_id INTEGER PRIMARY KEY,
401                    fakespot_grade TEXT NOT NULL,
402                    product_id TEXT NOT NULL,
403                    rating REAL NOT NULL,
404                    total_reviews INTEGER NOT NULL,
405                    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
406                );
407                -- Create the Fakespot FTS table.
408                -- The `tokenize` param is hard to read.  The effect is that dashes and apostrophes are
409                -- considered valid tokens in a word, rather than separators.
410                CREATE VIRTUAL TABLE IF NOT EXISTS fakespot_fts USING FTS5(
411                  title,
412                  prefix='4 5 6 7 8 9 10 11',
413                  content='',
414                  contentless_delete=1,
415                  tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
416                );
417                CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
418                  INSERT INTO fakespot_fts(rowid, title)
419                    SELECT id, title
420                    FROM suggestions
421                    WHERE id = new.suggestion_id;
422                END;
423                ",
424                )?;
425                Ok(())
426            }
427            21 => {
428                // Drop and re-create the fakespot_custom_details to add the icon_id column.
429                tx.execute_batch(
430                    "
431                    DROP TABLE fakespot_custom_details;
432                    CREATE TABLE fakespot_custom_details(
433                        suggestion_id INTEGER PRIMARY KEY,
434                        fakespot_grade TEXT NOT NULL,
435                        product_id TEXT NOT NULL,
436                        rating REAL NOT NULL,
437                        total_reviews INTEGER NOT NULL,
438                        icon_id TEXT,
439                        FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
440                    );
441                    CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
442                      INSERT INTO fakespot_fts(rowid, title)
443                        SELECT id, title
444                        FROM suggestions
445                        WHERE id = new.suggestion_id;
446                    END;
447                    ",
448                )?;
449                Ok(())
450            }
451            22 => {
452                // Drop and re-create the fakespot_fts table to remove the prefix index param
453                tx.execute_batch(
454                    "
455                    DROP TABLE fakespot_fts;
456                    CREATE VIRTUAL TABLE fakespot_fts USING FTS5(
457                      title,
458                      content='',
459                      contentless_delete=1,
460                      tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
461                    );
462                    ",
463                )?;
464                Ok(())
465            }
466            23 => {
467                // Drop all suggestions, then recreate the fakespot_custom_details table to add the
468                // `keywords` and `product_type` fields.
469                clear_database(tx)?;
470                tx.execute_batch(
471                    "
472                    DROP TABLE fakespot_custom_details;
473                    CREATE TABLE fakespot_custom_details(
474                        suggestion_id INTEGER PRIMARY KEY,
475                        fakespot_grade TEXT NOT NULL,
476                        product_id TEXT NOT NULL,
477                        keywords TEXT NOT NULL,
478                        product_type TEXT NOT NULL,
479                        rating REAL NOT NULL,
480                        total_reviews INTEGER NOT NULL,
481                        icon_id TEXT,
482                        FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
483                    );
484                    CREATE TRIGGER fakespot_ai AFTER INSERT ON fakespot_custom_details BEGIN
485                      INSERT INTO fakespot_fts(rowid, title)
486                        SELECT id, title
487                        FROM suggestions
488                        WHERE id = new.suggestion_id;
489                    END;
490                    ",
491                )?;
492                Ok(())
493            }
494            24 => {
495                // Clear the database so that we re-ingest and populate the ingested_records table.
496                clear_database(tx)?;
497                tx.execute_batch(
498                    "
499                    CREATE TABLE rs_cache(
500                        collection TEXT PRIMARY KEY,
501                        data TEXT NOT NULL
502                    ) WITHOUT ROWID;
503                    CREATE TABLE ingested_records(
504                        id TEXT,
505                        collection TEXT,
506                        type TEXT NOT NULL,
507                        last_modified INTEGER NOT NULL,
508                        PRIMARY KEY (id, collection)
509                    ) WITHOUT ROWID;
510                    ",
511                )?;
512                Ok(())
513            }
514            25 => {
515                // Create the exposure suggestions table and index.
516                tx.execute_batch(
517                    "
518                    CREATE TABLE exposure_custom_details(
519                        suggestion_id INTEGER PRIMARY KEY,
520                        type TEXT NOT NULL,
521                        FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
522                    );
523                    CREATE INDEX exposure_custom_details_type ON exposure_custom_details(type);
524                    ",
525                )?;
526                Ok(())
527            }
528            26 => {
529                // Create tables related to city-based weather.
530                tx.execute_batch(
531                    "
532                    CREATE TABLE keywords_metrics(
533                        record_id TEXT NOT NULL PRIMARY KEY,
534                        provider INTEGER NOT NULL,
535                        max_length INTEGER NOT NULL,
536                        max_word_count INTEGER NOT NULL
537                    ) WITHOUT ROWID;
538
539                    CREATE TABLE geonames(
540                        id INTEGER PRIMARY KEY,
541                        record_id TEXT NOT NULL,
542                        name TEXT NOT NULL,
543                        feature_class TEXT NOT NULL,
544                        feature_code TEXT NOT NULL,
545                        country_code TEXT NOT NULL,
546                        admin1_code TEXT NOT NULL,
547                        population INTEGER
548                    );
549                    CREATE INDEX geonames_feature_class ON geonames(feature_class);
550                    CREATE INDEX geonames_feature_code ON geonames(feature_code);
551
552                    CREATE TABLE geonames_alternates(
553                        name TEXT NOT NULL,
554                        geoname_id INTEGER NOT NULL,
555                        PRIMARY KEY (name, geoname_id),
556                        FOREIGN KEY(geoname_id) REFERENCES geonames(id) ON DELETE CASCADE
557                    ) WITHOUT ROWID;
558
559                    CREATE TABLE geonames_metrics(
560                        record_id TEXT NOT NULL PRIMARY KEY,
561                        max_name_length INTEGER NOT NULL,
562                        max_name_word_count INTEGER NOT NULL
563                    ) WITHOUT ROWID;
564                    ",
565                )?;
566                Ok(())
567            }
568            27 => {
569                // Add latitude and longitude to the geonames table. Clear the
570                // database so geonames are reingested.
571                clear_database(tx)?;
572                tx.execute_batch(
573                    "
574                    DROP INDEX geonames_feature_class;
575                    DROP INDEX geonames_feature_code;
576                    DROP TABLE geonames;
577                    CREATE TABLE geonames(
578                        id INTEGER PRIMARY KEY,
579                        record_id TEXT NOT NULL,
580                        name TEXT NOT NULL,
581                        latitude REAL NOT NULL,
582                        longitude REAL NOT NULL,
583                        feature_class TEXT NOT NULL,
584                        feature_code TEXT NOT NULL,
585                        country_code TEXT NOT NULL,
586                        admin1_code TEXT NOT NULL,
587                        population INTEGER NOT NULL
588                    );
589                    CREATE INDEX geonames_feature_class ON geonames(feature_class);
590                    CREATE INDEX geonames_feature_code ON geonames(feature_code);
591                    ",
592                )?;
593                Ok(())
594            }
595            28 => {
596                // Add `iso_language` column to `geonames_alternates`. Clear the
597                // database so geonames are reingested.
598                clear_database(tx)?;
599                tx.execute_batch(
600                    "
601                    DROP TABLE geonames_alternates;
602                    CREATE TABLE geonames_alternates(
603                        name TEXT NOT NULL,
604                        geoname_id INTEGER NOT NULL,
605                        -- The value of the `iso_language` field for the alternate. This will be
606                        -- null for the alternate we artificially create for the `name` in the
607                        -- corresponding geoname record.
608                        iso_language TEXT,
609                        PRIMARY KEY (name, geoname_id),
610                        FOREIGN KEY(geoname_id) REFERENCES geonames(id) ON DELETE CASCADE
611                    ) WITHOUT ROWID;
612                    ",
613                )?;
614                Ok(())
615            }
616            29 => {
617                // This migration only clears the database because some tables
618                // that should have been cleared in previous migrations were
619                // not.
620                clear_database(tx)?;
621                Ok(())
622            }
623            30 => {
624                // Add the `geonames_alternates_geoname_id` index.
625                clear_database(tx)?;
626                tx.execute_batch(
627                    "
628                    CREATE INDEX geonames_alternates_geoname_id ON geonames_alternates(geoname_id);
629                    ",
630                )?;
631                Ok(())
632            }
633            31 => {
634                // Need to clear the database so that the FTS index will get filled.
635                clear_database(tx)?;
636                tx.execute_batch(
637                    "
638                    CREATE VIRTUAL TABLE IF NOT EXISTS amp_fts USING FTS5(
639                      full_keywords,
640                      title,
641                      content='',
642                      contentless_delete=1,
643                      tokenize=\"porter unicode61 remove_diacritics 2 tokenchars '''-'\"
644                    );
645
646                    ",
647                )?;
648                Ok(())
649            }
650            32 => {
651                // Drop rs_cache since it's no longer needed.
652                tx.execute_batch("DROP TABLE rs_cache;")?;
653                Ok(())
654            }
655            33 => {
656                // This migration is due to the replacement of the
657                // `quicksuggest` collection with `quicksuggest-amp` and
658                // `quicksuggest-other`. Clear the DB so that records from the
659                // old collection don't stick around. See bug 1953945.
660                clear_database(tx)?;
661                Ok(())
662            }
663            34 => {
664                // Replace the exposure suggestions table and index with the
665                // dynamic suggestions table and index.
666                tx.execute_batch(
667                    r#"
668                    DROP INDEX exposure_custom_details_type;
669                    DROP TABLE exposure_custom_details;
670                    CREATE TABLE dynamic_custom_details(
671                        suggestion_id INTEGER PRIMARY KEY,
672                        suggestion_type TEXT NOT NULL,
673                        json_data TEXT,
674                        FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
675                    );
676                    CREATE INDEX dynamic_custom_details_suggestion_type ON dynamic_custom_details(suggestion_type);
677                    "#,
678                )?;
679                Ok(())
680            }
681            35 => {
682                // The commit that added this migration was reverted.
683                Ok(())
684            }
685            36 => {
686                tx.execute_batch("DROP TABLE IF EXISTS yelp_location_signs;")?;
687                Ok(())
688            }
689            37 => {
690                clear_database(tx)?;
691                tx.execute_batch(
692                    "
693                    DROP TABLE yelp_subjects;
694                    CREATE TABLE yelp_subjects(
695                        keyword TEXT PRIMARY KEY,
696                        subject_type INTEGER NOT NULL DEFAULT 0,
697                        record_id TEXT NOT NULL
698                    ) WITHOUT ROWID;
699                    ",
700                )?;
701                Ok(())
702            }
703            38 => {
704                // This migration makes changes to geonames.
705                tx.execute_batch(
706                    r#"
707                    DROP INDEX geonames_alternates_geoname_id;
708                    DROP TABLE geonames_alternates;
709
710                    DROP INDEX geonames_feature_class;
711                    DROP INDEX geonames_feature_code;
712                    DROP TABLE geonames;
713
714                    CREATE TABLE geonames(
715                        id INTEGER PRIMARY KEY,
716                        record_id TEXT NOT NULL,
717                        name TEXT NOT NULL,
718                        feature_class TEXT NOT NULL,
719                        feature_code TEXT NOT NULL,
720                        country_code TEXT NOT NULL,
721                        admin1_code TEXT,
722                        admin2_code TEXT,
723                        admin3_code TEXT,
724                        admin4_code TEXT,
725                        population INTEGER,
726                        latitude TEXT,
727                        longitude TEXT
728                    );
729
730                    CREATE TABLE geonames_alternates(
731                        record_id TEXT NOT NULL,
732                        geoname_id INTEGER NOT NULL,
733                        language TEXT,
734                        name TEXT NOT NULL COLLATE geonames_collate,
735                        PRIMARY KEY(geoname_id, language, name)
736                    );
737                    CREATE INDEX geonames_alternates_geoname_id ON geonames_alternates(geoname_id);
738                    CREATE INDEX geonames_alternates_name ON geonames_alternates(name);
739
740                    CREATE TRIGGER geonames_alternates_delete AFTER DELETE ON geonames BEGIN
741                        DELETE FROM geonames_alternates
742                        WHERE geoname_id = old.id;
743                    END;
744                    "#,
745                )?;
746                Ok(())
747            }
748            39 => {
749                // This migration makes changes to keywords metrics.
750                clear_database(tx)?;
751                tx.execute_batch(
752                    r#"
753                    DROP TABLE keywords_metrics;
754                    CREATE TABLE keywords_metrics(
755                        record_id TEXT NOT NULL PRIMARY KEY,
756                        record_type TEXT NOT NULL,
757                        max_len INTEGER NOT NULL,
758                        max_word_count INTEGER NOT NULL
759                    ) WITHOUT ROWID;
760                    CREATE INDEX keywords_metrics_record_type ON keywords_metrics(record_type);
761                    "#,
762                )?;
763                Ok(())
764            }
765            40 => {
766                // This migration makes changes to geonames.
767                clear_database(tx)?;
768                tx.execute_batch(
769                    r#"
770                    DROP INDEX geonames_alternates_geoname_id;
771                    DROP INDEX geonames_alternates_name;
772                    DROP TABLE geonames_alternates;
773
774                    CREATE TABLE geonames_alternates(
775                        id INTEGER PRIMARY KEY,
776                        record_id TEXT NOT NULL,
777                        geoname_id INTEGER NOT NULL,
778                        language TEXT,
779                        name TEXT NOT NULL COLLATE geonames_collate,
780                        is_preferred INTEGER,
781                        is_short INTEGER
782                    );
783                    CREATE INDEX geonames_alternates_geoname_id_language
784                        ON geonames_alternates(geoname_id, language);
785                    CREATE INDEX geonames_alternates_name
786                        ON geonames_alternates(name);
787                    "#,
788                )?;
789                Ok(())
790            }
791            41 => {
792                // This migration introduces the `keywords_i18n` table and makes
793                // changes to how keywords metrics are calculated. Clear the DB
794                // so that weather and geonames names are added to the new table
795                // and also so that keywords metrics are recalculated.
796                clear_database(tx)?;
797                tx.execute_batch(
798                    r#"
799                    CREATE TABLE keywords_i18n(
800                        keyword TEXT NOT NULL COLLATE i18n_collate,
801                        suggestion_id INTEGER NOT NULL,
802                        full_keyword_id INTEGER NULL,
803                        rank INTEGER NOT NULL,
804                        PRIMARY KEY (keyword, suggestion_id)
805                    ) WITHOUT ROWID;
806                    "#,
807                )?;
808                Ok(())
809            }
810
811            _ => Err(open_database::Error::IncompatibleVersion(version)),
812        }
813    }
814}
815
816/// Clears the database, removing all suggestions, icons, and metadata.
817pub fn clear_database(db: &Connection) -> rusqlite::Result<()> {
818    // If you update this, you probably need to update
819    // `SuggestDao::drop_suggestions` too!
820
821    db.execute_batch(
822        "
823        DELETE FROM meta;
824        DELETE FROM keywords;
825        DELETE FROM full_keywords;
826        DELETE FROM prefix_keywords;
827        DELETE FROM suggestions;
828        DELETE FROM icons;
829        DELETE FROM yelp_subjects;
830        DELETE FROM yelp_modifiers;
831        DELETE FROM yelp_custom_details;
832        ",
833    )?;
834    let conditional_tables = [
835        "fakespot_fts",
836        "geonames",
837        "geonames_metrics",
838        "ingested_records",
839        "keywords_i18n",
840        "keywords_metrics",
841    ];
842    for t in conditional_tables {
843        let table_exists = db.exists("SELECT 1 FROM sqlite_master WHERE name = ?", [t])?;
844        if table_exists {
845            db.execute(&format!("DELETE FROM {t}"), ())?;
846        }
847    }
848    Ok(())
849}
850
851#[cfg(test)]
852mod test {
853    use super::*;
854    use sql_support::open_database::test_utils::MigratedDatabaseFile;
855
856    // Snapshot of the v16 schema.  We use this to test that we can migrate from there to the
857    // current schema.
858    const V16_SCHEMA: &str = r#"
859CREATE TABLE meta(
860    key TEXT PRIMARY KEY,
861    value NOT NULL
862) WITHOUT ROWID;
863
864CREATE TABLE keywords(
865    keyword TEXT NOT NULL,
866    suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
867    full_keyword_id INTEGER NULL REFERENCES full_keywords(id) ON DELETE SET NULL,
868    rank INTEGER NOT NULL,
869    PRIMARY KEY (keyword, suggestion_id)
870) WITHOUT ROWID;
871
872-- full keywords are what we display to the user when a (partial) keyword matches
873-- The FK to suggestion_id makes it so full keywords get deleted when the parent suggestion is deleted.
874CREATE TABLE full_keywords(
875    id INTEGER PRIMARY KEY,
876    suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
877    full_keyword TEXT NOT NULL
878);
879
880CREATE TABLE prefix_keywords(
881    keyword_prefix TEXT NOT NULL,
882    keyword_suffix TEXT NOT NULL DEFAULT '',
883    confidence INTEGER NOT NULL DEFAULT 0,
884    rank INTEGER NOT NULL,
885    suggestion_id INTEGER NOT NULL REFERENCES suggestions(id) ON DELETE CASCADE,
886    PRIMARY KEY (keyword_prefix, keyword_suffix, suggestion_id)
887) WITHOUT ROWID;
888
889CREATE UNIQUE INDEX keywords_suggestion_id_rank ON keywords(suggestion_id, rank);
890
891CREATE TABLE suggestions(
892    id INTEGER PRIMARY KEY,
893    record_id TEXT NOT NULL,
894    provider INTEGER NOT NULL,
895    title TEXT NOT NULL,
896    url TEXT NOT NULL,
897    score REAL NOT NULL
898);
899
900CREATE TABLE amp_custom_details(
901    suggestion_id INTEGER PRIMARY KEY,
902    advertiser TEXT NOT NULL,
903    block_id INTEGER NOT NULL,
904    iab_category TEXT NOT NULL,
905    impression_url TEXT NOT NULL,
906    click_url TEXT NOT NULL,
907    icon_id TEXT NOT NULL,
908    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
909);
910
911CREATE TABLE wikipedia_custom_details(
912    suggestion_id INTEGER PRIMARY KEY REFERENCES suggestions(id) ON DELETE CASCADE,
913    icon_id TEXT NOT NULL
914);
915
916CREATE TABLE amo_custom_details(
917    suggestion_id INTEGER PRIMARY KEY,
918    description TEXT NOT NULL,
919    guid TEXT NOT NULL,
920    icon_url TEXT NOT NULL,
921    rating TEXT,
922    number_of_ratings INTEGER NOT NULL,
923    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
924);
925
926CREATE INDEX suggestions_record_id ON suggestions(record_id);
927
928CREATE TABLE icons(
929    id TEXT PRIMARY KEY,
930    data BLOB NOT NULL,
931    mimetype TEXT NOT NULL
932) WITHOUT ROWID;
933
934CREATE TABLE yelp_subjects(
935    keyword TEXT PRIMARY KEY,
936    record_id TEXT NOT NULL
937) WITHOUT ROWID;
938
939CREATE TABLE yelp_modifiers(
940    type INTEGER NOT NULL,
941    keyword TEXT NOT NULL,
942    record_id TEXT NOT NULL,
943    PRIMARY KEY (type, keyword)
944) WITHOUT ROWID;
945
946CREATE TABLE yelp_location_signs(
947    keyword TEXT PRIMARY KEY,
948    need_location INTEGER NOT NULL,
949    record_id TEXT NOT NULL
950) WITHOUT ROWID;
951
952CREATE TABLE yelp_custom_details(
953    icon_id TEXT PRIMARY KEY,
954    score REAL NOT NULL,
955    record_id TEXT NOT NULL
956) WITHOUT ROWID;
957
958CREATE TABLE mdn_custom_details(
959    suggestion_id INTEGER PRIMARY KEY,
960    description TEXT NOT NULL,
961    FOREIGN KEY(suggestion_id) REFERENCES suggestions(id) ON DELETE CASCADE
962);
963
964PRAGMA user_version=16;
965"#;
966
967    /// Test running all schema upgrades from V16, which was the first schema with a "real"
968    /// migration.
969    ///
970    /// If an upgrade fails, then this test will fail with a panic.
971    #[test]
972    fn test_all_upgrades() {
973        let db_file =
974            MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
975        db_file.run_all_upgrades();
976        db_file.assert_schema_matches_new_database();
977    }
978
979    /// Test that `clear_database()` works correctly during migrations.
980    ///
981    /// TODO: This only checks `ingested_records` for now since it's very
982    /// important, but ideally this would test all tables.
983    #[test]
984    fn test_clear_database() -> anyhow::Result<()> {
985        // Start with the v16 schema.
986        let db_file =
987            MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
988
989        // Upgrade to v25, the first version with with `ingested_records` tables.
990        db_file.upgrade_to(25);
991
992        // Insert some ingested records.
993        let conn = db_file.open();
994        conn.execute(
995            "INSERT INTO ingested_records(id, collection, type, last_modified) VALUES(?, ?, ?, ?)",
996            ("record-id", "quicksuggest", "record-type", 1),
997        )?;
998        conn.close().expect("Connection should be closed");
999
1000        // Finish upgrading to the current version.
1001        db_file.upgrade_to(VERSION);
1002        db_file.assert_schema_matches_new_database();
1003
1004        // `ingested_records` should be empty.
1005        let conn = db_file.open();
1006        assert_eq!(
1007            conn.query_one::<i32>("SELECT count(*) FROM ingested_records")?,
1008            0,
1009            "ingested_records should be empty"
1010        );
1011        conn.close().expect("Connection should be closed");
1012
1013        Ok(())
1014    }
1015
1016    /// Test that yelp_location_signs table could be removed correctly.
1017    #[test]
1018    fn test_remove_yelp_location_signs_table() -> anyhow::Result<()> {
1019        // Start with the v16 schema.
1020        let db_file =
1021            MigratedDatabaseFile::new(SuggestConnectionInitializer::default(), V16_SCHEMA);
1022
1023        // Upgrade to v36.
1024        db_file.upgrade_to(36);
1025
1026        // Drop the table to simulate old 35 > 36 migration.
1027        let conn = db_file.open();
1028        conn.execute("DROP table yelp_location_signs", ())?;
1029        conn.close().expect("Connection should be closed");
1030
1031        // Finish upgrading to the current version.
1032        db_file.upgrade_to(VERSION);
1033        db_file.assert_schema_matches_new_database();
1034
1035        Ok(())
1036    }
1037}