suggest/
geoname.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 */
5
6/// GeoNames support. GeoNames is an open-source geographical database of place
7/// names worldwide, including cities, regions, and countries [1]. Notably it's
8/// used by MaxMind's databases [2]. We use GeoNames to detect city and region
9/// names and to map cities to regions. Specifically we use the data at [3];
10/// also see [3] for documentation.
11///
12/// [1]: https://www.geonames.org/
13/// [2]: https://www.maxmind.com/en/geoip-databases
14/// [3]: https://download.geonames.org/export/dump/
15use rusqlite::{named_params, Connection};
16use serde::Deserialize;
17use sql_support::ConnExt;
18use std::{
19    cell::OnceCell,
20    collections::HashMap,
21    hash::{Hash, Hasher},
22};
23
24use crate::{
25    db::{KeywordsMetrics, KeywordsMetricsUpdater, SuggestDao},
26    error::RusqliteResultExt,
27    metrics::MetricsContext,
28    rs::{Client, Record, SuggestRecordId, SuggestRecordType},
29    store::SuggestStoreInner,
30    Result,
31};
32
/// The type of a geoname.
///
/// `SuggestDao::fetch_geonames` derives this from a row's `feature_class` and
/// `feature_code`; the per-variant notes describe the mapping it applies.
#[derive(Clone, Debug, Eq, Hash, PartialEq, uniffi::Enum)]
pub enum GeonameType {
    /// A country: feature class "A" with a feature code that starts with "P"
    /// (e.g., "PCLI").
    Country,
    /// A state, province, prefecture, district, borough, etc.
    AdminDivision {
        /// The admin level, 1 through 4, taken from feature codes "ADM1"
        /// through "ADM4".
        level: u8,
    },
    /// Feature class "A" with a feature code that is neither a country code
    /// nor one of "ADM1" through "ADM4".
    AdminDivisionOther,
    /// A city, town, village, populated place, etc.
    City,
    /// Anything whose feature class is neither "A" nor "P".
    Other,
}
46
/// Identifier of a geoname: the `geonameid` from the GeoNames geoname table.
pub type GeonameId = i64;
48
/// A single geographic place.
///
/// This corresponds to a single row in the main "geoname" table described in
/// the GeoNames documentation [1]. We exclude fields we don't need.
///
/// Equality compares every field, while the `Hash` implementation in this file
/// hashes only `geoname_id`.
///
/// [1]: https://download.geonames.org/export/dump/readme.txt
#[derive(Clone, Debug, Eq, PartialEq, uniffi::Record)]
pub struct Geoname {
    /// The `geonameid` straight from the geoname table.
    pub geoname_id: GeonameId,
    /// The geoname type. This is derived from `feature_class` and
    /// `feature_code` as a more convenient representation of the type.
    pub geoname_type: GeonameType,
    /// The place's primary name.
    pub name: String,
    /// ISO-3166 two-letter uppercase country code, e.g., "US".
    pub country_code: String,
    /// Primary geoname category. Examples:
    ///
    /// "A" - Administrative division: country, state, province, borough,
    ///       district, etc.
    /// "P" - Populated place: city, village, etc.
    pub feature_class: String,
    /// Secondary geoname category, depends on `feature_class`. Examples:
    ///
    /// "PCLI" - Independent political entity: country
    /// "ADM1" - Administrative division 1
    /// "PPL" - Populated place like a city
    pub feature_code: String,
    /// Administrative divisions. This maps admin division levels (1-based) to
    /// their corresponding codes. For example, Liverpool has two admin
    /// divisions: "ENG" at level 1 and "H8" at level 2. They would be
    /// represented in this map with entries `(1, "ENG")` and `(2, "H8")`.
    pub admin_division_codes: HashMap<u8, String>,
    /// Population size. `SuggestDao::fetch_geonames` reports 0 when the stored
    /// value is null.
    pub population: u64,
    /// Latitude in decimal degrees (as a string). `SuggestDao::fetch_geonames`
    /// reports an empty string when the stored value is null.
    pub latitude: String,
    /// Longitude in decimal degrees (as a string). `SuggestDao::fetch_geonames`
    /// reports an empty string when the stored value is null.
    pub longitude: String,
}
89
/// Alternate names for a geoname and its country and admin divisions.
///
/// Values of this type are built by `SuggestDao::fetch_geoname_alternates`.
#[derive(Clone, Debug, Eq, PartialEq, uniffi::Record)]
pub struct GeonameAlternates {
    /// Names for the geoname itself.
    geoname: AlternateNames,
    /// Names for the geoname's country. This will be `Some` as long as the
    /// country is also in the ingested data, which should typically be true.
    country: Option<AlternateNames>,
    /// Names for the geoname's admin divisions. This is parallel to
    /// `Geoname::admin_division_codes`. If there are no names in the ingested
    /// data for an admin division, then it will be absent from this map.
    admin_divisions: HashMap<u8, AlternateNames>,
}
103
/// A set of names for a single entity (a geoname, a country, or an admin
/// division).
#[derive(Clone, Debug, Eq, PartialEq, uniffi::Record)]
pub struct AlternateNames {
    /// The entity's primary name. For a `Geoname`, this is `Geoname::name`.
    primary: String,
    /// The entity's name in the language that was ingested according to the
    /// locale in the remote settings context. If none exists and this
    /// `AlternateNames` is for a `Geoname`, then this will be its primary name.
    localized: Option<String>,
    /// The entity's abbreviation, if any. This comes from alternates whose
    /// language is the "abbr" pseudo-language.
    abbreviation: Option<String>,
}
116
117impl Geoname {
118    /// Whether `self` and `other` are related. For example, if one is a city
119    /// and the other is an administrative division, this will return `true` if
120    /// the city is located in the division.
121    pub fn is_related_to(&self, other: &Self) -> bool {
122        if self.country_code != other.country_code {
123            return false;
124        }
125
126        let self_level = self.admin_level();
127        let other_level = other.admin_level();
128
129        // Build a sorted vec of levels in both `self and `other`.
130        let mut levels_asc: Vec<_> = self
131            .admin_division_codes
132            .keys()
133            .chain(other.admin_division_codes.keys())
134            .copied()
135            .collect();
136        levels_asc.sort();
137
138        // Each admin level needs to be the same in `self` and `other` up to the
139        // minimum level of `self` and `other`.
140        for level in levels_asc {
141            if self_level < level || other_level < level {
142                break;
143            }
144            if self.admin_division_codes.get(&level) != other.admin_division_codes.get(&level) {
145                return false;
146            }
147        }
148
149        // At this point, admin levels are the same up to the minimum level. If
150        // the types of `self` and `other` aren't the same, then one is an admin
151        // division of the other. If they are the same type, then they need to
152        // be the same geoname.
153        self.geoname_type != other.geoname_type || self.geoname_id == other.geoname_id
154    }
155
156    fn admin_level(&self) -> u8 {
157        match self.geoname_type {
158            GeonameType::Country => 0,
159            GeonameType::AdminDivision { level } => level,
160            _ => 4,
161        }
162    }
163}
164
165impl Hash for Geoname {
166    fn hash<H: Hasher>(&self, state: &mut H) {
167        self.geoname_id.hash(state)
168    }
169}
170
/// A fetched geoname with info on how it was matched.
///
/// `SuggestDao::fetch_geonames` returns one of these per matching name type
/// per geoname.
#[derive(Clone, Debug, Eq, PartialEq, uniffi::Record)]
pub struct GeonameMatch {
    /// The geoname that was matched.
    pub geoname: Geoname,
    /// The type of name that was matched.
    pub match_type: GeonameMatchType,
    /// Whether the name was matched by prefix.
    pub prefix: bool,
}
181
/// The kind of name through which a geoname was matched.
///
/// `SuggestDao::fetch_geonames` picks the variant from the language of the
/// matching alternate name.
#[derive(Clone, Debug, Eq, PartialEq, uniffi::Enum)]
pub enum GeonameMatchType {
    /// The matching alternate has the "abbr" pseudo-language.
    Abbreviation,
    /// The matching alternate has one of the "iata", "icao", or "faac"
    /// pseudo-languages.
    AirportCode,
    /// This includes any names that aren't abbreviations or airport codes.
    Name,
}
189
190impl GeonameMatchType {
191    pub fn is_abbreviation(&self) -> bool {
192        matches!(self, GeonameMatchType::Abbreviation)
193    }
194
195    pub fn is_airport_code(&self) -> bool {
196        matches!(self, GeonameMatchType::AirportCode)
197    }
198
199    pub fn is_name(&self) -> bool {
200        matches!(self, GeonameMatchType::Name)
201    }
202}
203
/// This data is used to service every query handled by the weather provider and
/// potentially other providers, so we cache it from the DB.
#[derive(Debug, Default)]
pub struct GeonameCache {
    /// Keywords metrics loaded for `SuggestRecordType::GeonamesAlternates`
    /// (see `SuggestDao::geoname_cache`).
    pub keywords_metrics: KeywordsMetrics,
}
210
/// A geoname as deserialized from a downloaded attachment.
///
/// See `Geoname` for documentation of the fields the two types share.
#[derive(Clone, Debug, Deserialize)]
struct DownloadedGeoname {
    id: GeonameId,
    name: String,
    /// ASCII form of the name, if any. `SuggestDao::insert_geonames` stores it
    /// as an additional alternate name with a null language.
    ascii_name: Option<String>,
    feature_class: String,
    feature_code: String,
    /// Stored in the `country_code` column of the `geonames` table.
    country: String,
    admin1: Option<String>,
    admin2: Option<String>,
    admin3: Option<String>,
    admin4: Option<String>,
    population: Option<u64>,
    latitude: Option<String>,
    longitude: Option<String>,
}
228
/// Alternate names for a single language, as deserialized from a downloaded
/// attachment.
#[derive(Clone, Debug, Deserialize)]
struct DownloadedGeonamesAlternatesAttachment {
    /// The language of the names in this attachment as a lowercase ISO 639
    /// code: "en", "de", "fr", etc. Can also be a geonames pseudo-language like
    /// "abbr" for abbreviations and "iata" for airport codes.
    language: String,
    /// Tuples of geoname IDs and their alternate names.
    alternates_by_geoname_id: Vec<(GeonameId, Vec<DownloadedGeonamesAlternate<String>>)>,
}
238
/// A single alternate name.
///
/// Because of `#[serde(untagged)]`, the serialized form carries no variant
/// tag: serde tries the variants in order, so a bare string becomes `Name` and
/// an object with a `name` key becomes `Full`.
#[derive(Clone, Debug, Deserialize)]
#[serde(untagged)]
enum DownloadedGeonamesAlternate<S: AsRef<str>> {
    /// A name with no extra attributes; it is treated as neither preferred nor
    /// short.
    Name(S),
    /// A name with optional attributes. A missing attribute is treated as
    /// `false`.
    Full {
        name: S,
        is_preferred: Option<bool>,
        is_short: Option<bool>,
    },
}
249
250impl<S: AsRef<str>> DownloadedGeonamesAlternate<S> {
251    fn name(&self) -> &str {
252        match self {
253            Self::Name(name) => name.as_ref(),
254            Self::Full { name, .. } => name.as_ref(),
255        }
256    }
257
258    fn is_preferred(&self) -> bool {
259        match self {
260            Self::Name(_) => false,
261            Self::Full { is_preferred, .. } => is_preferred.unwrap_or(false),
262        }
263    }
264
265    fn is_short(&self) -> bool {
266        match self {
267            Self::Name(_) => false,
268            Self::Full { is_short, .. } => is_short.unwrap_or(false),
269        }
270    }
271}
272
273impl SuggestDao<'_> {
274    /// Fetches geonames that have at least one name matching the `query`
275    /// string.
276    ///
277    /// `match_name_prefix` determines whether prefix matching is performed on
278    /// names that aren't abbreviations and airport codes. When `true`, names
279    /// that start with `query` will match. When false, names that equal `query`
280    /// will match. Prefix matching is never performed on abbreviations and
281    /// airport codes because we don't currently have a use case for that.
282    ///
283    /// `filter` restricts returned geonames to those that are related to the
284    /// ones in the filter. Cities can be restricted to administrative divisions
285    /// by including the divisions in `filter` and vice versa. This is
286    /// especially useful since place names are not unique. `filter` is
287    /// conjunctive: All geonames in `filter` must be related to a geoname in
288    /// order for it to be filtered in.
289    ///
290    /// The returned matches will include all matching types for a geoname, one
291    /// match per type per geoname. For example, if the query matches both a
292    /// geoname's name and abbreviation, two matches for that geoname will be
293    /// returned: one with a `match_type` of `GeonameMatchType::Name` and one
294    /// with a `match_type` of `GeonameMatchType::Abbreviation`. `prefix` is set
295    /// according to whether the query matched a prefix of the given type.
296    pub fn fetch_geonames(
297        &self,
298        query: &str,
299        match_name_prefix: bool,
300        filter: Option<Vec<&Geoname>>,
301    ) -> Result<Vec<GeonameMatch>> {
302        let candidate_name = query;
303        Ok(self
304            .conn
305            .query_rows_and_then_cached(
306                r#"
307                SELECT
308                    g.id,
309                    g.name,
310                    g.feature_class,
311                    g.feature_code,
312                    g.country_code,
313                    g.admin1_code,
314                    g.admin2_code,
315                    g.admin3_code,
316                    g.admin4_code,
317                    g.population,
318                    g.latitude,
319                    g.longitude,
320                    a.name != :name AS prefix,
321                    (SELECT CASE
322                         -- abbreviation
323                         WHEN a.language = 'abbr' THEN 1
324                         -- airport code
325                         WHEN a.language IN ('iata', 'icao', 'faac') THEN 2
326                         -- name
327                         ELSE 3
328                         END
329                    ) AS match_type
330                FROM
331                    geonames g
332                JOIN
333                    geonames_alternates a ON g.id = a.geoname_id
334                WHERE
335                    a.name = :name
336                    OR (
337                        :prefix
338                        AND match_type = 3
339                        AND (a.name BETWEEN :name AND :name || X'FFFF')
340                    )
341                GROUP BY
342                    g.id, match_type
343                ORDER BY
344                    g.feature_class = 'P' DESC, g.population DESC, g.id ASC, a.language ASC
345                "#,
346                named_params! {
347                    ":name": candidate_name,
348                    ":prefix": match_name_prefix,
349                },
350                |row| -> Result<Option<GeonameMatch>> {
351                    let feature_class: String = row.get("feature_class")?;
352                    let feature_code: String = row.get("feature_code")?;
353                    let geoname_type = match feature_class.as_str() {
354                        "A" => {
355                            if feature_code.starts_with("P") {
356                                GeonameType::Country
357                            } else {
358                                match feature_code.as_str() {
359                                    "ADM1" => GeonameType::AdminDivision { level: 1 },
360                                    "ADM2" => GeonameType::AdminDivision { level: 2 },
361                                    "ADM3" => GeonameType::AdminDivision { level: 3 },
362                                    "ADM4" => GeonameType::AdminDivision { level: 4 },
363                                    _ => GeonameType::AdminDivisionOther,
364                                }
365                            }
366                        }
367                        "P" => GeonameType::City,
368                        _ => GeonameType::Other,
369                    };
370                    let g_match = GeonameMatch {
371                        geoname: Geoname {
372                            geoname_id: row.get("id")?,
373                            geoname_type,
374                            name: row.get("name")?,
375                            feature_class,
376                            feature_code,
377                            country_code: row.get("country_code")?,
378                            admin_division_codes: [
379                                row.get::<_, Option<String>>("admin1_code")?.map(|c| (1, c)),
380                                row.get::<_, Option<String>>("admin2_code")?.map(|c| (2, c)),
381                                row.get::<_, Option<String>>("admin3_code")?.map(|c| (3, c)),
382                                row.get::<_, Option<String>>("admin4_code")?.map(|c| (4, c)),
383                            ]
384                            .into_iter()
385                            .flatten()
386                            .collect(),
387                            population: row
388                                .get::<_, Option<u64>>("population")?
389                                .unwrap_or_default(),
390                            latitude: row
391                                .get::<_, Option<String>>("latitude")?
392                                .unwrap_or_default(),
393                            longitude: row
394                                .get::<_, Option<String>>("longitude")?
395                                .unwrap_or_default(),
396                        },
397                        prefix: row.get("prefix")?,
398                        match_type: match row.get::<_, i32>("match_type")? {
399                            1 => GeonameMatchType::Abbreviation,
400                            2 => GeonameMatchType::AirportCode,
401                            _ => GeonameMatchType::Name,
402                        },
403                    };
404                    if let Some(geonames) = &filter {
405                        if geonames.iter().all(|g| g.is_related_to(&g_match.geoname)) {
406                            Ok(Some(g_match))
407                        } else {
408                            Ok(None)
409                        }
410                    } else {
411                        Ok(Some(g_match))
412                    }
413                },
414            )?
415            .into_iter()
416            .flatten()
417            .collect())
418    }
419
420    /// Fetches alternate names for a geoname and its country and admin
421    /// divisions.
422    pub fn fetch_geoname_alternates(&self, geoname: &Geoname) -> Result<GeonameAlternates> {
423        #[derive(Debug)]
424        struct Row {
425            geoname_id: GeonameId,
426            feature_code: String,
427            primary_name: String,
428            alt_language: Option<String>,
429            alt_name: String,
430        }
431
432        let rows = self.conn.query_rows_and_then_cached(
433            r#"
434            SELECT
435                g.id,
436                g.feature_code,
437                g.name AS primary_name,
438                a.language AS alt_language,
439                a.name AS alt_name
440            FROM
441                geonames g
442            JOIN
443                geonames_alternates a ON g.id = a.geoname_id
444            WHERE
445                -- Ignore airport codes
446                (a.language IS NULL OR a.language NOT IN ('iata', 'icao', 'faac'))
447                AND (
448                    -- The row matches the passed-in geoname
449                    g.id = :geoname_id
450                    -- The row matches the geoname's country
451                    OR (
452                        g.feature_code IN ('PCLI', 'PCL', 'PCLD', 'PCLF', 'PCLS')
453                        AND g.country_code = :country
454                    )
455                    -- The row matches one of the geoname's admin divisions
456                    OR (
457                        g.country_code = :country
458                        AND (
459                            (g.feature_code = 'ADM1' AND g.admin1_code = :admin1)
460                            OR (g.feature_code = 'ADM2' AND g.admin2_code = :admin2)
461                            OR (g.feature_code = 'ADM3' AND g.admin3_code = :admin3)
462                            OR (g.feature_code = 'ADM4' AND g.admin4_code = :admin4)
463                        )
464                    )
465                )
466            ORDER BY
467                -- Group rows for the same geoname together
468                g.id ASC,
469                -- Sort preferred and short names first; longer names tend to be
470                -- less commonly used ("California" vs. "State of California")
471                a.is_preferred DESC,
472                a.is_short DESC,
473                -- `a.language` is null for the primary and ASCII name (see
474                -- `insert_geonames`); sort those last
475                a.language IS NULL ASC,
476                -- Group by language; `a.language` should be either null, 'abbr'
477                -- for abbreviations, or the language code that was ingested
478                -- according to the locale in the RS context
479                a.language ASC,
480                -- Sort shorter names first, same reason as above
481                length(a.name) ASC,
482                a.name ASC
483            "#,
484            named_params! {
485                ":geoname_id": geoname.geoname_id,
486                ":country": geoname.country_code,
487                ":admin1": geoname.admin_division_codes.get(&1),
488                ":admin2": geoname.admin_division_codes.get(&2),
489                ":admin3": geoname.admin_division_codes.get(&3),
490                ":admin4": geoname.admin_division_codes.get(&4),
491            },
492            |row| -> Result<Row> {
493                Ok(Row {
494                    geoname_id: row.get("id")?,
495                    feature_code: row.get("feature_code")?,
496                    primary_name: row.get("primary_name")?,
497                    alt_language: row.get("alt_language")?,
498                    alt_name: row.get("alt_name")?,
499                })
500            },
501        )?;
502
503        let mut geoname_localized: OnceCell<String> = OnceCell::new();
504        let mut geoname_abbr: OnceCell<String> = OnceCell::new();
505        let mut country_primary: OnceCell<String> = OnceCell::new();
506        let mut country_localized: OnceCell<String> = OnceCell::new();
507        let mut country_abbr: OnceCell<String> = OnceCell::new();
508        let mut admin_primary: HashMap<u8, String> = HashMap::new();
509        let mut admin_localized: HashMap<u8, String> = HashMap::new();
510        let mut admin_abbr: HashMap<u8, String> = HashMap::new();
511
512        // Loop through the rows. For each of the geoname, country, and admin
513        // divisions, save the first primary name, localized name, abbreviation
514        // we encounter. The `ORDER BY` in the query ensures that these will be
515        // the best names for each (or what we guess will be the best names).
516        for row in rows.into_iter() {
517            if row.geoname_id == geoname.geoname_id {
518                match row.alt_language.as_deref() {
519                    Some("abbr") => geoname_abbr.get_or_init(|| row.alt_name),
520                    _ => geoname_localized.get_or_init(|| row.alt_name),
521                };
522            } else if let Some(level) = match row.feature_code.as_str() {
523                "ADM1" => Some(1),
524                "ADM2" => Some(2),
525                "ADM3" => Some(3),
526                "ADM4" => Some(4),
527                _ => None,
528            } {
529                admin_primary.entry(level).or_insert(row.primary_name);
530                match row.alt_language.as_deref() {
531                    Some("abbr") => admin_abbr.entry(level).or_insert(row.alt_name),
532                    _ => admin_localized.entry(level).or_insert(row.alt_name),
533                };
534            } else {
535                country_primary.get_or_init(|| row.primary_name);
536                match row.alt_language.as_deref() {
537                    Some("abbr") => country_abbr.get_or_init(|| row.alt_name),
538                    _ => country_localized.get_or_init(|| row.alt_name),
539                };
540            }
541        }
542
543        Ok(GeonameAlternates {
544            geoname: AlternateNames {
545                primary: geoname.name.clone(),
546                localized: geoname_localized.take(),
547                abbreviation: geoname_abbr.take(),
548            },
549            country: country_primary.take().map(|primary| AlternateNames {
550                primary,
551                localized: country_localized.take(),
552                abbreviation: country_abbr.take(),
553            }),
554            admin_divisions: geoname
555                .admin_division_codes
556                .keys()
557                .filter_map(|level| {
558                    admin_primary.remove(level).map(|primary| {
559                        (
560                            *level,
561                            AlternateNames {
562                                primary,
563                                localized: admin_localized.remove(level),
564                                abbreviation: admin_abbr.remove(level),
565                            },
566                        )
567                    })
568                })
569                .collect(),
570        })
571    }
572
573    /// Inserts GeoNames data into the database.
574    fn insert_geonames(
575        &mut self,
576        record_id: &SuggestRecordId,
577        geonames: &[DownloadedGeoname],
578    ) -> Result<()> {
579        self.scope.err_if_interrupted()?;
580
581        let mut geoname_insert = GeonameInsertStatement::new(self.conn)?;
582        let mut alt_insert = GeonameAlternateInsertStatement::new(self.conn)?;
583        let mut metrics_updater = KeywordsMetricsUpdater::new();
584
585        for geoname in geonames {
586            geoname_insert.execute(record_id, geoname)?;
587
588            // Add alternates for each geoname's primary name (`geoname.name`)
589            // and ASCII name. `language` is set to null for these alternates.
590            alt_insert.execute(
591                record_id,
592                geoname.id,
593                None, // language
594                &DownloadedGeonamesAlternate::Name(geoname.name.as_str()),
595            )?;
596            metrics_updater.update(&geoname.name);
597
598            if let Some(ascii_name) = &geoname.ascii_name {
599                alt_insert.execute(
600                    record_id,
601                    geoname.id,
602                    None, // language
603                    &DownloadedGeonamesAlternate::Name(ascii_name.as_str()),
604                )?;
605                metrics_updater.update(ascii_name);
606            }
607        }
608
609        metrics_updater.finish(
610            self.conn,
611            record_id,
612            SuggestRecordType::GeonamesAlternates,
613            &mut self.geoname_cache,
614        )?;
615
616        Ok(())
617    }
618
619    /// Inserts GeoNames alternates data into the database.
620    fn insert_geonames_alternates(
621        &mut self,
622        record_id: &SuggestRecordId,
623        attachments: &[DownloadedGeonamesAlternatesAttachment],
624    ) -> Result<()> {
625        let mut alt_insert = GeonameAlternateInsertStatement::new(self.conn)?;
626        let mut metrics_updater = KeywordsMetricsUpdater::new();
627        for attach in attachments {
628            for (geoname_id, alts) in &attach.alternates_by_geoname_id {
629                for alt in alts {
630                    alt_insert.execute(record_id, *geoname_id, Some(&attach.language), alt)?;
631                    metrics_updater.update(alt.name());
632                }
633            }
634        }
635        metrics_updater.finish(
636            self.conn,
637            record_id,
638            SuggestRecordType::GeonamesAlternates,
639            &mut self.geoname_cache,
640        )?;
641        Ok(())
642    }
643
    /// Returns the geoname cache, building it via `get_or_init` if needed.
    ///
    /// The initializer loads the keywords metrics for
    /// `SuggestRecordType::GeonamesAlternates`. If that lookup fails, the
    /// error is discarded and default metrics are used instead.
    pub fn geoname_cache(&self) -> &GeonameCache {
        self.geoname_cache.get_or_init(|| GeonameCache {
            keywords_metrics: self
                .get_keywords_metrics(SuggestRecordType::GeonamesAlternates)
                // Best effort: fall back to default metrics on error.
                .unwrap_or_default(),
        })
    }
651}
652
impl<S> SuggestStoreInner<S>
where
    S: Client,
{
    /// Inserts a GeoNames record into the database.
    ///
    /// The record is passed to `download_attachment`, whose callback stores
    /// the attachment data with `SuggestDao::insert_geonames`.
    pub fn process_geonames_record(
        &self,
        dao: &mut SuggestDao,
        record: &Record,
        context: &mut MetricsContext,
    ) -> Result<()> {
        self.download_attachment(dao, record, context, |dao, record_id, data| {
            dao.insert_geonames(record_id, data)
        })
    }

    /// Inserts a GeoNames alternates record into the database.
    ///
    /// The record is passed to `download_attachment`, whose callback stores
    /// the attachment data with `SuggestDao::insert_geonames_alternates`.
    pub fn process_geonames_alternates_record(
        &self,
        dao: &mut SuggestDao,
        record: &Record,
        context: &mut MetricsContext,
    ) -> Result<()> {
        self.download_attachment(dao, record, context, |dao, record_id, data| {
            dao.insert_geonames_alternates(record_id, data)
        })
    }
}
681
/// A prepared statement that inserts rows into the `geonames` table.
struct GeonameInsertStatement<'conn>(rusqlite::Statement<'conn>);

impl<'conn> GeonameInsertStatement<'conn> {
    /// Prepares the insert statement on `conn`. The statement uses
    /// `INSERT OR REPLACE`.
    fn new(conn: &'conn Connection) -> Result<Self> {
        Ok(Self(conn.prepare(
            "INSERT OR REPLACE INTO geonames(
                 id,
                 record_id,
                 name,
                 feature_class,
                 feature_code,
                 country_code,
                 admin1_code,
                 admin2_code,
                 admin3_code,
                 admin4_code,
                 population,
                 latitude,
                 longitude
             )
             VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
             ",
        )?))
    }

    /// Inserts `g` as a row that belongs to the record `record_id`.
    ///
    /// The parameters must stay in the same order as the column list in
    /// `new`. Failures are tagged with the context "geoname insert".
    fn execute(&mut self, record_id: &SuggestRecordId, g: &DownloadedGeoname) -> Result<()> {
        self.0
            .execute(rusqlite::params![
                &g.id,
                record_id.as_str(),
                &g.name,
                &g.feature_class,
                &g.feature_code,
                &g.country,
                &g.admin1,
                &g.admin2,
                &g.admin3,
                &g.admin4,
                &g.population,
                &g.latitude,
                &g.longitude,
            ])
            .with_context("geoname insert")?;
        Ok(())
    }
}
728
/// A prepared statement that inserts rows into the `geonames_alternates`
/// table.
struct GeonameAlternateInsertStatement<'conn>(rusqlite::Statement<'conn>);

impl<'conn> GeonameAlternateInsertStatement<'conn> {
    /// Prepares the insert statement on `conn`. The statement is a plain
    /// `INSERT`.
    fn new(conn: &'conn Connection) -> Result<Self> {
        Ok(Self(conn.prepare(
            r#"
            INSERT INTO geonames_alternates(
                record_id,
                geoname_id,
                language,
                name,
                is_preferred,
                is_short
            )
            VALUES(?, ?, ?, ?, ?, ?)
            "#,
        )?))
    }

    /// Inserts `alt` as an alternate name of the geoname `geoname_id`.
    ///
    /// `language` is `None` for a geoname's primary and ASCII names (see
    /// `SuggestDao::insert_geonames`); otherwise it is the language of the
    /// attachment the alternate came from. The parameters must stay in the
    /// same order as the column list in `new`. Failures are tagged with the
    /// context "geoname alternate insert".
    fn execute<S: AsRef<str>>(
        &mut self,
        record_id: &SuggestRecordId,
        geoname_id: GeonameId,
        language: Option<&str>,
        alt: &DownloadedGeonamesAlternate<S>,
    ) -> Result<()> {
        self.0
            .execute((
                record_id.as_str(),
                geoname_id,
                language,
                alt.name(),
                alt.is_preferred(),
                alt.is_short(),
            ))
            .with_context("geoname alternate insert")?;
        Ok(())
    }
}
768
769#[cfg(test)]
770pub(crate) mod tests {
771    use super::*;
772    use crate::{
773        provider::SuggestionProvider,
774        rs::{Collection, SuggestRecordType},
775        store::tests::TestStore,
776        testing::*,
777        SuggestIngestionConstraints,
778    };
779    use itertools::Itertools;
780    use serde_json::Value as JsonValue;
781
    /// A long, many-word name shared with other tests in the crate.
    // NOTE(review): presumably used to exercise very long names/keywords; its
    // uses are outside this part of the file -- confirm.
    pub(crate) const LONG_NAME: &str = "123 aaa bbb ccc ddd eee fff ggg hhh iii jjj kkk lll mmm nnn ooo ppp qqq rrr sss ttt uuu vvv www x yyy zzz";
783
    /// Builds a mock `SuggestRecordType::Geonames` record in
    /// `Collection::Other` with the given `id`, no inline data, and `json` as
    /// its JSON attachment.
    pub(crate) fn geoname_mock_record(id: &str, json: JsonValue) -> MockRecord {
        MockRecord {
            collection: Collection::Other,
            record_type: SuggestRecordType::Geonames,
            id: id.to_string(),
            inline_data: None,
            attachment: Some(MockAttachment::Json(json)),
        }
    }
793
    /// Builds a mock `SuggestRecordType::GeonamesAlternates` record in
    /// `Collection::Other` with the given `id`, no inline data, and `json` as
    /// its JSON attachment.
    pub(crate) fn geoname_alternates_mock_record(id: &str, json: JsonValue) -> MockRecord {
        MockRecord {
            collection: Collection::Other,
            record_type: SuggestRecordType::GeonamesAlternates,
            id: id.to_string(),
            inline_data: None,
            attachment: Some(MockAttachment::Json(json)),
        }
    }
803
804    pub(crate) fn new_test_store() -> TestStore {
805        TestStore::new(
806            MockRemoteSettingsClient::default()
807                .with_record(geoname_mock_record("geonames-0", geonames_data()))
808                .with_record(geoname_alternates_mock_record(
809                    "geonames-alternates-en",
810                    geonames_alternates_data_en(),
811                ))
812                .with_record(geoname_alternates_mock_record(
813                    "geonames-alternates-abbr",
814                    geonames_alternates_data_abbr(),
815                ))
816                .with_record(geoname_alternates_mock_record(
817                    "geonames-alternates-iata",
818                    geonames_alternates_data_iata(),
819                )),
820        )
821    }
822
    /// Returns the JSON attachment for the mock geonames record used by
    /// `new_test_store()`.
    ///
    /// The value is an array of geoname objects. Each object has `id`, `name`,
    /// `feature_class`, `feature_code`, `country`, `population`, `latitude`,
    /// and `longitude`; some also have `admin1`..`admin4` codes and an
    /// `ascii_name`. Latitude and longitude are given as strings.
    fn geonames_data() -> serde_json::Value {
        json!([
            // Waterloo, AL
            {
                "id": 4096497,
                "name": "Waterloo",
                "feature_class": "P",
                "feature_code": "PPL",
                "country": "US",
                "admin1": "AL",
                "admin2": "077",
                "population": 200,
                "latitude": "34.91814",
                "longitude": "-88.0642",
            },

            // AL
            {
                "id": 4829764,
                "name": "Alabama",
                "feature_class": "A",
                "feature_code": "ADM1",
                "country": "US",
                "admin1": "AL",
                "population": 4530315,
                "latitude": "32.75041",
                "longitude": "-86.75026",
            },

            // Waterloo, IA
            {
                "id": 4880889,
                "name": "Waterloo",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "US",
                "admin1": "IA",
                "admin2": "013",
                "admin3": "94597",
                "population": 68460,
                "latitude": "42.49276",
                "longitude": "-92.34296",
            },

            // IA
            {
                "id": 4862182,
                "name": "Iowa",
                "feature_class": "A",
                "feature_code": "ADM1",
                "country": "US",
                "admin1": "IA",
                "population": 2955010,
                "latitude": "42.00027",
                "longitude": "-93.50049",
            },

            // New York City
            {
                "id": 5128581,
                "name": "New York City",
                "feature_class": "P",
                "feature_code": "PPL",
                "country": "US",
                "admin1": "NY",
                "population": 8804190,
                "latitude": "40.71427",
                "longitude": "-74.00597",
            },

            // Rochester, NY
            {
                "id": 5134086,
                "name": "Rochester",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "US",
                "admin1": "NY",
                "admin2": "055",
                "admin3": "63000",
                "population": 209802,
                "latitude": "43.15478",
                "longitude": "-77.61556",
            },

            // NY state
            {
                "id": 5128638,
                "name": "New York",
                "feature_class": "A",
                "feature_code": "ADM1",
                "country": "US",
                "admin1": "NY",
                "population": 19274244,
                "latitude": "43.00035",
                "longitude": "-75.4999",
            },

            // Waco, TX: Has a surprising IATA airport code that's a
            // common English word and not a prefix of the city name
            {
                "id": 4739526,
                "name": "Waco",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "US",
                "admin1": "TX",
                "admin2": "309",
                "population": 132356,
                "latitude": "31.54933",
                "longitude": "-97.14667",
            },

            // TX
            {
                "id": 4736286,
                "name": "Texas",
                "feature_class": "A",
                "feature_code": "ADM1",
                "country": "US",
                "admin1": "TX",
                "population": 22875689,
                "latitude": "31.25044",
                "longitude": "-99.25061",
            },

            // New Orleans (shares a prefix with New York)
            {
                "id": 4335045,
                "name": "New Orleans",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "US",
                "admin1": "LA",
                "admin2": "071",
                "admin3": "98000",
                "population": 389617,
                "latitude": "29.95465",
                "longitude": "-90.07507",
            },

            // Carlsbad, CA (in San Diego county, name starts with "CA")
            {
                "id": 5334223,
                "name": "Carlsbad",
                "country": "US",
                "feature_class": "P",
                "feature_code": "PPL",
                "admin1": "CA",
                "admin2": "073",
                "population": 114746,
                "latitude": "33.15809",
                "longitude": "-117.35059",
            },

            // San Diego
            {
                "id": 5391811,
                "name": "San Diego",
                "country": "US",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "admin1": "CA",
                "admin2": "073",
                "population": 1394928,
                "latitude": "32.71571",
                "longitude": "-117.16472",
            },

            // San Diego County (same name as the city)
            {
                "id": 5391832,
                "name": "San Diego",
                "country": "US",
                "feature_class": "A",
                "feature_code": "ADM2",
                "admin1": "CA",
                "admin2": "073",
                "population": 3095313,
                "latitude": "33.0282",
                "longitude": "-116.77021",
            },

            // CA
            {
                "id": 5332921,
                "name": "California",
                "feature_class": "A",
                "feature_code": "ADM1",
                "country": "US",
                "admin1": "CA",
                "population": 39512223,
                "latitude": "37.25022",
                "longitude": "-119.75126",
            },

            // Made-up city with a long name (the digits in the name are to
            // prevent matches on this geoname in weather tests, etc.)
            {
                "id": 999,
                "name": "123 Long Name",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "US",
                "admin1": "NY",
                "population": 2,
                "latitude": "38.06084",
                "longitude": "-97.92977",
            },

            // Made-up cities with punctuation in their alternates (the digits
            // in the names are to prevent matches on these geonames in weather
            // tests, etc.)
            {
                "id": 1000,
                "name": "123 Punctuation City 0",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "XX",
                "population": 2,
                "latitude": "38.06084",
                "longitude": "-97.92977",
            },
            {
                "id": 1001,
                "name": "123 Punctuation City 1",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "XX",
                "population": 2,
                "latitude": "38.06084",
                "longitude": "-97.92977",
            },
            {
                "id": 1002,
                "name": "123 Punctuation City 2",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "XX",
                "population": 2,
                "latitude": "38.06084",
                "longitude": "-97.92977",
            },
            {
                "id": 1003,
                "name": "123 Punctuation City 3",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "XX",
                "population": 2,
                "latitude": "38.06084",
                "longitude": "-97.92977",
            },
            {
                "id": 1004,
                "name": "123 Punctuation City 4",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "XX",
                "population": 2,
                "latitude": "38.06084",
                "longitude": "-97.92977",
            },
            {
                "id": 1005,
                "name": "123 Punctuation City 5",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "XX",
                "population": 2,
                "latitude": "38.06084",
                "longitude": "-97.92977",
            },

            // St. Louis (has '.' in name)
            {
                "id": 4407066,
                "name": "St. Louis",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "US",
                "admin1": "MO",
                "admin2": "510",
                "population": 315685,
                "latitude": "38.62727",
                "longitude": "-90.19789",
            },

            // Carmel-by-the-Sea (has '-' in name)
            {
                "id": 5334320,
                "name": "Carmel-by-the-Sea",
                "feature_class": "P",
                "feature_code": "PPL",
                "country": "US",
                "admin1": "CA",
                "admin2": "053",
                "population": 3897,
                "latitude": "36.55524",
                "longitude": "-121.92329",
            },

            // United States
            {
                "id": 6252001,
                "name": "United States",
                "feature_class": "A",
                "feature_code": "PCLI",
                "country": "US",
                "admin1": "00",
                "population": 327167434,
                "latitude": "39.76",
                "longitude": "-98.5",
            },

            // Canada
            {
                "id": 6251999,
                "name": "Canada",
                "feature_class": "A",
                "feature_code": "PCLI",
                "country": "CA",
                "admin1": "00",
                "population": 37058856,
                "latitude": "60.10867",
                "longitude": "-113.64258",
            },

            // ON
            {
                "id": 6093943,
                "name": "Ontario",
                "feature_class": "A",
                "feature_code": "ADM1",
                "country": "CA",
                "admin1": "08",
                "population": 12861940,
                "latitude": "49.25014",
                "longitude": "-84.49983",
            },

            // Waterloo, ON
            {
                "id": 6176823,
                "name": "Waterloo",
                "feature_class": "P",
                "feature_code": "PPL",
                "country": "CA",
                "admin1": "08",
                "admin2": "3530",
                "population": 104986,
                "latitude": "43.4668",
                "longitude": "-80.51639",
            },

            // La Visitation-de-l'Île-Dupas, Quebec
            {
                "id": 6050740,
                "name": "La Visitation-de-l'Île-Dupas",
                "feature_class": "P",
                "feature_code": "PPL",
                "country": "CA",
                "admin1": "10",
                "admin2": "14",
                "admin3": "52050",
                "population": 0,
                "latitude": "46.083333",
                "longitude": "-73.15",
            },

            // UK
            {
                "id": 2635167,
                "name": "United Kingdom of Great Britain and Northern Ireland",
                "feature_class": "A",
                "feature_code": "PCLI",
                "country": "GB",
                "admin1": "00",
                "population": 66488991,
                "latitude": "54.75844",
                "longitude": "-2.69531",
            },

            // England
            {
                "id": 6269131,
                "name": "England",
                "feature_class": "A",
                "feature_code": "ADM1",
                "country": "GB",
                "admin1": "ENG",
                "population": 57106398,
                "latitude": "52.16045",
                "longitude": "-0.70312",
            },

            // Liverpool (metropolitan borough, admin2 for Liverpool city)
            {
                "id": 3333167,
                "name": "Liverpool",
                "feature_class": "A",
                "feature_code": "ADM2",
                "country": "GB",
                "admin1": "ENG",
                "admin2": "H8",
                "population": 484578,
                "latitude": "53.41667",
                "longitude": "-2.91667",
            },

            // Liverpool (city)
            {
                "id": 2644210,
                "name": "Liverpool",
                "feature_class": "P",
                "feature_code": "PPLA2",
                "country": "GB",
                "admin1": "ENG",
                "admin2": "H8",
                "population": 864122,
                "latitude": "53.41058",
                "longitude": "-2.97794",
            },

            // Germany
            {
                "id": 2921044,
                "name": "Federal Republic of Germany",
                "feature_class": "A",
                "feature_code": "PCLI",
                "country": "DE",
                "admin1": "00",
                "population": 82927922,
                "latitude": "51.5",
                "longitude": "10.5",
            },

            // Gößnitz, DE (has non-basic-Latin chars and an `ascii_name`)
            {
                "id": 2918770,
                "name": "Gößnitz",
                "ascii_name": "Goessnitz",
                "feature_class": "P",
                "feature_code": "PPL",
                "country": "DE",
                "admin1": "15",
                "admin2": "00",
                "admin3": "16077",
                "admin4": "16077012",
                "population": 4104,
                "latitude": "50.88902",
                "longitude": "12.43292",
            },

            // Rheinland-Pfalz (similar to ON, Canada: both are admin1's and
            // both have the same admin1 code)
            {
                "id": 2847618,
                "name": "Rheinland-Pfalz",
                "feature_class": "A",
                "feature_code": "ADM1",
                "country": "DE",
                "admin1": "08",
                "population": 4093903,
                "latitude": "49.66667",
                "longitude": "7.5",
            },

            // Mainz, DE (city in Rheinland-Pfalz)
            {
                "id": 2874225,
                "name": "Mainz",
                "feature_class": "P",
                "feature_code": "PPLA",
                "country": "DE",
                "admin1": "08",
                "admin2": "00",
                "admin3": "07315",
                "admin4": "07315000",
                "population": 217123,
                "latitude": "49.98419",
                "longitude": "8.2791",
            },
        ])
    }
1308
    /// Returns the JSON attachment for the "en" alternates mock record used by
    /// `new_test_store()`.
    ///
    /// The value is an object with a `language` string and an
    /// `alternates_by_geoname_id` array of `[geoname_id, [alternate, ...]]`
    /// pairs. Each alternate is either a bare name string or an object with a
    /// `name` and optional `is_preferred` / `is_short` flags.
    fn geonames_alternates_data_en() -> serde_json::Value {
        json!({
            "language": "en",
            "alternates_by_geoname_id": [
                // United States
                [6252001, [
                    { "name": "United States", "is_preferred": true, "is_short": true },
                    { "name": "United States of America", "is_preferred": true },
                    { "name": "USA", "is_short": true },
                    "America",
                ]],

                // UK
                [2635167, [
                    { "name": "United Kingdom", "is_preferred": true, "is_short": true },
                    { "name": "Great Britain", "is_short": true },
                    { "name": "UK", "is_short": true },
                    "Britain",
                    "U.K.",
                    "United Kingdom of Great Britain and Northern Ireland",
                    "U.K",
                ]],

                // New York City
                [5128581, [
                    { "name": "New York", "is_preferred": true, "is_short": true },
                ]],

                // Made-up city with a long name
                [999, [LONG_NAME]],

                // Made-up cities with punctuation in their alternates
                [1000, [
                    // The first name is shorter, so we should prefer it.
                    "123 Made Up City w Punct in Alternates",
                    "123 Made-Up City. w/ Punct. in Alternates",
                ]],
                [1001, [
                    // The second name is shorter, so we should prefer it.
                    "123 Made-Up City. w/ Punct. in Alternates",
                    "123 Made Up City w Punct in Alternates",
                ]],
                [1002, [
                    // These names are the same length but the second will be
                    // sorted first due to the `a.name ASC` in the `ORDER BY`,
                    // so we should prefer it.
                    "123 Made-Up City. w/ Punct. in Alternates",
                    "123 Made Up City  w  Punct  in Alternates",
                    "123 Made-Up City. w/ Punct. in Alternatex",
                ]],
                [1003, [
                    // The second name has `is_preferred`, so we should prefer
                    // it.
                    "123 Aaa Bbb Ccc Ddd",
                    { "name": "123 Aaa, Bbb-Ccc. Ddd", "is_preferred": true },
                    "123 Aaa Bbb Ccc Eee",
                ]],
                [1004, [
                    // The second name has `is_short`, so we should prefer it.
                    "123 Aaa Bbb Ccc Ddd",
                    { "name": "123 Aaa, Bbb-Ccc. Ddd", "is_short": true },
                    "123 Aaa Bbb Ccc Eee",
                ]],
                [1005, [
                    // The second name has `is_preferred` and `is_short`, so we
                    // should prefer it.
                    "123 Aaa Bbb Ccc Ddd",
                    { "name": "123 Aaa, Bbb-Ccc. Ddd", "is_preferred": true, "is_short": true },
                    "123 Aaa Bbb Ccc Eee",
                ]],
            ],
        })
    }
1382
1383    fn geonames_alternates_data_abbr() -> serde_json::Value {
1384        json!({
1385            "language": "abbr",
1386            "alternates_by_geoname_id": [
1387                // AL
1388                [4829764, ["AL"]],
1389                // IA
1390                [4862182, ["IA"]],
1391                // ON
1392                [6093943, [
1393                    "ON",
1394                    "Ont.",
1395                ]],
1396                // NY State
1397                [5128638, ["NY"]],
1398                // TX
1399                [4736286, ["TX"]],
1400                // New York City
1401                [5128581, [
1402                    "NYC",
1403                    "NY",
1404                ]],
1405                // CA
1406                [5332921, ["CA"]],
1407                // United States
1408                [6252001, [
1409                    { "name": "US", "is_short": true },
1410                    "U.S.",
1411                    "USA",
1412                    "U.S.A.",
1413                ]],
1414                // Liverpool (metropolitan borough, admin2 for Liverpool city)
1415                [3333167, ["LIV"]],
1416                // UK
1417                [2635167, [
1418                    "UK",
1419                ]],
1420            ],
1421        })
1422    }
1423
1424    fn geonames_alternates_data_iata() -> serde_json::Value {
1425        json!({
1426            "language": "iata",
1427            "alternates_by_geoname_id": [
1428                // Waco, TX
1429                [4739526, ["ACT"]],
1430                // Rochester, NY
1431                [5134086, ["ROC"]],
1432            ],
1433        })
1434    }
1435
1436    pub(crate) fn waterloo_al() -> Geoname {
1437        Geoname {
1438            geoname_id: 4096497,
1439            geoname_type: GeonameType::City,
1440            name: "Waterloo".to_string(),
1441            feature_class: "P".to_string(),
1442            feature_code: "PPL".to_string(),
1443            country_code: "US".to_string(),
1444            admin_division_codes: [(1, "AL".to_string()), (2, "077".to_string())].into(),
1445            population: 200,
1446            latitude: "34.91814".to_string(),
1447            longitude: "-88.0642".to_string(),
1448        }
1449    }
1450
1451    pub(crate) fn waterloo_ia() -> Geoname {
1452        Geoname {
1453            geoname_id: 4880889,
1454            geoname_type: GeonameType::City,
1455            name: "Waterloo".to_string(),
1456            feature_class: "P".to_string(),
1457            feature_code: "PPLA2".to_string(),
1458            country_code: "US".to_string(),
1459            admin_division_codes: [
1460                (1, "IA".to_string()),
1461                (2, "013".to_string()),
1462                (3, "94597".to_string()),
1463            ]
1464            .into(),
1465            population: 68460,
1466            latitude: "42.49276".to_string(),
1467            longitude: "-92.34296".to_string(),
1468        }
1469    }
1470
1471    pub(crate) fn nyc() -> Geoname {
1472        Geoname {
1473            geoname_id: 5128581,
1474            geoname_type: GeonameType::City,
1475            name: "New York City".to_string(),
1476            feature_class: "P".to_string(),
1477            feature_code: "PPL".to_string(),
1478            country_code: "US".to_string(),
1479            admin_division_codes: [(1, "NY".to_string())].into(),
1480            population: 8804190,
1481            latitude: "40.71427".to_string(),
1482            longitude: "-74.00597".to_string(),
1483        }
1484    }
1485
1486    pub(crate) fn rochester() -> Geoname {
1487        Geoname {
1488            geoname_id: 5134086,
1489            geoname_type: GeonameType::City,
1490            name: "Rochester".to_string(),
1491            feature_class: "P".to_string(),
1492            feature_code: "PPLA2".to_string(),
1493            country_code: "US".to_string(),
1494            admin_division_codes: [
1495                (1, "NY".to_string()),
1496                (2, "055".to_string()),
1497                (3, "63000".to_string()),
1498            ]
1499            .into(),
1500            population: 209802,
1501            latitude: "43.15478".to_string(),
1502            longitude: "-77.61556".to_string(),
1503        }
1504    }
1505
1506    pub(crate) fn waco() -> Geoname {
1507        Geoname {
1508            geoname_id: 4739526,
1509            geoname_type: GeonameType::City,
1510            name: "Waco".to_string(),
1511            feature_class: "P".to_string(),
1512            feature_code: "PPLA2".to_string(),
1513            country_code: "US".to_string(),
1514            admin_division_codes: [(1, "TX".to_string()), (2, "309".to_string())].into(),
1515            population: 132356,
1516            latitude: "31.54933".to_string(),
1517            longitude: "-97.14667".to_string(),
1518        }
1519    }
1520
1521    pub(crate) fn new_orleans() -> Geoname {
1522        Geoname {
1523            geoname_id: 4335045,
1524            geoname_type: GeonameType::City,
1525            name: "New Orleans".to_string(),
1526            feature_class: "P".to_string(),
1527            feature_code: "PPLA2".to_string(),
1528            country_code: "US".to_string(),
1529            admin_division_codes: [
1530                (1, "LA".to_string()),
1531                (2, "071".to_string()),
1532                (3, "98000".to_string()),
1533            ]
1534            .into(),
1535            population: 389617,
1536            latitude: "29.95465".to_string(),
1537            longitude: "-90.07507".to_string(),
1538        }
1539    }
1540
1541    pub(crate) fn carlsbad() -> Geoname {
1542        Geoname {
1543            geoname_id: 5334223,
1544            geoname_type: GeonameType::City,
1545            name: "Carlsbad".to_string(),
1546            feature_class: "P".to_string(),
1547            feature_code: "PPL".to_string(),
1548            country_code: "US".to_string(),
1549            admin_division_codes: [(1, "CA".to_string()), (2, "073".to_string())].into(),
1550            population: 114746,
1551            latitude: "33.15809".to_string(),
1552            longitude: "-117.35059".to_string(),
1553        }
1554    }
1555
1556    pub(crate) fn san_diego() -> Geoname {
1557        Geoname {
1558            geoname_id: 5391811,
1559            geoname_type: GeonameType::City,
1560            name: "San Diego".to_string(),
1561            feature_class: "P".to_string(),
1562            feature_code: "PPLA2".to_string(),
1563            country_code: "US".to_string(),
1564            admin_division_codes: [(1, "CA".to_string()), (2, "073".to_string())].into(),
1565            population: 1394928,
1566            latitude: "32.71571".to_string(),
1567            longitude: "-117.16472".to_string(),
1568        }
1569    }
1570
1571    pub(crate) fn long_name_city() -> Geoname {
1572        Geoname {
1573            geoname_id: 999,
1574            geoname_type: GeonameType::City,
1575            name: "123 Long Name".to_string(),
1576            feature_class: "P".to_string(),
1577            feature_code: "PPLA2".to_string(),
1578            country_code: "US".to_string(),
1579            admin_division_codes: [(1, "NY".to_string())].into(),
1580            population: 2,
1581            latitude: "38.06084".to_string(),
1582            longitude: "-97.92977".to_string(),
1583        }
1584    }
1585
1586    pub(crate) fn punctuation_city(i: i64) -> Geoname {
1587        Geoname {
1588            geoname_id: 1000 + i,
1589            geoname_type: GeonameType::City,
1590            name: format!("123 Punctuation City {i}"),
1591            feature_class: "P".to_string(),
1592            feature_code: "PPLA2".to_string(),
1593            country_code: "XX".to_string(),
1594            admin_division_codes: [].into(),
1595            population: 2,
1596            latitude: "38.06084".to_string(),
1597            longitude: "-97.92977".to_string(),
1598        }
1599    }
1600
1601    pub(crate) fn al() -> Geoname {
1602        Geoname {
1603            geoname_id: 4829764,
1604            geoname_type: GeonameType::AdminDivision { level: 1 },
1605            name: "Alabama".to_string(),
1606            feature_class: "A".to_string(),
1607            feature_code: "ADM1".to_string(),
1608            country_code: "US".to_string(),
1609            admin_division_codes: [(1, "AL".to_string())].into(),
1610            population: 4530315,
1611            latitude: "32.75041".to_string(),
1612            longitude: "-86.75026".to_string(),
1613        }
1614    }
1615
1616    pub(crate) fn ia() -> Geoname {
1617        Geoname {
1618            geoname_id: 4862182,
1619            geoname_type: GeonameType::AdminDivision { level: 1 },
1620            name: "Iowa".to_string(),
1621            feature_class: "A".to_string(),
1622            feature_code: "ADM1".to_string(),
1623            country_code: "US".to_string(),
1624            admin_division_codes: [(1, "IA".to_string())].into(),
1625            population: 2955010,
1626            latitude: "42.00027".to_string(),
1627            longitude: "-93.50049".to_string(),
1628        }
1629    }
1630
1631    pub(crate) fn ny_state() -> Geoname {
1632        Geoname {
1633            geoname_id: 5128638,
1634            geoname_type: GeonameType::AdminDivision { level: 1 },
1635            name: "New York".to_string(),
1636            feature_class: "A".to_string(),
1637            feature_code: "ADM1".to_string(),
1638            country_code: "US".to_string(),
1639            admin_division_codes: [(1, "NY".to_string())].into(),
1640            population: 19274244,
1641            latitude: "43.00035".to_string(),
1642            longitude: "-75.4999".to_string(),
1643        }
1644    }
1645
1646    pub(crate) fn st_louis() -> Geoname {
1647        Geoname {
1648            geoname_id: 4407066,
1649            geoname_type: GeonameType::City,
1650            name: "St. Louis".to_string(),
1651            feature_class: "P".to_string(),
1652            feature_code: "PPLA2".to_string(),
1653            country_code: "US".to_string(),
1654            admin_division_codes: [(1, "MO".to_string()), (2, "510".to_string())].into(),
1655            population: 315685,
1656            latitude: "38.62727".to_string(),
1657            longitude: "-90.19789".to_string(),
1658        }
1659    }
1660
1661    pub(crate) fn carmel() -> Geoname {
1662        Geoname {
1663            geoname_id: 5334320,
1664            geoname_type: GeonameType::City,
1665            name: "Carmel-by-the-Sea".to_string(),
1666            feature_class: "P".to_string(),
1667            feature_code: "PPL".to_string(),
1668            country_code: "US".to_string(),
1669            admin_division_codes: [(1, "CA".to_string()), (2, "053".to_string())].into(),
1670            population: 3897,
1671            latitude: "36.55524".to_string(),
1672            longitude: "-121.92329".to_string(),
1673        }
1674    }
1675
1676    pub(crate) fn us() -> Geoname {
1677        Geoname {
1678            geoname_id: 6252001,
1679            geoname_type: GeonameType::Country,
1680            name: "United States".to_string(),
1681            feature_class: "A".to_string(),
1682            feature_code: "PCLI".to_string(),
1683            country_code: "US".to_string(),
1684            admin_division_codes: [(1, "00".to_string())].into(),
1685            population: 327167434,
1686            latitude: "39.76".to_string(),
1687            longitude: "-98.5".to_string(),
1688        }
1689    }
1690
1691    pub(crate) fn canada() -> Geoname {
1692        Geoname {
1693            geoname_id: 6251999,
1694            geoname_type: GeonameType::Country,
1695            name: "Canada".to_string(),
1696            feature_class: "A".to_string(),
1697            feature_code: "PCLI".to_string(),
1698            country_code: "CA".to_string(),
1699            admin_division_codes: [(1, "00".to_string())].into(),
1700            population: 37058856,
1701            latitude: "60.10867".to_string(),
1702            longitude: "-113.64258".to_string(),
1703        }
1704    }
1705
1706    pub(crate) fn on() -> Geoname {
1707        Geoname {
1708            geoname_id: 6093943,
1709            geoname_type: GeonameType::AdminDivision { level: 1 },
1710            name: "Ontario".to_string(),
1711            feature_class: "A".to_string(),
1712            feature_code: "ADM1".to_string(),
1713            country_code: "CA".to_string(),
1714            admin_division_codes: [(1, "08".to_string())].into(),
1715            population: 12861940,
1716            latitude: "49.25014".to_string(),
1717            longitude: "-84.49983".to_string(),
1718        }
1719    }
1720
1721    pub(crate) fn waterloo_on() -> Geoname {
1722        Geoname {
1723            geoname_id: 6176823,
1724            geoname_type: GeonameType::City,
1725            name: "Waterloo".to_string(),
1726            feature_class: "P".to_string(),
1727            feature_code: "PPL".to_string(),
1728            country_code: "CA".to_string(),
1729            admin_division_codes: [(1, "08".to_string()), (2, "3530".to_string())].into(),
1730            population: 104986,
1731            latitude: "43.4668".to_string(),
1732            longitude: "-80.51639".to_string(),
1733        }
1734    }
1735
1736    pub(crate) fn la_visitation() -> Geoname {
1737        Geoname {
1738            geoname_id: 6050740,
1739            geoname_type: GeonameType::City,
1740            name: "La Visitation-de-l'Île-Dupas".to_string(),
1741            feature_class: "P".to_string(),
1742            feature_code: "PPL".to_string(),
1743            country_code: "CA".to_string(),
1744            admin_division_codes: [
1745                (1, "10".to_string()),
1746                (2, "14".to_string()),
1747                (3, "52050".to_string()),
1748            ]
1749            .into(),
1750            population: 0,
1751            latitude: "46.083333".to_string(),
1752            longitude: "-73.15".to_string(),
1753        }
1754    }
1755
1756    pub(crate) fn uk() -> Geoname {
1757        Geoname {
1758            geoname_id: 2635167,
1759            geoname_type: GeonameType::Country,
1760            name: "United Kingdom of Great Britain and Northern Ireland".to_string(),
1761            feature_class: "A".to_string(),
1762            feature_code: "PCLI".to_string(),
1763            country_code: "GB".to_string(),
1764            admin_division_codes: [(1, "00".to_string())].into(),
1765            population: 66488991,
1766            latitude: "54.75844".to_string(),
1767            longitude: "-2.69531".to_string(),
1768        }
1769    }
1770
1771    pub(crate) fn england() -> Geoname {
1772        Geoname {
1773            geoname_id: 6269131,
1774            geoname_type: GeonameType::AdminDivision { level: 1 },
1775            name: "England".to_string(),
1776            feature_class: "A".to_string(),
1777            feature_code: "ADM1".to_string(),
1778            country_code: "GB".to_string(),
1779            admin_division_codes: [(1, "ENG".to_string())].into(),
1780            population: 57106398,
1781            latitude: "52.16045".to_string(),
1782            longitude: "-0.70312".to_string(),
1783        }
1784    }
1785
1786    pub(crate) fn liverpool_metro() -> Geoname {
1787        Geoname {
1788            geoname_id: 3333167,
1789            geoname_type: GeonameType::AdminDivision { level: 2 },
1790            name: "Liverpool".to_string(),
1791            feature_class: "A".to_string(),
1792            feature_code: "ADM2".to_string(),
1793            country_code: "GB".to_string(),
1794            admin_division_codes: [(1, "ENG".to_string()), (2, "H8".to_string())].into(),
1795            population: 484578,
1796            latitude: "53.41667".to_string(),
1797            longitude: "-2.91667".to_string(),
1798        }
1799    }
1800
1801    pub(crate) fn liverpool_city() -> Geoname {
1802        Geoname {
1803            geoname_id: 2644210,
1804            geoname_type: GeonameType::City,
1805            name: "Liverpool".to_string(),
1806            feature_class: "P".to_string(),
1807            feature_code: "PPLA2".to_string(),
1808            country_code: "GB".to_string(),
1809            admin_division_codes: [(1, "ENG".to_string()), (2, "H8".to_string())].into(),
1810            population: 864122,
1811            latitude: "53.41058".to_string(),
1812            longitude: "-2.97794".to_string(),
1813        }
1814    }
1815
1816    pub(crate) fn germany() -> Geoname {
1817        Geoname {
1818            geoname_id: 2921044,
1819            geoname_type: GeonameType::Country,
1820            name: "Federal Republic of Germany".to_string(),
1821            feature_class: "A".to_string(),
1822            feature_code: "PCLI".to_string(),
1823            country_code: "DE".to_string(),
1824            admin_division_codes: [(1, "00".to_string())].into(),
1825            population: 82927922,
1826            latitude: "51.5".to_string(),
1827            longitude: "10.5".to_string(),
1828        }
1829    }
1830
1831    pub(crate) fn goessnitz() -> Geoname {
1832        Geoname {
1833            geoname_id: 2918770,
1834            geoname_type: GeonameType::City,
1835            name: "Gößnitz".to_string(),
1836            feature_class: "P".to_string(),
1837            feature_code: "PPL".to_string(),
1838            country_code: "DE".to_string(),
1839            admin_division_codes: [
1840                (1, "15".to_string()),
1841                (2, "00".to_string()),
1842                (3, "16077".to_string()),
1843                (4, "16077012".to_string()),
1844            ]
1845            .into(),
1846            population: 4104,
1847            latitude: "50.88902".to_string(),
1848            longitude: "12.43292".to_string(),
1849        }
1850    }
1851
1852    pub(crate) fn rheinland_pfalz() -> Geoname {
1853        Geoname {
1854            geoname_id: 2847618,
1855            geoname_type: GeonameType::AdminDivision { level: 1 },
1856            name: "Rheinland-Pfalz".to_string(),
1857            feature_class: "A".to_string(),
1858            feature_code: "ADM1".to_string(),
1859            country_code: "DE".to_string(),
1860            admin_division_codes: [(1, "08".to_string())].into(),
1861            population: 4093903,
1862            latitude: "49.66667".to_string(),
1863            longitude: "7.5".to_string(),
1864        }
1865    }
1866
1867    pub(crate) fn mainz() -> Geoname {
1868        Geoname {
1869            geoname_id: 2874225,
1870            geoname_type: GeonameType::City,
1871            name: "Mainz".to_string(),
1872            feature_class: "P".to_string(),
1873            feature_code: "PPLA".to_string(),
1874            country_code: "DE".to_string(),
1875            admin_division_codes: [
1876                (1, "08".to_string()),
1877                (2, "00".to_string()),
1878                (3, "07315".to_string()),
1879                (4, "07315000".to_string()),
1880            ]
1881            .into(),
1882            population: 217123,
1883            latitude: "49.98419".to_string(),
1884            longitude: "8.2791".to_string(),
1885        }
1886    }
1887
1888    #[test]
1889    fn is_related_to() -> anyhow::Result<()> {
1890        // The geonames in each vec should be pairwise related.
1891        let tests = [
1892            vec![waterloo_ia(), ia(), us()],
1893            vec![waterloo_al(), al(), us()],
1894            vec![waterloo_on(), on(), canada()],
1895            vec![liverpool_city(), liverpool_metro(), england(), uk()],
1896            vec![mainz(), rheinland_pfalz(), germany()],
1897        ];
1898        for geonames in tests {
1899            for g in &geonames {
1900                // A geoname should always be related to itself.
1901                assert!(
1902                    g.is_related_to(g),
1903                    "g.is_related_to(g) should always be true: {:?}",
1904                    g
1905                );
1906            }
1907            for a_and_b in geonames.iter().permutations(2) {
1908                assert!(
1909                    a_and_b[0].is_related_to(a_and_b[1]),
1910                    "is_related_to: {:?}",
1911                    a_and_b
1912                );
1913            }
1914        }
1915        Ok(())
1916    }
1917
1918    #[test]
1919    fn is_not_related_to() -> anyhow::Result<()> {
1920        // The geonames in each vec should not be pairwise related.
1921        let tests = [
1922            vec![waterloo_ia(), al()],
1923            vec![waterloo_ia(), on()],
1924            vec![waterloo_ia(), canada(), uk()],
1925            vec![waterloo_al(), ia()],
1926            vec![waterloo_al(), on()],
1927            vec![waterloo_al(), canada(), uk()],
1928            vec![waterloo_on(), al()],
1929            vec![waterloo_on(), ia()],
1930            vec![waterloo_on(), us(), uk()],
1931            vec![
1932                waterloo_ia(),
1933                waterloo_al(),
1934                waterloo_on(),
1935                liverpool_city(),
1936            ],
1937            vec![liverpool_city(), us(), canada()],
1938            vec![liverpool_metro(), us(), canada()],
1939            vec![england(), us(), canada()],
1940            vec![al(), ia(), on(), england()],
1941            vec![us(), canada(), uk()],
1942            // ON, Canada and Rheinland-Pfalz are both admin1's and both have
1943            // the same admin1 code, but they're not related
1944            vec![on(), rheinland_pfalz()],
1945            // Mainz is a city in Rheinland-Pfalz
1946            vec![on(), mainz()],
1947            // Waterloo, ON is a city in ON
1948            vec![rheinland_pfalz(), waterloo_on()],
1949        ];
1950        for geonames in tests {
1951            for a_and_b in geonames.iter().permutations(2) {
1952                assert!(
1953                    !a_and_b[0].is_related_to(a_and_b[1]),
1954                    "!is_related_to: {:?}",
1955                    a_and_b
1956                );
1957            }
1958        }
1959        Ok(())
1960    }
1961
1962    #[test]
1963    fn alternates() -> anyhow::Result<()> {
1964        before_each();
1965
1966        let store = new_test_store();
1967
1968        // Ingest weather to also ingest geonames.
1969        store.ingest(SuggestIngestionConstraints {
1970            providers: Some(vec![SuggestionProvider::Weather]),
1971            ..SuggestIngestionConstraints::all_providers()
1972        });
1973
1974        #[derive(Debug)]
1975        struct Test {
1976            geoname: Geoname,
1977            expected: GeonameAlternates,
1978        }
1979
1980        impl Test {
1981            fn new<F: FnOnce(&Geoname) -> GeonameAlternates>(
1982                geoname: Geoname,
1983                expected: F,
1984            ) -> Self {
1985                Test {
1986                    expected: expected(&geoname),
1987                    geoname,
1988                }
1989            }
1990        }
1991
1992        let tests = [
1993            Test::new(nyc(), |g| GeonameAlternates {
1994                geoname: AlternateNames {
1995                    primary: g.name.clone(),
1996                    localized: Some("New York".to_string()),
1997                    abbreviation: Some("NY".to_string()),
1998                },
1999                country: Some(AlternateNames {
2000                    primary: us().name,
2001                    localized: Some("United States".to_string()),
2002                    abbreviation: Some("US".to_string()),
2003                }),
2004                admin_divisions: [(
2005                    1,
2006                    AlternateNames {
2007                        primary: ny_state().name,
2008                        localized: Some("New York".to_string()),
2009                        abbreviation: Some("NY".to_string()),
2010                    },
2011                )]
2012                .into(),
2013            }),
2014            Test::new(waterloo_on(), |g| GeonameAlternates {
2015                geoname: AlternateNames {
2016                    primary: g.name.clone(),
2017                    localized: Some("Waterloo".to_string()),
2018                    abbreviation: None,
2019                },
2020                country: Some(AlternateNames {
2021                    primary: "Canada".to_string(),
2022                    // There are no alternates for Canada so `localized` should
2023                    // be the primary name
2024                    localized: Some("Canada".to_string()),
2025                    abbreviation: None,
2026                }),
2027                admin_divisions: [(
2028                    1,
2029                    AlternateNames {
2030                        primary: on().name,
2031                        localized: Some("Ontario".to_string()),
2032                        abbreviation: Some("ON".to_string()),
2033                    },
2034                )]
2035                .into(),
2036            }),
2037            Test::new(liverpool_city(), |g| GeonameAlternates {
2038                geoname: AlternateNames {
2039                    primary: g.name.clone(),
2040                    localized: Some("Liverpool".to_string()),
2041                    abbreviation: None,
2042                },
2043                country: Some(AlternateNames {
2044                    primary: uk().name,
2045                    localized: Some("United Kingdom".to_string()),
2046                    abbreviation: Some("UK".to_string()),
2047                }),
2048                admin_divisions: [
2049                    (
2050                        1,
2051                        AlternateNames {
2052                            primary: england().name,
2053                            localized: Some("England".to_string()),
2054                            abbreviation: None,
2055                        },
2056                    ),
2057                    (
2058                        2,
2059                        AlternateNames {
2060                            primary: liverpool_metro().name,
2061                            localized: Some("Liverpool".to_string()),
2062                            abbreviation: Some("LIV".to_string()),
2063                        },
2064                    ),
2065                ]
2066                .into(),
2067            }),
2068            Test::new(mainz(), |g| GeonameAlternates {
2069                geoname: AlternateNames {
2070                    primary: g.name.clone(),
2071                    localized: Some(g.name.clone()),
2072                    abbreviation: None,
2073                },
2074                country: Some(AlternateNames {
2075                    primary: germany().name,
2076                    localized: Some(germany().name),
2077                    abbreviation: None,
2078                }),
2079                admin_divisions: [(
2080                    1,
2081                    AlternateNames {
2082                        primary: rheinland_pfalz().name,
2083                        localized: Some(rheinland_pfalz().name),
2084                        abbreviation: None,
2085                    },
2086                )]
2087                .into(),
2088            }),
2089            Test::new(punctuation_city(0), |g| GeonameAlternates {
2090                geoname: AlternateNames {
2091                    primary: g.name.clone(),
2092                    localized: Some("123 Made Up City w Punct in Alternates".to_string()),
2093                    abbreviation: None,
2094                },
2095                country: None,
2096                admin_divisions: [].into(),
2097            }),
2098            Test::new(punctuation_city(1), |g| GeonameAlternates {
2099                geoname: AlternateNames {
2100                    primary: g.name.clone(),
2101                    localized: Some("123 Made Up City w Punct in Alternates".to_string()),
2102                    abbreviation: None,
2103                },
2104                country: None,
2105                admin_divisions: [].into(),
2106            }),
2107            Test::new(punctuation_city(2), |g| GeonameAlternates {
2108                geoname: AlternateNames {
2109                    primary: g.name.clone(),
2110                    localized: Some("123 Made Up City  w  Punct  in Alternates".to_string()),
2111                    abbreviation: None,
2112                },
2113                country: None,
2114                admin_divisions: [].into(),
2115            }),
2116            Test::new(punctuation_city(3), |g| GeonameAlternates {
2117                geoname: AlternateNames {
2118                    primary: g.name.clone(),
2119                    localized: Some("123 Aaa, Bbb-Ccc. Ddd".to_string()),
2120                    abbreviation: None,
2121                },
2122                country: None,
2123                admin_divisions: [].into(),
2124            }),
2125            Test::new(punctuation_city(4), |g| GeonameAlternates {
2126                geoname: AlternateNames {
2127                    primary: g.name.clone(),
2128                    localized: Some("123 Aaa, Bbb-Ccc. Ddd".to_string()),
2129                    abbreviation: None,
2130                },
2131                country: None,
2132                admin_divisions: [].into(),
2133            }),
2134            Test::new(punctuation_city(5), |g| GeonameAlternates {
2135                geoname: AlternateNames {
2136                    primary: g.name.clone(),
2137                    localized: Some("123 Aaa, Bbb-Ccc. Ddd".to_string()),
2138                    abbreviation: None,
2139                },
2140                country: None,
2141                admin_divisions: [].into(),
2142            }),
2143        ];
2144
2145        store.read(|dao| {
2146            for t in tests {
2147                assert_eq!(
2148                    dao.fetch_geoname_alternates(&t.geoname)?,
2149                    t.expected,
2150                    "geoname={:?}",
2151                    t.geoname
2152                );
2153            }
2154            Ok(())
2155        })?;
2156
2157        Ok(())
2158    }
2159
2160    #[test]
2161    fn geonames() -> anyhow::Result<()> {
2162        before_each();
2163
2164        let store = new_test_store();
2165
2166        // Ingest weather to also ingest geonames.
2167        store.ingest(SuggestIngestionConstraints {
2168            providers: Some(vec![SuggestionProvider::Weather]),
2169            ..SuggestIngestionConstraints::all_providers()
2170        });
2171
2172        #[derive(Debug)]
2173        struct Test {
2174            query: &'static str,
2175            match_name_prefix: bool,
2176            filter: Option<Vec<Geoname>>,
2177            expected: Vec<GeonameMatch>,
2178        }
2179
2180        let tests = [
2181            Test {
2182                query: "ia",
2183                match_name_prefix: false,
2184                filter: None,
2185                expected: vec![GeonameMatch {
2186                    geoname: ia(),
2187                    match_type: GeonameMatchType::Abbreviation,
2188                    prefix: false,
2189                }],
2190            },
2191            Test {
2192                query: "ia",
2193                match_name_prefix: true,
2194                filter: None,
2195                expected: vec![GeonameMatch {
2196                    geoname: ia(),
2197                    match_type: GeonameMatchType::Abbreviation,
2198                    prefix: false,
2199                }],
2200            },
2201            Test {
2202                query: "ia",
2203                match_name_prefix: false,
2204                filter: Some(vec![waterloo_ia(), waterloo_al()]),
2205                expected: vec![],
2206            },
2207            Test {
2208                query: "ia",
2209                match_name_prefix: false,
2210                filter: Some(vec![waterloo_ia()]),
2211                expected: vec![GeonameMatch {
2212                    geoname: ia(),
2213                    match_type: GeonameMatchType::Abbreviation,
2214                    prefix: false,
2215                }],
2216            },
2217            Test {
2218                query: "ia",
2219                match_name_prefix: false,
2220                filter: Some(vec![us()]),
2221                expected: vec![GeonameMatch {
2222                    geoname: ia(),
2223                    match_type: GeonameMatchType::Abbreviation,
2224                    prefix: false,
2225                }],
2226            },
2227            Test {
2228                query: "ia",
2229                match_name_prefix: false,
2230                filter: Some(vec![waterloo_al()]),
2231                expected: vec![],
2232            },
2233            Test {
2234                query: "ia",
2235                match_name_prefix: false,
2236                filter: Some(vec![canada()]),
2237                expected: vec![],
2238            },
2239            Test {
2240                query: "ia",
2241                match_name_prefix: false,
2242                filter: Some(vec![uk()]),
2243                expected: vec![],
2244            },
2245            Test {
2246                query: "iaxyz",
2247                match_name_prefix: false,
2248                filter: None,
2249                expected: vec![],
2250            },
2251            Test {
2252                query: "iaxyz",
2253                match_name_prefix: true,
2254                filter: None,
2255                expected: vec![],
2256            },
2257            Test {
2258                query: "iowa",
2259                match_name_prefix: false,
2260                filter: None,
2261                expected: vec![GeonameMatch {
2262                    geoname: ia(),
2263                    match_type: GeonameMatchType::Name,
2264                    prefix: false,
2265                }],
2266            },
2267            Test {
2268                query: "al",
2269                match_name_prefix: false,
2270                filter: None,
2271                expected: vec![GeonameMatch {
2272                    geoname: al(),
2273                    match_type: GeonameMatchType::Abbreviation,
2274                    prefix: false,
2275                }],
2276            },
2277            // "al" is both a name prefix and an abbreviation.
2278            Test {
2279                query: "al",
2280                match_name_prefix: true,
2281                filter: None,
2282                expected: vec![
2283                    GeonameMatch {
2284                        geoname: al(),
2285                        match_type: GeonameMatchType::Name,
2286                        prefix: true,
2287                    },
2288                    GeonameMatch {
2289                        geoname: al(),
2290                        match_type: GeonameMatchType::Abbreviation,
2291                        prefix: false,
2292                    },
2293                ],
2294            },
2295            Test {
2296                query: "waterloo",
2297                match_name_prefix: false,
2298                filter: Some(vec![ia()]),
2299                expected: vec![GeonameMatch {
2300                    geoname: waterloo_ia(),
2301                    match_type: GeonameMatchType::Name,
2302                    prefix: false,
2303                }],
2304            },
2305            Test {
2306                query: "waterloo",
2307                match_name_prefix: false,
2308                filter: Some(vec![al()]),
2309                expected: vec![GeonameMatch {
2310                    geoname: waterloo_al(),
2311                    match_type: GeonameMatchType::Name,
2312                    prefix: false,
2313                }],
2314            },
2315            Test {
2316                query: "waterloo",
2317                match_name_prefix: false,
2318                filter: Some(vec![ny_state()]),
2319                expected: vec![],
2320            },
2321            Test {
2322                query: "waterloo",
2323                match_name_prefix: false,
2324                filter: None,
2325                // Matches should be returned by population descending.
2326                expected: vec![
2327                    GeonameMatch {
2328                        geoname: waterloo_on(),
2329                        match_type: GeonameMatchType::Name,
2330                        prefix: false,
2331                    },
2332                    GeonameMatch {
2333                        geoname: waterloo_ia(),
2334                        match_type: GeonameMatchType::Name,
2335                        prefix: false,
2336                    },
2337                    GeonameMatch {
2338                        geoname: waterloo_al(),
2339                        match_type: GeonameMatchType::Name,
2340                        prefix: false,
2341                    },
2342                ],
2343            },
2344            Test {
2345                query: "water",
2346                match_name_prefix: true,
2347                filter: None,
2348                expected: vec![
2349                    GeonameMatch {
2350                        geoname: waterloo_on(),
2351                        match_type: GeonameMatchType::Name,
2352                        prefix: true,
2353                    },
2354                    GeonameMatch {
2355                        geoname: waterloo_ia(),
2356                        match_type: GeonameMatchType::Name,
2357                        prefix: true,
2358                    },
2359                    GeonameMatch {
2360                        geoname: waterloo_al(),
2361                        match_type: GeonameMatchType::Name,
2362                        prefix: true,
2363                    },
2364                ],
2365            },
2366            Test {
2367                query: "water",
2368                match_name_prefix: false,
2369                filter: None,
2370                expected: vec![],
2371            },
2372            Test {
2373                query: "waterloo",
2374                match_name_prefix: false,
2375                filter: Some(vec![us()]),
2376                expected: vec![
2377                    GeonameMatch {
2378                        geoname: waterloo_ia(),
2379                        match_type: GeonameMatchType::Name,
2380                        prefix: false,
2381                    },
2382                    GeonameMatch {
2383                        geoname: waterloo_al(),
2384                        match_type: GeonameMatchType::Name,
2385                        prefix: false,
2386                    },
2387                ],
2388            },
2389            Test {
2390                query: "waterloo",
2391                match_name_prefix: false,
2392                filter: Some(vec![al(), us()]),
2393                expected: vec![GeonameMatch {
2394                    geoname: waterloo_al(),
2395                    match_type: GeonameMatchType::Name,
2396                    prefix: false,
2397                }],
2398            },
2399            Test {
2400                query: "waterloo",
2401                match_name_prefix: false,
2402                filter: Some(vec![us(), al()]),
2403                expected: vec![GeonameMatch {
2404                    geoname: waterloo_al(),
2405                    match_type: GeonameMatchType::Name,
2406                    prefix: false,
2407                }],
2408            },
2409            Test {
2410                query: "waterloo",
2411                match_name_prefix: false,
2412                filter: Some(vec![ia(), al()]),
2413                expected: vec![],
2414            },
2415            Test {
2416                query: "waterloo",
2417                match_name_prefix: false,
2418                filter: Some(vec![canada()]),
2419                expected: vec![GeonameMatch {
2420                    geoname: waterloo_on(),
2421                    match_type: GeonameMatchType::Name,
2422                    prefix: false,
2423                }],
2424            },
2425            Test {
2426                query: "waterloo",
2427                match_name_prefix: false,
2428                filter: Some(vec![on()]),
2429                expected: vec![GeonameMatch {
2430                    geoname: waterloo_on(),
2431                    match_type: GeonameMatchType::Name,
2432                    prefix: false,
2433                }],
2434            },
2435            Test {
2436                query: "waterloo",
2437                match_name_prefix: false,
2438                filter: Some(vec![on(), canada()]),
2439                expected: vec![GeonameMatch {
2440                    geoname: waterloo_on(),
2441                    match_type: GeonameMatchType::Name,
2442                    prefix: false,
2443                }],
2444            },
2445            Test {
2446                query: "waterloo",
2447                match_name_prefix: false,
2448                filter: Some(vec![canada(), on()]),
2449                expected: vec![GeonameMatch {
2450                    geoname: waterloo_on(),
2451                    match_type: GeonameMatchType::Name,
2452                    prefix: false,
2453                }],
2454            },
2455            Test {
2456                query: "waterloo",
2457                match_name_prefix: false,
2458                filter: Some(vec![al(), canada()]),
2459                expected: vec![],
2460            },
2461            Test {
2462                query: "waterloo",
2463                match_name_prefix: false,
2464                filter: Some(vec![on(), us()]),
2465                expected: vec![],
2466            },
2467            Test {
2468                query: "waterloo",
2469                match_name_prefix: false,
2470                filter: Some(vec![waterloo_al()]),
2471                expected: vec![GeonameMatch {
2472                    geoname: waterloo_al(),
2473                    match_type: GeonameMatchType::Name,
2474                    prefix: false,
2475                }],
2476            },
2477            Test {
2478                query: "waterloo",
2479                match_name_prefix: false,
2480                filter: Some(vec![uk()]),
2481                expected: vec![],
2482            },
2483            Test {
2484                query: "waterlooxyz",
2485                match_name_prefix: false,
2486                filter: None,
2487                expected: vec![],
2488            },
2489            Test {
2490                query: "waterlooxyz",
2491                match_name_prefix: true,
2492                filter: None,
2493                expected: vec![],
2494            },
2495            Test {
2496                query: "waterloo xyz",
2497                match_name_prefix: false,
2498                filter: None,
2499                expected: vec![],
2500            },
2501            Test {
2502                query: "waterloo xyz",
2503                match_name_prefix: true,
2504                filter: None,
2505                expected: vec![],
2506            },
2507            Test {
2508                query: "ny",
2509                match_name_prefix: false,
2510                filter: None,
2511                // NYC should be first since cities are ordered before regions.
2512                expected: vec![
2513                    GeonameMatch {
2514                        geoname: nyc(),
2515                        match_type: GeonameMatchType::Abbreviation,
2516                        prefix: false,
2517                    },
2518                    GeonameMatch {
2519                        geoname: ny_state(),
2520                        match_type: GeonameMatchType::Abbreviation,
2521                        prefix: false,
2522                    },
2523                ],
2524            },
2525            Test {
2526                query: "ny",
2527                match_name_prefix: false,
2528                filter: Some(vec![nyc()]),
2529                expected: vec![
2530                    GeonameMatch {
2531                        geoname: nyc(),
2532                        match_type: GeonameMatchType::Abbreviation,
2533                        prefix: false,
2534                    },
2535                    GeonameMatch {
2536                        geoname: ny_state(),
2537                        match_type: GeonameMatchType::Abbreviation,
2538                        prefix: false,
2539                    },
2540                ],
2541            },
2542            Test {
2543                query: "ny",
2544                match_name_prefix: false,
2545                filter: Some(vec![ny_state()]),
2546                expected: vec![
2547                    GeonameMatch {
2548                        geoname: nyc(),
2549                        match_type: GeonameMatchType::Abbreviation,
2550                        prefix: false,
2551                    },
2552                    GeonameMatch {
2553                        geoname: ny_state(),
2554                        match_type: GeonameMatchType::Abbreviation,
2555                        prefix: false,
2556                    },
2557                ],
2558            },
2559            Test {
2560                query: "nyc",
2561                match_name_prefix: false,
2562                filter: None,
2563                expected: vec![GeonameMatch {
2564                    geoname: nyc(),
2565                    match_type: GeonameMatchType::Abbreviation,
2566                    prefix: false,
2567                }],
2568            },
2569            Test {
2570                query: "NeW YoRk",
2571                match_name_prefix: false,
2572                filter: None,
2573                expected: vec![
2574                    GeonameMatch {
2575                        geoname: nyc(),
2576                        match_type: GeonameMatchType::Name,
2577                        prefix: false,
2578                    },
2579                    GeonameMatch {
2580                        geoname: ny_state(),
2581                        match_type: GeonameMatchType::Name,
2582                        prefix: false,
2583                    },
2584                ],
2585            },
2586            Test {
2587                query: "NY",
2588                match_name_prefix: false,
2589                filter: None,
2590                expected: vec![
2591                    GeonameMatch {
2592                        geoname: nyc(),
2593                        match_type: GeonameMatchType::Abbreviation,
2594                        prefix: false,
2595                    },
2596                    GeonameMatch {
2597                        geoname: ny_state(),
2598                        match_type: GeonameMatchType::Abbreviation,
2599                        prefix: false,
2600                    },
2601                ],
2602            },
2603            Test {
2604                query: "new",
2605                match_name_prefix: false,
2606                filter: None,
2607                expected: vec![],
2608            },
2609            Test {
2610                query: "new",
2611                match_name_prefix: true,
2612                filter: None,
2613                expected: vec![
2614                    GeonameMatch {
2615                        geoname: nyc(),
2616                        match_type: GeonameMatchType::Name,
2617                        prefix: true,
2618                    },
2619                    GeonameMatch {
2620                        geoname: new_orleans(),
2621                        match_type: GeonameMatchType::Name,
2622                        prefix: true,
2623                    },
2624                    GeonameMatch {
2625                        geoname: ny_state(),
2626                        match_type: GeonameMatchType::Name,
2627                        prefix: true,
2628                    },
2629                ],
2630            },
2631            Test {
2632                query: "new york foo",
2633                match_name_prefix: false,
2634                filter: None,
2635                expected: vec![],
2636            },
2637            Test {
2638                query: "new york foo",
2639                match_name_prefix: true,
2640                filter: None,
2641                expected: vec![],
2642            },
2643            Test {
2644                query: "new foo",
2645                match_name_prefix: true,
2646                filter: None,
2647                expected: vec![],
2648            },
2649            Test {
2650                query: "foo new york",
2651                match_name_prefix: false,
2652                filter: None,
2653                expected: vec![],
2654            },
2655            Test {
2656                query: "foo new york",
2657                match_name_prefix: true,
2658                filter: None,
2659                expected: vec![],
2660            },
2661            Test {
2662                query: "foo new",
2663                match_name_prefix: true,
2664                filter: None,
2665                expected: vec![],
2666            },
2667            Test {
2668                query: "roc",
2669                match_name_prefix: false,
2670                filter: None,
2671                expected: vec![GeonameMatch {
2672                    geoname: rochester(),
2673                    match_type: GeonameMatchType::AirportCode,
2674                    prefix: false,
2675                }],
2676            },
2677            // "roc" is both a name prefix and an airport code.
2678            Test {
2679                query: "roc",
2680                match_name_prefix: true,
2681                filter: None,
2682                expected: vec![
2683                    GeonameMatch {
2684                        geoname: rochester(),
2685                        match_type: GeonameMatchType::Name,
2686                        prefix: true,
2687                    },
2688                    GeonameMatch {
2689                        geoname: rochester(),
2690                        match_type: GeonameMatchType::AirportCode,
2691                        prefix: false,
2692                    },
2693                ],
2694            },
2695            Test {
2696                query: "123 long name",
2697                match_name_prefix: false,
2698                filter: None,
2699                expected: vec![GeonameMatch {
2700                    geoname: long_name_city(),
2701                    match_type: GeonameMatchType::Name,
2702                    prefix: false,
2703                }],
2704            },
2705            Test {
2706                query: LONG_NAME,
2707                match_name_prefix: false,
2708                filter: None,
2709                expected: vec![GeonameMatch {
2710                    geoname: long_name_city(),
2711                    match_type: GeonameMatchType::Name,
2712                    prefix: false,
2713                }],
2714            },
2715            Test {
2716                query: "ac",
2717                match_name_prefix: false,
2718                filter: None,
2719                expected: vec![],
2720            },
2721            Test {
2722                query: "ac",
2723                match_name_prefix: true,
2724                filter: None,
2725                expected: vec![],
2726            },
2727            Test {
2728                query: "act",
2729                match_name_prefix: false,
2730                filter: None,
2731                expected: vec![GeonameMatch {
2732                    geoname: waco(),
2733                    match_type: GeonameMatchType::AirportCode,
2734                    prefix: false,
2735                }],
2736            },
2737            Test {
2738                query: "act",
2739                match_name_prefix: true,
2740                filter: None,
2741                expected: vec![GeonameMatch {
2742                    geoname: waco(),
2743                    match_type: GeonameMatchType::AirportCode,
2744                    prefix: false,
2745                }],
2746            },
2747            Test {
2748                query: "us",
2749                match_name_prefix: false,
2750                filter: None,
2751                expected: vec![GeonameMatch {
2752                    geoname: us(),
2753                    match_type: GeonameMatchType::Abbreviation,
2754                    prefix: false,
2755                }],
2756            },
2757            Test {
2758                query: "us",
2759                match_name_prefix: false,
2760                filter: Some(vec![waterloo_ia()]),
2761                expected: vec![GeonameMatch {
2762                    geoname: us(),
2763                    match_type: GeonameMatchType::Abbreviation,
2764                    prefix: false,
2765                }],
2766            },
2767            Test {
2768                query: "us",
2769                match_name_prefix: false,
2770                filter: Some(vec![ia()]),
2771                expected: vec![GeonameMatch {
2772                    geoname: us(),
2773                    match_type: GeonameMatchType::Abbreviation,
2774                    prefix: false,
2775                }],
2776            },
2777            Test {
2778                query: "canada",
2779                match_name_prefix: false,
2780                filter: None,
2781                expected: vec![GeonameMatch {
2782                    geoname: canada(),
2783                    match_type: GeonameMatchType::Name,
2784                    prefix: false,
2785                }],
2786            },
2787            Test {
2788                query: "canada",
2789                match_name_prefix: false,
2790                filter: Some(vec![on()]),
2791                expected: vec![GeonameMatch {
2792                    geoname: canada(),
2793                    match_type: GeonameMatchType::Name,
2794                    prefix: false,
2795                }],
2796            },
2797            Test {
2798                query: "canada",
2799                match_name_prefix: false,
2800                filter: Some(vec![waterloo_on(), on()]),
2801                expected: vec![GeonameMatch {
2802                    geoname: canada(),
2803                    match_type: GeonameMatchType::Name,
2804                    prefix: false,
2805                }],
2806            },
2807            Test {
2808                query: "uk",
2809                match_name_prefix: false,
2810                filter: None,
2811                expected: vec![
2812                    // "UK" is listed as both an 'en' alternate and 'abbr'
2813                    // alternate. The abbreviation should be first since 'abbr'
2814                    // is ordered before 'en'.
2815                    GeonameMatch {
2816                        geoname: uk(),
2817                        match_type: GeonameMatchType::Abbreviation,
2818                        prefix: false,
2819                    },
2820                    GeonameMatch {
2821                        geoname: uk(),
2822                        match_type: GeonameMatchType::Name,
2823                        prefix: false,
2824                    },
2825                ],
2826            },
2827            Test {
2828                query: "st. louis",
2829                match_name_prefix: false,
2830                filter: None,
2831                expected: vec![GeonameMatch {
2832                    geoname: st_louis(),
2833                    match_type: GeonameMatchType::Name,
2834                    prefix: false,
2835                }],
2836            },
2837            Test {
2838                query: "st louis",
2839                match_name_prefix: false,
2840                filter: None,
2841                expected: vec![GeonameMatch {
2842                    geoname: st_louis(),
2843                    match_type: GeonameMatchType::Name,
2844                    prefix: false,
2845                }],
2846            },
2847            Test {
2848                query: "st.",
2849                match_name_prefix: true,
2850                filter: None,
2851                expected: vec![GeonameMatch {
2852                    geoname: st_louis(),
2853                    match_type: GeonameMatchType::Name,
2854                    prefix: true,
2855                }],
2856            },
2857            Test {
2858                query: "st. l",
2859                match_name_prefix: true,
2860                filter: None,
2861                expected: vec![GeonameMatch {
2862                    geoname: st_louis(),
2863                    match_type: GeonameMatchType::Name,
2864                    prefix: true,
2865                }],
2866            },
2867            Test {
2868                query: "st l",
2869                match_name_prefix: true,
2870                filter: None,
2871                expected: vec![GeonameMatch {
2872                    geoname: st_louis(),
2873                    match_type: GeonameMatchType::Name,
2874                    prefix: true,
2875                }],
2876            },
2877            Test {
2878                query: "st.",
2879                match_name_prefix: false,
2880                filter: None,
2881                expected: vec![],
2882            },
2883            Test {
2884                query: "st l",
2885                match_name_prefix: false,
2886                filter: None,
2887                expected: vec![],
2888            },
2889            Test {
2890                query: "carmel-by-the-sea",
2891                match_name_prefix: false,
2892                filter: None,
2893                expected: vec![GeonameMatch {
2894                    geoname: carmel(),
2895                    match_type: GeonameMatchType::Name,
2896                    prefix: false,
2897                }],
2898            },
2899            Test {
2900                query: "carmel by the sea",
2901                match_name_prefix: false,
2902                filter: None,
2903                expected: vec![GeonameMatch {
2904                    geoname: carmel(),
2905                    match_type: GeonameMatchType::Name,
2906                    prefix: false,
2907                }],
2908            },
2909            Test {
2910                query: "carmel-",
2911                match_name_prefix: true,
2912                filter: None,
2913                expected: vec![GeonameMatch {
2914                    geoname: carmel(),
2915                    match_type: GeonameMatchType::Name,
2916                    prefix: true,
2917                }],
2918            },
2919            Test {
2920                query: "carmel-b",
2921                match_name_prefix: true,
2922                filter: None,
2923                expected: vec![GeonameMatch {
2924                    geoname: carmel(),
2925                    match_type: GeonameMatchType::Name,
2926                    prefix: true,
2927                }],
2928            },
2929            Test {
2930                query: "carmel b",
2931                match_name_prefix: true,
2932                filter: None,
2933                expected: vec![GeonameMatch {
2934                    geoname: carmel(),
2935                    match_type: GeonameMatchType::Name,
2936                    prefix: true,
2937                }],
2938            },
2939            Test {
2940                query: "carmel-",
2941                match_name_prefix: false,
2942                filter: None,
2943                expected: vec![],
2944            },
2945            Test {
2946                query: "carmel-b",
2947                match_name_prefix: false,
2948                filter: None,
2949                expected: vec![],
2950            },
2951            Test {
2952                query: "carmel b",
2953                match_name_prefix: false,
2954                filter: None,
2955                expected: vec![],
2956            },
2957            Test {
2958                query: "liverpool",
2959                match_name_prefix: false,
2960                filter: None,
2961                expected: vec![
2962                    GeonameMatch {
2963                        geoname: liverpool_city(),
2964                        match_type: GeonameMatchType::Name,
2965                        prefix: false,
2966                    },
2967                    GeonameMatch {
2968                        geoname: liverpool_metro(),
2969                        match_type: GeonameMatchType::Name,
2970                        prefix: false,
2971                    },
2972                ],
2973            },
2974            Test {
2975                query: "liverpool",
2976                match_name_prefix: false,
2977                filter: Some(vec![liverpool_metro()]),
2978                expected: vec![
2979                    GeonameMatch {
2980                        geoname: liverpool_city(),
2981                        match_type: GeonameMatchType::Name,
2982                        prefix: false,
2983                    },
2984                    GeonameMatch {
2985                        geoname: liverpool_metro(),
2986                        match_type: GeonameMatchType::Name,
2987                        prefix: false,
2988                    },
2989                ],
2990            },
2991            Test {
2992                query: "liverpool",
2993                match_name_prefix: false,
2994                filter: Some(vec![england()]),
2995                expected: vec![
2996                    GeonameMatch {
2997                        geoname: liverpool_city(),
2998                        match_type: GeonameMatchType::Name,
2999                        prefix: false,
3000                    },
3001                    GeonameMatch {
3002                        geoname: liverpool_metro(),
3003                        match_type: GeonameMatchType::Name,
3004                        prefix: false,
3005                    },
3006                ],
3007            },
3008            Test {
3009                query: "liverpool",
3010                match_name_prefix: false,
3011                filter: Some(vec![uk()]),
3012                expected: vec![
3013                    GeonameMatch {
3014                        geoname: liverpool_city(),
3015                        match_type: GeonameMatchType::Name,
3016                        prefix: false,
3017                    },
3018                    GeonameMatch {
3019                        geoname: liverpool_metro(),
3020                        match_type: GeonameMatchType::Name,
3021                        prefix: false,
3022                    },
3023                ],
3024            },
3025            Test {
3026                query: "liverpool",
3027                match_name_prefix: false,
3028                filter: Some(vec![liverpool_metro(), england()]),
3029                expected: vec![
3030                    GeonameMatch {
3031                        geoname: liverpool_city(),
3032                        match_type: GeonameMatchType::Name,
3033                        prefix: false,
3034                    },
3035                    GeonameMatch {
3036                        geoname: liverpool_metro(),
3037                        match_type: GeonameMatchType::Name,
3038                        prefix: false,
3039                    },
3040                ],
3041            },
3042            Test {
3043                query: "liverpool",
3044                match_name_prefix: false,
3045                filter: Some(vec![liverpool_metro(), uk()]),
3046                expected: vec![
3047                    GeonameMatch {
3048                        geoname: liverpool_city(),
3049                        match_type: GeonameMatchType::Name,
3050                        prefix: false,
3051                    },
3052                    GeonameMatch {
3053                        geoname: liverpool_metro(),
3054                        match_type: GeonameMatchType::Name,
3055                        prefix: false,
3056                    },
3057                ],
3058            },
3059            Test {
3060                query: "liverpool",
3061                match_name_prefix: false,
3062                filter: Some(vec![england(), uk()]),
3063                expected: vec![
3064                    GeonameMatch {
3065                        geoname: liverpool_city(),
3066                        match_type: GeonameMatchType::Name,
3067                        prefix: false,
3068                    },
3069                    GeonameMatch {
3070                        geoname: liverpool_metro(),
3071                        match_type: GeonameMatchType::Name,
3072                        prefix: false,
3073                    },
3074                ],
3075            },
3076            Test {
3077                query: "liverpool",
3078                match_name_prefix: false,
3079                filter: Some(vec![liverpool_metro(), england(), uk()]),
3080                expected: vec![
3081                    GeonameMatch {
3082                        geoname: liverpool_city(),
3083                        match_type: GeonameMatchType::Name,
3084                        prefix: false,
3085                    },
3086                    GeonameMatch {
3087                        geoname: liverpool_metro(),
3088                        match_type: GeonameMatchType::Name,
3089                        prefix: false,
3090                    },
3091                ],
3092            },
3093            Test {
3094                query: "gößnitz",
3095                match_name_prefix: false,
3096                filter: None,
3097                expected: vec![GeonameMatch {
3098                    geoname: goessnitz(),
3099                    match_type: GeonameMatchType::Name,
3100                    prefix: false,
3101                }],
3102            },
3103            Test {
3104                query: "gössnitz",
3105                match_name_prefix: false,
3106                filter: None,
3107                expected: vec![GeonameMatch {
3108                    geoname: goessnitz(),
3109                    match_type: GeonameMatchType::Name,
3110                    prefix: false,
3111                }],
3112            },
3113            Test {
3114                query: "goßnitz",
3115                match_name_prefix: false,
3116                filter: None,
3117                expected: vec![GeonameMatch {
3118                    geoname: goessnitz(),
3119                    match_type: GeonameMatchType::Name,
3120                    prefix: false,
3121                }],
3122            },
3123            Test {
3124                query: "gossnitz",
3125                match_name_prefix: false,
3126                filter: None,
3127                expected: vec![GeonameMatch {
3128                    geoname: goessnitz(),
3129                    match_type: GeonameMatchType::Name,
3130                    prefix: false,
3131                }],
3132            },
3133            Test {
3134                query: "goessnitz",
3135                match_name_prefix: false,
3136                filter: None,
3137                expected: vec![GeonameMatch {
3138                    geoname: goessnitz(),
3139                    match_type: GeonameMatchType::Name,
3140                    prefix: false,
3141                }],
3142            },
3143            Test {
3144                query: "gö",
3145                match_name_prefix: true,
3146                filter: None,
3147                expected: vec![GeonameMatch {
3148                    geoname: goessnitz(),
3149                    match_type: GeonameMatchType::Name,
3150                    prefix: true,
3151                }],
3152            },
3153            Test {
3154                query: "göß",
3155                match_name_prefix: true,
3156                filter: None,
3157                expected: vec![GeonameMatch {
3158                    geoname: goessnitz(),
3159                    match_type: GeonameMatchType::Name,
3160                    prefix: true,
3161                }],
3162            },
3163            Test {
3164                query: "gößn",
3165                match_name_prefix: true,
3166                filter: None,
3167                expected: vec![GeonameMatch {
3168                    geoname: goessnitz(),
3169                    match_type: GeonameMatchType::Name,
3170                    prefix: true,
3171                }],
3172            },
3173            Test {
3174                query: "gös",
3175                match_name_prefix: true,
3176                filter: None,
3177                expected: vec![GeonameMatch {
3178                    geoname: goessnitz(),
3179                    match_type: GeonameMatchType::Name,
3180                    prefix: true,
3181                }],
3182            },
3183            Test {
3184                query: "goß",
3185                match_name_prefix: true,
3186                filter: None,
3187                expected: vec![GeonameMatch {
3188                    geoname: goessnitz(),
3189                    match_type: GeonameMatchType::Name,
3190                    prefix: true,
3191                }],
3192            },
3193            Test {
3194                query: "goßn",
3195                match_name_prefix: true,
3196                filter: None,
3197                expected: vec![GeonameMatch {
3198                    geoname: goessnitz(),
3199                    match_type: GeonameMatchType::Name,
3200                    prefix: true,
3201                }],
3202            },
3203            Test {
3204                query: "gos",
3205                match_name_prefix: true,
3206                filter: None,
3207                expected: vec![GeonameMatch {
3208                    geoname: goessnitz(),
3209                    match_type: GeonameMatchType::Name,
3210                    prefix: true,
3211                }],
3212            },
3213            Test {
3214                query: "goss",
3215                match_name_prefix: true,
3216                filter: None,
3217                expected: vec![GeonameMatch {
3218                    geoname: goessnitz(),
3219                    match_type: GeonameMatchType::Name,
3220                    prefix: true,
3221                }],
3222            },
3223        ];
3224
3225        store.read(|dao| {
3226            for t in tests {
3227                let gs = t.filter.clone().unwrap_or_default();
3228                let gs_refs: Vec<_> = gs.iter().collect();
3229                let filters = if gs_refs.is_empty() {
3230                    None
3231                } else {
3232                    Some(gs_refs)
3233                };
3234                assert_eq!(
3235                    dao.fetch_geonames(t.query, t.match_name_prefix, filters)?,
3236                    t.expected,
3237                    "query={:?} -- Full test: {:?}",
3238                    t.query,
3239                    t
3240                );
3241            }
3242            Ok(())
3243        })?;
3244
3245        Ok(())
3246    }
3247
3248    #[test]
3249    fn geonames_metrics() -> anyhow::Result<()> {
3250        before_each();
3251
3252        // Add some records: a core geonames record and some alternates
3253        // records. The names in each should contribute to metrics.
3254        let mut store = TestStore::new(
3255            MockRemoteSettingsClient::default()
3256                .with_record(geoname_mock_record(
3257                    "geonames-0",
3258                    json!([
3259                        {
3260                            "id": 4096497,
3261                            "name": "Waterloo",
3262                            "feature_class": "P",
3263                            "feature_code": "PPL",
3264                            "country": "US",
3265                            "admin1": "AL",
3266                            "admin2": "077",
3267                            "population": 200,
3268                            "latitude": "34.91814",
3269                            "longitude": "-88.0642",
3270                        },
3271                    ]),
3272                ))
3273                .with_record(geoname_alternates_mock_record(
3274                    "geonames-alternates-0",
3275                    json!({
3276                        "language": "en",
3277                        "alternates_by_geoname_id": [
3278                            [4096497, ["a b c d e"]],
3279                        ],
3280                    }),
3281                ))
3282                .with_record(geoname_alternates_mock_record(
3283                    "geonames-alternates-1",
3284                    json!({
3285                        "language": "en",
3286                        "alternates_by_geoname_id": [
3287                            [1, ["abcdefghik lmnopqrstu"]],
3288                        ],
3289                    }),
3290                )),
3291        );
3292
3293        // Ingest weather to also ingest geonames.
3294        store.ingest(SuggestIngestionConstraints {
3295            providers: Some(vec![SuggestionProvider::Weather]),
3296            ..SuggestIngestionConstraints::all_providers()
3297        });
3298
3299        store.read(|dao| {
3300            let cache = dao.geoname_cache();
3301            assert_eq!(cache.keywords_metrics.max_len, 21); // "abcdefghik lmnopqrstu"
3302            assert_eq!(cache.keywords_metrics.max_word_count, 5); // "a b c d e"
3303            Ok(())
3304        })?;
3305
3306        // Delete the first alternates record. The metrics should change.
3307        store
3308            .client_mut()
3309            .delete_record(geoname_mock_record("geonames-alternates-0", json!({})));
3310        store.ingest(SuggestIngestionConstraints {
3311            providers: Some(vec![SuggestionProvider::Weather]),
3312            ..SuggestIngestionConstraints::all_providers()
3313        });
3314        store.read(|dao| {
3315            let cache = dao.geoname_cache();
3316            assert_eq!(cache.keywords_metrics.max_len, 21); // "abcdefghik lmnopqrstu"
3317            assert_eq!(cache.keywords_metrics.max_word_count, 2); // "abcdefghik lmnopqrstu"
3318            Ok(())
3319        })?;
3320
3321        // Delete the second alternates record. The metrics should change again.
3322        store
3323            .client_mut()
3324            .delete_record(geoname_mock_record("geonames-alternates-1", json!({})));
3325        store.ingest(SuggestIngestionConstraints {
3326            providers: Some(vec![SuggestionProvider::Weather]),
3327            ..SuggestIngestionConstraints::all_providers()
3328        });
3329        store.read(|dao| {
3330            let cache = dao.geoname_cache();
3331            assert_eq!(cache.keywords_metrics.max_len, 8); // "waterloo"
3332            assert_eq!(cache.keywords_metrics.max_word_count, 1); // "waterloo"
3333            Ok(())
3334        })?;
3335
3336        // Add a new record. The metrics should change again.
3337        store
3338            .client_mut()
3339            .add_record(geoname_alternates_mock_record(
3340                "geonames-alternates-2",
3341                json!({
3342                    "language": "en",
3343                    "alternates_by_geoname_id": [
3344                        [2, ["abcd efgh iklm"]],
3345                    ],
3346                }),
3347            ));
3348        store.ingest(SuggestIngestionConstraints {
3349            providers: Some(vec![SuggestionProvider::Weather]),
3350            ..SuggestIngestionConstraints::all_providers()
3351        });
3352        store.read(|dao| {
3353            let cache = dao.geoname_cache();
3354            assert_eq!(cache.keywords_metrics.max_len, 14); // "abcd efgh iklm"
3355            assert_eq!(cache.keywords_metrics.max_word_count, 3); // "abcd efgh iklm"
3356            Ok(())
3357        })?;
3358
3359        Ok(())
3360    }
3361
/// Ingests the test data, checks that "waterloo" matches, then deletes the
/// `geonames-0` record and verifies that the query no longer matches and that
/// the `geonames` and `geonames_alternates` tables hold no rows.
#[test]
fn geonames_deleted_record() -> anyhow::Result<()> {
    before_each();

    // Every ingest in this test uses the same weather-only constraints.
    let weather_only = || SuggestIngestionConstraints {
        providers: Some(vec![SuggestionProvider::Weather]),
        ..SuggestIngestionConstraints::all_providers()
    };

    // Build the store from the test data and run the initial ingest.
    let mut store = new_test_store();
    store.ingest(weather_only());

    // Before the deletion, all three Waterloos should match by name.
    store.read(|dao| {
        let matches = dao.fetch_geonames("waterloo", false, None)?;
        let expected: Vec<GeonameMatch> = vec![waterloo_on(), waterloo_ia(), waterloo_al()]
            .into_iter()
            .map(|geoname| GeonameMatch {
                geoname,
                match_type: GeonameMatchType::Name,
                prefix: false,
            })
            .collect();
        assert_eq!(matches, expected);
        Ok(())
    })?;

    // Remove the record on the client side and ingest again so the store
    // picks up the deletion.
    store
        .client_mut()
        .delete_record(geoname_mock_record("geonames-0", json!({})));
    store.ingest(weather_only());

    // Nothing should match now, and neither table should have any rows left.
    store.read(|dao| {
        let matches = dao.fetch_geonames("waterloo", false, None)?;
        assert_eq!(matches, Vec::<GeonameMatch>::new());

        // Each entry is (query, name of the ID column it selects).
        let id_queries = [
            ("SELECT id FROM geonames", "id"),
            ("SELECT geoname_id FROM geonames_alternates", "geoname_id"),
        ];
        for (sql, column) in id_queries {
            let ids = dao.conn.query_rows_and_then(
                sql,
                [],
                |row| -> Result<GeonameId> { Ok(row.get(column)?) },
            )?;
            assert_eq!(ids, Vec::<GeonameId>::new());
        }

        Ok(())
    })?;

    Ok(())
}
3431
/// Replaces the `geonames-0` record with a `geonames-1` record carrying the
/// same data and verifies that, after re-ingesting, lookups still match and
/// the row counts of both geonames tables are unchanged.
#[test]
fn geonames_reingest() -> anyhow::Result<()> {
    before_each();

    // Every ingest in this test uses the same weather-only constraints.
    let weather_only = || SuggestIngestionConstraints {
        providers: Some(vec![SuggestionProvider::Weather]),
        ..SuggestIngestionConstraints::all_providers()
    };

    // Build the store from the test data and run the initial ingest.
    let mut store = new_test_store();
    store.ingest(weather_only());

    // Record how many rows each table holds after the first ingest.
    let (geonames_count, alternates_count) = store.read(|dao| {
        let geonames = dao.conn.query_row_and_then(
            "SELECT count(*) FROM geonames",
            [],
            |row| -> Result<i64> { Ok(row.get(0)?) },
        )?;
        let alternates = dao.conn.query_row_and_then(
            "SELECT count(*) FROM geonames_alternates",
            [],
            |row| -> Result<i64> { Ok(row.get(0)?) },
        )?;
        Ok((geonames, alternates))
    })?;

    // Sanity check: the comparison at the end is meaningless on empty tables.
    assert_ne!(geonames_count, 0);
    assert_ne!(alternates_count, 0);

    // Swap the record for one with a new ID but identical data.
    let client = store.client_mut();
    client.delete_record(geoname_mock_record("geonames-0", json!({})));
    client.add_record(geoname_mock_record("geonames-1", geonames_data()));

    // Second ingest picks up both the deletion and the replacement.
    store.ingest(weather_only());

    // The same lookup should still produce all three Waterloos.
    store.read(|dao| {
        let matches = dao.fetch_geonames("waterloo", false, None)?;
        let expected: Vec<GeonameMatch> = vec![waterloo_on(), waterloo_ia(), waterloo_al()]
            .into_iter()
            .map(|geoname| GeonameMatch {
                geoname,
                match_type: GeonameMatchType::Name,
                prefix: false,
            })
            .collect();
        assert_eq!(matches, expected);
        Ok(())
    })?;

    // Count the rows again; re-ingesting identical data must not change them.
    let (new_geonames_count, new_alternates_count) = store.read(|dao| {
        let geonames = dao.conn.query_row_and_then(
            "SELECT count(*) FROM geonames",
            [],
            |row| -> Result<i64> { Ok(row.get(0)?) },
        )?;
        let alternates = dao.conn.query_row_and_then(
            "SELECT count(*) FROM geonames_alternates",
            [],
            |row| -> Result<i64> { Ok(row.get(0)?) },
        )?;
        Ok((geonames, alternates))
    })?;

    assert_eq!(geonames_count, new_geonames_count);
    assert_eq!(alternates_count, new_alternates_count);

    Ok(())
}
3521
/// Exercises `fetch_geonames` through the store API with a handful of calls
/// that cover each fetch option: no options, a filter, and name-prefix
/// matching.
#[test]
fn geonames_store_api() -> anyhow::Result<()> {
    before_each();

    let store = new_test_store();

    // Geonames are ingested as part of the weather provider.
    store.ingest(SuggestIngestionConstraints {
        providers: Some(vec![SuggestionProvider::Weather]),
        ..SuggestIngestionConstraints::all_providers()
    });

    #[derive(Debug)]
    struct Test {
        query: &'static str,
        match_name_prefix: bool,
        filter: Option<Vec<Geoname>>,
        expected: Vec<GeonameMatch>,
    }

    // Every expected match below is a non-prefix abbreviation match, so build
    // them through one helper.
    let abbreviation = |geoname: Geoname| GeonameMatch {
        geoname,
        match_type: GeonameMatchType::Abbreviation,
        prefix: false,
    };

    // Only a few calls are made here, enough to touch every fetch option.
    // The main `geonames` test holds the comprehensive fetch cases.
    let tests = [
        // simple fetch with no options
        Test {
            query: "ia",
            match_name_prefix: false,
            filter: None,
            expected: vec![abbreviation(ia())],
        },
        // filter
        Test {
            query: "ia",
            match_name_prefix: false,
            filter: Some(vec![waterloo_ia()]),
            expected: vec![abbreviation(ia())],
        },
        // prefix matching
        Test {
            query: "ny",
            match_name_prefix: true,
            filter: None,
            expected: vec![abbreviation(nyc()), abbreviation(ny_state())],
        },
    ];

    for test in tests {
        // The filter is cloned so the whole case stays available for the
        // failure message.
        let actual = store.fetch_geonames(test.query, test.match_name_prefix, test.filter.clone());
        assert_eq!(actual, test.expected, "Test: {:?}", test);
    }

    Ok(())
}
3598}