suggest/
weather.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 */
5
6use rusqlite::named_params;
7use serde::Deserialize;
8use sql_support::ConnExt;
9
10use std::{cmp::Ordering, collections::HashSet};
11
12use crate::{
13    config::SuggestProviderConfig,
14    db::{
15        KeywordInsertStatement, KeywordsMetrics, KeywordsMetricsUpdater, SuggestDao,
16        SuggestionInsertStatement, DEFAULT_SUGGESTION_SCORE,
17    },
18    geoname::GeonameMatch,
19    metrics::MetricsContext,
20    provider::SuggestionProvider,
21    rs::{Client, Record, SuggestRecordId, SuggestRecordType},
22    store::SuggestStoreInner,
23    suggestion::Suggestion,
24    util::filter_map_chunks,
25    Result, SuggestionQuery,
26};
27
/// Deserialized contents of a weather record attachment.
///
/// Slices of this type are what `insert_weather_data` receives during
/// ingestion; each attachment contributes one suggestion row, its keywords,
/// and the weather provider config.
#[derive(Clone, Debug, Deserialize)]
pub(crate) struct DownloadedWeatherAttachment {
    /// Weather keywords.
    pub keywords: Vec<String>,
    /// Threshold for weather keyword prefix matching when a weather keyword is
    /// the first term in a query. `None` means prefix matching is disabled and
    /// weather keywords must be typed in full when they are first in the query.
    /// This threshold does not apply to city and region names. If there are
    /// multiple weather records, we use the `min_keyword_length` in the most
    /// recently ingested record.
    pub min_keyword_length: Option<i32>,
    /// Score for weather suggestions. If there are multiple weather records, we
    /// use the `score` from the most recently ingested record.
    pub score: Option<f64>,
}
43
/// This data is used to service every query handled by the weather provider, so
/// we cache it from the DB.
///
/// The derived `Default` (zero length, zero score, default metrics) is what
/// the cache holds for any value that could not be read from the DB.
#[derive(Debug, Default)]
pub struct WeatherCache {
    /// Cached value of the same name from `SuggestProviderConfig::Weather`.
    /// Zero when no config is stored or the stored value does not fit in a
    /// `usize` (e.g. it is negative).
    min_keyword_length: usize,
    /// Cached value of the same name from `SuggestProviderConfig::Weather`.
    score: f64,
    /// Cached weather keywords metrics.
    keywords_metrics: KeywordsMetrics,
}
55
56impl SuggestDao<'_> {
57    /// Fetches weather suggestions.
58    pub fn fetch_weather_suggestions(&self, query: &SuggestionQuery) -> Result<Vec<Suggestion>> {
59        // We'll just stipulate we won't support tiny queries in order to avoid
60        // a bunch of work when the user starts typing a query.
61        if query.keyword.len() < 3 {
62            return Ok(vec![]);
63        }
64
65        // The first step in parsing the query is splitting it into words. We
66        // want to avoid that work for strings that are so long they can't
67        // possibly match. We'll stipulate that weather queries will include the
68        // following parts at most:
69        //
70        // * 3 geonames max: city + one admin division like a state + country
71        // * 1 weather keyword
72        // * 3 spaces between the previous geonames and keyword
73        // * 10 extra chars to allow for extra spaces and punctuation
74        //
75        // This will exclude some valid queries because the logic below allows
76        // for multiple weather keywords, and a city may have more than one
77        // admin division, but we don't expect many users to type such long
78        // queries.
79        //
80        // There's no point in an analogous min length check since weather
81        // suggestions can be matched on city alone and many city names are only
82        // a few characters long ("nyc").
83
84        let g_cache = self.geoname_cache();
85        let w_cache = self.weather_cache();
86        let max_query_len =
87            3 * g_cache.keywords_metrics.max_len + w_cache.keywords_metrics.max_len + 10;
88        if max_query_len < query.keyword.len() {
89            return Ok(vec![]);
90        }
91
92        let max_chunk_size = std::cmp::max(
93            g_cache.keywords_metrics.max_word_count,
94            w_cache.keywords_metrics.max_word_count,
95        );
96
97        // Split the query on whitespace and commas too so that queries like
98        // "city region" and "city, region" both become ["city", "region"].
99        let words: Vec<_> = query
100            .keyword
101            .split(|c| char::is_whitespace(c) || c == ',')
102            .filter(|s| !s.is_empty())
103            .collect();
104
105        // Step 2: Parse the query words into a list of token paths.
106        let raw_token_paths =
107            filter_map_chunks::<Token>(&words, max_chunk_size, |chunk, chunk_i, is_last, path| {
108                // Find all token types that match the chunk.
109                let mut all_tokens: Option<Vec<Token>> = None;
110                for tt in [TokenType::Geoname, TokenType::WeatherKeyword] {
111                    let mut tokens =
112                        self.match_weather_tokens(tt, path, chunk, chunk_i == 0, is_last)?;
113                    if !tokens.is_empty() {
114                        let mut ts = all_tokens.take().unwrap_or_default();
115                        ts.append(&mut tokens);
116                        all_tokens.replace(ts);
117                    }
118                }
119                // If no tokens were matched, `all_tokens` will be `None`.
120                Ok(all_tokens)
121            })?;
122
123        // Step 3: Map each valid token path to a `TokenPath` and discard
124        // invalid paths. Save the paths that include cities. For paths that
125        // include keywords alone, only keep track of the minimum keyword count
126        // across all paths. e.g., if one path has one keyword alone and another
127        // path has two keywords alone, set `kws_alone_min_count` to 1.
128        let mut kws_alone_min_count: Option<usize> = None;
129        let mut city_token_paths: Vec<_> = raw_token_paths
130            .into_iter()
131            .filter_map(|rtp| {
132                TokenPath::from_raw_token_path(rtp).and_then(|tp| match tp {
133                    TokenPath::City(ctp) => Some(ctp),
134                    TokenPath::WeatherKeywordsAlone(count) => {
135                        kws_alone_min_count = kws_alone_min_count
136                            .map(|min_count| std::cmp::min(min_count, count))
137                            .or(Some(count));
138                        None
139                    }
140                })
141            })
142            .collect();
143
144        // Step 4: If any token path is one keyword alone, return a suggestion
145        // without a city even if there are other token paths with cities. In
146        // other words, greedily match on a single keyword. As a simplified
147        // example, if "rain" and "rain in" are keywords and the query is "rain
148        // in", there will be two token paths:
149        //
150        // 1. "rain in" keyword alone
151        // 2. "rain" keyword + Indianapolis city match (for example)
152        //
153        // We want to return a suggestion only for the first path, "rain in".
154        if kws_alone_min_count == Some(1) {
155            return Ok(vec![Suggestion::Weather {
156                city: None,
157                score: w_cache.score,
158            }]);
159        }
160
161        // Step 5: Sort city token paths, first by city match length descending
162        // and then by other geoname match length descending. The idea is that
163        // the more of a name the user matched, the better the match. If two
164        // paths are still equal, break the tie by population descending.
165        city_token_paths.sort_by(|ctp1, ctp2| {
166            let city_cmp = ctp2.city_match_len.cmp(&ctp1.city_match_len);
167            if city_cmp != Ordering::Equal {
168                city_cmp
169            } else {
170                let other_cmp = ctp2
171                    .other_geoname_match_len
172                    .cmp(&ctp1.other_geoname_match_len);
173                if other_cmp != Ordering::Equal {
174                    other_cmp
175                } else {
176                    ctp2.city_match
177                        .geoname
178                        .population
179                        .cmp(&ctp1.city_match.geoname.population)
180                }
181            }
182        });
183
184        // Step 6: If there are any city token paths, return suggestions for
185        // them.
186        //
187        // The cities with the max match lengths are now at the front of the
188        // list. There may be multiple matches with the max match lengths, and
189        // the same city may be represented multiple times since it may have
190        // been matched in different paths.
191        //
192        // Take all the matches with the same (max) match lengths at the front
193        // of the list and create a `Suggestion` for each unique city.
194        if let Some(first_ctp) = city_token_paths.first() {
195            let mut geoname_ids = HashSet::new();
196            let (max_city_match_len, max_other_geoname_match_len) =
197                (first_ctp.city_match_len, first_ctp.other_geoname_match_len);
198            return Ok(city_token_paths
199                .into_iter()
200                .take_while(|ctp| {
201                    ctp.city_match_len == max_city_match_len
202                        && ctp.other_geoname_match_len == max_other_geoname_match_len
203                })
204                .filter_map(|ctp| {
205                    if geoname_ids.contains(&ctp.city_match.geoname.geoname_id) {
206                        None
207                    } else {
208                        geoname_ids.insert(ctp.city_match.geoname.geoname_id);
209                        Some(Suggestion::Weather {
210                            city: Some(ctp.city_match.geoname),
211                            score: w_cache.score,
212                        })
213                    }
214                })
215                .collect());
216        }
217
218        // Step 7: If there are any paths with multiple keywords, return a
219        // single suggestion without a city.
220        if kws_alone_min_count.is_some() {
221            return Ok(vec![Suggestion::Weather {
222                city: None,
223                score: w_cache.score,
224            }]);
225        }
226
227        Ok(Vec::new())
228    }
229
230    fn match_weather_tokens(
231        &self,
232        token_type: TokenType,
233        path: &[Token],
234        candidate: &str,
235        is_first_chunk: bool,
236        is_last_chunk: bool,
237    ) -> Result<Vec<Token>> {
238        match token_type {
239            TokenType::Geoname => {
240                // Fetch matching geonames, and filter them to geonames we've
241                // already matched in this path.
242                let geonames_in_path: Vec<_> = path
243                    .iter()
244                    .filter_map(|t| t.geoname_match().map(|gm| &gm.geoname))
245                    .collect();
246                Ok(self
247                    .fetch_geonames(
248                        candidate,
249                        is_last_chunk,
250                        if geonames_in_path.is_empty() {
251                            None
252                        } else {
253                            Some(geonames_in_path)
254                        },
255                    )?
256                    .into_iter()
257                    .map(|geoname_match| Token::Geoname {
258                        geoname_match,
259                        match_len: candidate.len(),
260                    })
261                    .collect())
262            }
263            TokenType::WeatherKeyword => {
264                // See if the candidate matches a keyword. `min_keyword_length`
265                // in the config controls matching when a query contains only a
266                // keyword or keyword prefix: Zero means prefix matching is not
267                // allowed and keywords must be typed in full; non-zero means
268                // the candidate must be at least that long, even if it's a full
269                // keyword.
270                //
271                // Prefix matching is always allowed when the query contains
272                // other terms and the keyword prefix is the last term.
273                let min_len = self.weather_cache().min_keyword_length;
274                if is_first_chunk && is_last_chunk && candidate.len() < min_len {
275                    // `min_keyword_length` is non-zero, the candidate is the
276                    // only term in the query, and it's too short.
277                    Ok(vec![])
278                } else {
279                    Ok(self
280                        .match_weather_keywords(
281                            candidate,
282                            is_last_chunk && (!is_first_chunk || min_len > 0),
283                        )?
284                        .into_iter()
285                        .map(Token::WeatherKeyword)
286                        .collect())
287                }
288            }
289        }
290    }
291
292    fn match_weather_keywords(
293        &self,
294        candidate: &str,
295        prefix: bool,
296    ) -> Result<Vec<WeatherKeywordMatch>> {
297        self.conn.query_rows_and_then_cached(
298            r#"
299            SELECT
300                k.keyword != :keyword AS matched_prefix
301            FROM
302                suggestions s
303            JOIN
304                keywords_i18n k
305                ON k.suggestion_id = s.id
306            WHERE
307                s.provider = :provider
308                AND (
309                    k.keyword = :keyword
310                    OR (:prefix AND (k.keyword BETWEEN :keyword AND :keyword || X'FFFF'))
311                )
312            "#,
313            named_params! {
314                ":prefix": prefix,
315                ":keyword": candidate,
316                ":provider": SuggestionProvider::Weather
317            },
318            |row| -> Result<WeatherKeywordMatch> {
319                Ok(WeatherKeywordMatch {
320                    is_prefix: row.get("matched_prefix")?,
321                })
322            },
323        )
324    }
325
326    /// Inserts weather suggestions data into the database.
327    fn insert_weather_data(
328        &mut self,
329        record_id: &SuggestRecordId,
330        attachments: &[DownloadedWeatherAttachment],
331    ) -> Result<()> {
332        self.scope.err_if_interrupted()?;
333        let mut suggestion_insert = SuggestionInsertStatement::new(self.conn)?;
334        let mut keyword_insert =
335            KeywordInsertStatement::with_details(self.conn, "keywords_i18n", None)?;
336        let mut metrics_updater = KeywordsMetricsUpdater::new();
337
338        for attach in attachments {
339            let suggestion_id = suggestion_insert.execute(
340                record_id,
341                "",
342                "",
343                attach.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
344                SuggestionProvider::Weather,
345            )?;
346            for (i, keyword) in attach.keywords.iter().enumerate() {
347                keyword_insert.execute(suggestion_id, keyword, None, i)?;
348                metrics_updater.update(keyword);
349            }
350            self.put_provider_config(SuggestionProvider::Weather, &attach.into())?;
351        }
352
353        metrics_updater.finish(
354            self.conn,
355            record_id,
356            SuggestRecordType::Weather,
357            &mut self.weather_cache,
358        )?;
359
360        Ok(())
361    }
362
363    fn weather_cache(&self) -> &WeatherCache {
364        self.weather_cache.get_or_init(|| {
365            let mut cache = WeatherCache {
366                keywords_metrics: self
367                    .get_keywords_metrics(SuggestRecordType::Weather)
368                    .unwrap_or_default(),
369                ..WeatherCache::default()
370            };
371
372            // provider config
373            if let Ok(Some(SuggestProviderConfig::Weather {
374                score,
375                min_keyword_length,
376            })) = self.get_provider_config(SuggestionProvider::Weather)
377            {
378                cache.min_keyword_length = usize::try_from(min_keyword_length).unwrap_or_default();
379                cache.score = score;
380            }
381
382            cache
383        })
384    }
385}
386
387impl<S> SuggestStoreInner<S>
388where
389    S: Client,
390{
391    /// Inserts a weather record into the database.
392    pub fn process_weather_record(
393        &self,
394        dao: &mut SuggestDao,
395        record: &Record,
396        context: &mut MetricsContext,
397    ) -> Result<()> {
398        self.download_attachment(dao, record, context, |dao, record_id, data| {
399            dao.insert_weather_data(record_id, data)
400        })
401    }
402}
403
404impl From<&DownloadedWeatherAttachment> for SuggestProviderConfig {
405    fn from(a: &DownloadedWeatherAttachment) -> Self {
406        Self::Weather {
407            score: a.score.unwrap_or(DEFAULT_SUGGESTION_SCORE),
408            min_keyword_length: a.min_keyword_length.unwrap_or(0),
409        }
410    }
411}
412
/// The kinds of token a chunk of the query can be matched against. Selects
/// the matching strategy in `match_weather_tokens`.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
enum TokenType {
    /// Match the chunk against geonames.
    Geoname,
    /// Match the chunk against weather keywords.
    WeatherKeyword,
}
418
/// A successful match of one query chunk, produced by `match_weather_tokens`.
#[derive(Clone, Debug)]
#[allow(clippy::large_enum_variant)]
enum Token {
    /// The chunk matched a geoname.
    Geoname {
        /// The geoname match returned by the geoname lookup.
        geoname_match: GeonameMatch,
        /// Byte length of the chunk string that produced the match.
        match_len: usize,
    },
    /// The chunk matched a weather keyword.
    WeatherKeyword(WeatherKeywordMatch),
}
428
429impl Token {
430    fn geoname_match(&self) -> Option<&GeonameMatch> {
431        match self {
432            Self::Geoname { geoname_match, .. } => Some(geoname_match),
433            _ => None,
434        }
435    }
436}
437
/// A weather keyword matched by `match_weather_keywords`.
#[derive(Clone, Debug)]
struct WeatherKeywordMatch {
    /// True when the stored keyword is not equal to the candidate, i.e. the
    /// candidate matched only as a prefix of the keyword.
    is_prefix: bool,
}
442
/// A validated, classified token path. Built from a raw `Vec<Token>` by
/// `TokenPath::from_raw_token_path`, which returns `None` for paths that fit
/// neither variant.
#[allow(clippy::large_enum_variant)]
enum TokenPath {
    /// The path contains exactly one city and a valid combination of tokens.
    City(CityTokenPath),
    // The `usize` is the number of keywords matched in the path.
    WeatherKeywordsAlone(usize),
}
449
/// Summary of a token path that matched a city. Used to rank and deduplicate
/// city matches in `fetch_weather_suggestions`.
struct CityTokenPath {
    /// The city's geoname match.
    city_match: GeonameMatch,
    /// `match_len` of the token that matched the city.
    city_match_len: usize,
    /// Largest `match_len` among the path's non-city geoname tokens, or zero
    /// if there were none.
    other_geoname_match_len: usize,
}
455
456impl TokenPath {
457    fn from_raw_token_path(rtp: Vec<Token>) -> Option<Self> {
458        let mut kw_match_count = 0;
459        let mut any_kw_match_full = false;
460        let mut city_match: Option<GeonameMatch> = None;
461        let mut city_match_len = 0;
462        let mut any_other_geoname_full = false;
463        let mut max_other_geoname_match_len = 0;
464
465        for t in rtp {
466            match t {
467                Token::WeatherKeyword(kwm) => {
468                    kw_match_count += 1;
469                    any_kw_match_full = any_kw_match_full || !kwm.is_prefix;
470                }
471                Token::Geoname {
472                    geoname_match,
473                    match_len,
474                } => {
475                    if geoname_match.geoname.geoname_type == crate::geoname::GeonameType::City {
476                        if city_match.is_some() {
477                            // We already matched a city, so the path includes
478                            // more than one, which is invalid.
479                            return None;
480                        }
481                        city_match = Some(geoname_match);
482                        city_match_len = match_len;
483                    } else {
484                        any_other_geoname_full = any_other_geoname_full || !geoname_match.prefix;
485                        max_other_geoname_match_len =
486                            std::cmp::max(max_other_geoname_match_len, match_len)
487                    }
488                }
489            }
490        }
491
492        if let Some(cm) = city_match {
493            // This path matched a city. See if it has a valid combination of
494            // tokens. Keep a few things in mind:
495            //
496            // (1) The query contains a city name of some sort: a proper name,
497            //     abbreviation, or airport code
498            // (2) It may be a name prefix and not a full name
499            // (3) Prefix matching happens only at the end of the query string
500            let is_valid =
501                // The query has full weather keyword(s):
502                //
503                // weather l
504                // weather la
505                // weather lo
506                // weather los angeles
507                // weather pdx
508                // la weather
509                // los angeles weather
510                // pdx weather
511                (kw_match_count > 0 && any_kw_match_full)
512                    // The query has weather keyword(s) (full or prefix) + a
513                    // full city abbreviation:
514                    //
515                    // la w
516                    // la we
517                    // la weather
518                    // la ca w
519                    // weather la
520                    // weather la c
521                    // weather la ca
522                    || (kw_match_count > 0 && !cm.prefix && cm.match_type.is_abbreviation())
523                    // The query has a full city proper name:
524                    //
525                    // los angeles
526                    // los angeles c
527                    // los angeles ca
528                    // ca los angeles
529                    // los angeles w
530                    // los angeles we
531                    // los angeles weather
532                    // los angeles ca w
533                    // weather los angeles
534                    // weather los angeles c
535                    // weather los angeles ca
536                    || (!cm.prefix && cm.match_type.is_name())
537                    // The query has a full city abbreviation + a full related
538                    // geoname:
539                    //
540                    // la ca
541                    // la california
542                    // ca la
543                    // california la
544                    || (!cm.prefix
545                        && cm.match_type.is_abbreviation()
546                        && any_other_geoname_full);
547            if is_valid {
548                return Some(Self::City(CityTokenPath {
549                    city_match: cm,
550                    city_match_len,
551                    other_geoname_match_len: max_other_geoname_match_len,
552                }));
553            }
554        } else if kw_match_count > 0 && max_other_geoname_match_len == 0 {
555            // This path matched weather keyword(s) alone.
556            return Some(Self::WeatherKeywordsAlone(kw_match_count));
557        }
558
559        None
560    }
561}
562
563#[cfg(test)]
564mod tests {
565    use super::*;
566    use crate::{
567        geoname, geoname::Geoname, store::tests::TestStore, testing::*, SuggestIngestionConstraints,
568    };
569
570    impl From<Geoname> for Suggestion {
571        fn from(g: Geoname) -> Self {
572            Suggestion::Weather {
573                city: Some(g),
574                score: 0.24,
575            }
576        }
577    }
578
579    #[test]
580    fn weather_provider_config() -> anyhow::Result<()> {
581        before_each();
582        let store = TestStore::new(MockRemoteSettingsClient::default().with_record(
583            SuggestionProvider::Weather.record(
584                "weather-1",
585                json!({
586                    "min_keyword_length": 3,
587                    "keywords": ["ab", "xyz", "weather"],
588                    "score": 0.24
589                }),
590            ),
591        ));
592        store.ingest(SuggestIngestionConstraints {
593            providers: Some(vec![SuggestionProvider::Weather]),
594            ..SuggestIngestionConstraints::all_providers()
595        });
596        assert_eq!(
597            store.fetch_provider_config(SuggestionProvider::Weather),
598            Some(SuggestProviderConfig::Weather {
599                score: 0.24,
600                min_keyword_length: 3,
601            })
602        );
603        Ok(())
604    }
605
606    #[test]
607    fn weather_keywords_prefixes_allowed() -> anyhow::Result<()> {
608        before_each();
609
610        let store = TestStore::new(MockRemoteSettingsClient::default().with_record(
611            SuggestionProvider::Weather.record(
612                "weather-1",
613                json!({
614                    // min_keyword_length > 0 means prefixes are allowed.
615                    "min_keyword_length": 5,
616                    "keywords": ["ab", "xyz", "cdefg", "weather"],
617                    "score": 0.24
618                }),
619            ),
620        ));
621
622        store.ingest(SuggestIngestionConstraints {
623            providers: Some(vec![SuggestionProvider::Weather]),
624            ..SuggestIngestionConstraints::all_providers()
625        });
626
627        let no_matches = [
628            // doesn't match any keyword
629            "ab123",
630            "123ab",
631            "xyz12",
632            "12xyz",
633            "xcdefg",
634            "cdefgx",
635            "x cdefg",
636            "weatherx",
637            "xweather",
638            "xweat",
639            "weatx",
640            "x   weather",
641            "weather foo",
642            "foo weather",
643            // too short
644            "ab",
645            "xyz",
646            "cdef",
647            "we",
648            "wea",
649            "weat",
650        ];
651        for q in no_matches {
652            assert_eq!(store.fetch_suggestions(SuggestionQuery::weather(q)), vec![]);
653        }
654
655        let matches = [
656            "cdefg",
657            // full keyword ("cdefg") + prefix of another keyword ("xyz")
658            "cdefg x",
659            "weath",
660            "weathe",
661            "weather",
662            "WeAtHeR",
663            "  weather  ",
664            // full keyword ("weather") + prefix of another keyword ("xyz")
665            "   weather x",
666        ];
667        for q in matches {
668            assert_eq!(
669                store.fetch_suggestions(SuggestionQuery::weather(q)),
670                vec![Suggestion::Weather {
671                    score: 0.24,
672                    city: None,
673                }]
674            );
675        }
676
677        Ok(())
678    }
679
680    #[test]
681    fn weather_keywords_prefixes_not_allowed() -> anyhow::Result<()> {
682        before_each();
683
684        let store = TestStore::new(MockRemoteSettingsClient::default().with_record(
685            SuggestionProvider::Weather.record(
686                "weather-1",
687                json!({
688                    // min_keyword_length == 0 means prefixes are not allowed.
689                    "min_keyword_length": 0,
690                    "keywords": ["weather"],
691                    "score": 0.24
692                }),
693            ),
694        ));
695
696        store.ingest(SuggestIngestionConstraints {
697            providers: Some(vec![SuggestionProvider::Weather]),
698            ..SuggestIngestionConstraints::all_providers()
699        });
700
701        let no_matches = ["wea", "weat", "weath", "weathe"];
702        for q in no_matches {
703            assert_eq!(
704                store.fetch_suggestions(SuggestionQuery::weather(q)),
705                vec![],
706                "query: {:?}",
707                q
708            );
709        }
710
711        let matches = ["weather", "WeAtHeR", "  weather  "];
712        for q in matches {
713            assert_eq!(
714                store.fetch_suggestions(SuggestionQuery::weather(q)),
715                vec![Suggestion::Weather {
716                    score: 0.24,
717                    city: None,
718                }],
719                "query: {:?}",
720                q
721            );
722        }
723
724        Ok(())
725    }
726
727    #[test]
728    fn weather_keywords_collate() -> anyhow::Result<()> {
729        before_each();
730
731        let store = TestStore::new(MockRemoteSettingsClient::default().with_record(
732            SuggestionProvider::Weather.record(
733                "weather-1",
734                json!({
735                    "min_keyword_length": 0,
736                    "keywords": [
737                        "AbC xYz",
738                        "Àęí",
739                        // "wéather" with single 'é' char
740                        "w\u{00e9}ather",
741                        // "éfg" with ASCII 'e' followed by combining acute
742                        // accent
743                        "e\u{0301}fg",
744                        "größe",
745                        "abc. def-ghi",
746                        "x.y.z.",
747                    ],
748                    "score": 0.24
749                }),
750            ),
751        ));
752
753        store.ingest(SuggestIngestionConstraints {
754            providers: Some(vec![SuggestionProvider::Weather]),
755            ..SuggestIngestionConstraints::all_providers()
756        });
757
758        let matches = [
759            "AbC xYz",
760            "ABC XYZ",
761            "abc xyz",
762            "Àęí",
763            "Aei",
764            "àęí",
765            "aei",
766            // "wéather" with single 'é' char
767            "w\u{00e9}ather",
768            // "wéather" with ASCII 'e' followed by combining acute
769            // accent
770            "we\u{0301}ather",
771            "weather",
772            // "éfg" with single 'é' char
773            "\u{00e9}fg",
774            // "éfg" with ASCII 'e' followed by combining acute
775            // accent
776            "e\u{0301}fg",
777            "efg",
778            "größe",
779            "große",
780            "grösse",
781            "grosse",
782            "abc. def-ghi",
783            "abc def-ghi",
784            "abc. def ghi",
785            "abc def ghi",
786            "x.y.z.",
787            "xy.z.",
788            "x.yz.",
789            "x.y.z",
790            "xyz.",
791            "xy.z",
792            "x.yz",
793            "xyz",
794        ];
795
796        for q in matches {
797            assert_eq!(
798                store.fetch_suggestions(SuggestionQuery::weather(q)),
799                vec![Suggestion::Weather {
800                    score: 0.24,
801                    city: None,
802                }],
803                "query: {:?}",
804                q
805            );
806        }
807
808        Ok(())
809    }
810
811    #[test]
812    fn weather_keyword_includes_city_prefix() -> anyhow::Result<()> {
813        before_each();
814
815        let kws = [
816            "weather",
817            "weather ",
818            // These keywords start with the "weather" keyword and end in
819            // prefixes of "new york"
820            "weather n",
821            "weather ne",
822            "weather new",
823            // These keywords are prefixes of "new york"
824            "new",
825            "new ",
826            "new y",
827            "new yo",
828        ];
829
830        let mut store = geoname::tests::new_test_store();
831        store
832            .client_mut()
833            .add_record(SuggestionProvider::Weather.record(
834                "weather-1",
835                json!({
836                    "min_keyword_length": 0,
837                    "keywords": kws,
838                    "score": 0.24
839                }),
840            ));
841
842        store.ingest(SuggestIngestionConstraints {
843            providers: Some(vec![SuggestionProvider::Weather]),
844            ..SuggestIngestionConstraints::all_providers()
845        });
846
847        // Make sure "new york" really matches a city.
848        assert_eq!(
849            store.fetch_suggestions(SuggestionQuery::weather("new york")),
850            vec![Suggestion::Weather {
851                score: 0.24,
852                city: Some(geoname::tests::nyc()),
853            }],
854        );
855
856        // Queries for each of the keywords alone should match a suggestion
857        // without a city, even though for example "weather new" also matches
858        // the "weather" keyword and a prefix of "new york".
859        for q in kws {
860            assert_eq!(
861                store.fetch_suggestions(SuggestionQuery::weather(q)),
862                vec![Suggestion::Weather {
863                    score: 0.24,
864                    city: None,
865                }],
866                "Keyword alone query: {:?}",
867                q
868            );
869        }
870
871        // These queries match both the "weather" keyword and a city but are not
872        // also keywords themselves, so their suggestions should include the
873        // city.
874        let city_matches = [
875            "weather new y",
876            "weather new yo",
877            "weather new yor",
878            "weather new york",
879        ];
880        for q in city_matches {
881            assert_eq!(
882                store.fetch_suggestions(SuggestionQuery::weather(q)),
883                vec![Suggestion::Weather {
884                    score: 0.24,
885                    city: Some(geoname::tests::nyc()),
886                }],
887                "City query: {:?}",
888                q
889            );
890        }
891
892        Ok(())
893    }
894
895    #[test]
896    fn weather_keyword_same_as_city() -> anyhow::Result<()> {
897        before_each();
898
899        let mut store = geoname::tests::new_test_store();
900        store
901            .client_mut()
902            .add_record(SuggestionProvider::Weather.record(
903                "weather-1",
904                json!({
905                    "min_keyword_length": 0,
906                    "keywords": [],
907                    "score": 0.24
908                }),
909            ));
910        store.ingest(SuggestIngestionConstraints {
911            providers: Some(vec![SuggestionProvider::Weather]),
912            ..SuggestIngestionConstraints::all_providers()
913        });
914
915        // Make sure "new york" really matches a city.
916        for q in ["new york", "new york city"] {
917            assert_eq!(
918                store.fetch_suggestions(SuggestionQuery::weather("new york")),
919                vec![Suggestion::Weather {
920                    score: 0.24,
921                    city: Some(geoname::tests::nyc()),
922                }],
923                "new york query: {:?}",
924                q
925            );
926        }
927
928        store
929            .client_mut()
930            .update_record(SuggestionProvider::Weather.record(
931                "weather-1",
932                json!({
933                    "min_keyword_length": 0,
934                    "keywords": ["new york"],
935                    "score": 0.24
936                }),
937            ));
938        store.ingest(SuggestIngestionConstraints {
939            providers: Some(vec![SuggestionProvider::Weather]),
940            ..SuggestIngestionConstraints::all_providers()
941        });
942
943        assert_eq!(
944            store.fetch_suggestions(SuggestionQuery::weather("new york")),
945            vec![Suggestion::Weather {
946                score: 0.24,
947                city: None,
948            }],
949        );
950
951        assert_eq!(
952            store.fetch_suggestions(SuggestionQuery::weather("new york city")),
953            vec![Suggestion::Weather {
954                score: 0.24,
955                city: Some(geoname::tests::nyc()),
956            }],
957        );
958
959        Ok(())
960    }
961
962    #[test]
963    fn cities_and_regions() -> anyhow::Result<()> {
964        before_each();
965
966        let record_json = json!({
967            "keywords": [
968                "ab",
969                "xyz",
970                // "weather" is a prefix of "weather near me" -- when a
971                // test matches both one suggestion should be returned
972                "weather",
973                "weather near me",
974                // These are suffixes of two place names that both start
975                // with "New"
976                "york",
977                "orleans",
978                // This is an admin division name
979                "iowa",
980            ],
981            "score": 0.24
982        });
983
984        // Each test is run twice, once with a weather record where
985        // `min_keyword_length` is 0 and once where it's 5.
986        struct Test<'a> {
987            query: &'a str,
988            // expected suggestions per `min_keyword_length`
989            min_keyword_len_0: Vec<Suggestion>,
990            min_keyword_len_5: Vec<Suggestion>,
991        }
992
993        const KW_SUGGESTION: Suggestion = Suggestion::Weather {
994            score: 0.24,
995            city: None,
996        };
997
998        let tests: &[Test] = &[
999            // "act" is Waco's airport code. Airport codes require full weather
1000            // keywords to match.
1001            Test {
1002                query: "act",
1003                min_keyword_len_0: vec![],
1004                min_keyword_len_5: vec![],
1005            },
1006            Test {
1007                query: "act w",
1008                min_keyword_len_0: vec![],
1009                min_keyword_len_5: vec![],
1010            },
1011            Test {
1012                query: "act we",
1013                min_keyword_len_0: vec![],
1014                min_keyword_len_5: vec![],
1015            },
1016            Test {
1017                query: "act wea",
1018                min_keyword_len_0: vec![],
1019                min_keyword_len_5: vec![],
1020            },
1021            Test {
1022                query: "act weat",
1023                min_keyword_len_0: vec![],
1024                min_keyword_len_5: vec![],
1025            },
1026            Test {
1027                query: "act weath",
1028                min_keyword_len_0: vec![],
1029                min_keyword_len_5: vec![],
1030            },
1031            Test {
1032                query: "act weather",
1033                min_keyword_len_0: vec![geoname::tests::waco().into()],
1034                min_keyword_len_5: vec![geoname::tests::waco().into()],
1035            },
1036
1037            Test {
1038                // A suggestion without a city should be returned because this
1039                // query matches a full keyword ("weather") + a prefix of
1040                // another keyword ("ab").
1041                query: "weather a",
1042                min_keyword_len_0: vec![KW_SUGGESTION.clone()],
1043                min_keyword_len_5: vec![KW_SUGGESTION.clone()],
1044            },
1045            Test {
1046                query: "weather ac",
1047                min_keyword_len_0: vec![],
1048                min_keyword_len_5: vec![],
1049            },
1050            Test {
1051                query: "weather act",
1052                min_keyword_len_0: vec![geoname::tests::waco().into()],
1053                min_keyword_len_5: vec![geoname::tests::waco().into()],
1054            },
1055            Test {
1056                query: "act t",
1057                min_keyword_len_0: vec![],
1058                min_keyword_len_5: vec![],
1059            },
1060            Test {
1061                query: "act tx",
1062                min_keyword_len_0: vec![],
1063                min_keyword_len_5: vec![],
1064            },
1065            Test {
1066                query: "act tx w",
1067                min_keyword_len_0: vec![],
1068                min_keyword_len_5: vec![],
1069            },
1070            Test {
1071                query: "act tx weat",
1072                min_keyword_len_0: vec![],
1073                min_keyword_len_5: vec![],
1074            },
1075            Test {
1076                query: "act tx weath",
1077                min_keyword_len_0: vec![],
1078                min_keyword_len_5: vec![],
1079            },
1080            Test {
1081                query: "act tx weather",
1082                min_keyword_len_0: vec![geoname::tests::waco().into()],
1083                min_keyword_len_5: vec![geoname::tests::waco().into()],
1084            },
1085            Test {
1086                query: "tx a",
1087                min_keyword_len_0: vec![],
1088                min_keyword_len_5: vec![],
1089            },
1090            Test {
1091                query: "tx ac",
1092                min_keyword_len_0: vec![],
1093                min_keyword_len_5: vec![],
1094            },
1095            Test {
1096                query: "tx act",
1097                min_keyword_len_0: vec![],
1098                min_keyword_len_5: vec![],
1099            },
1100            Test {
1101                query: "tx act w",
1102                min_keyword_len_0: vec![],
1103                min_keyword_len_5: vec![],
1104            },
1105            Test {
1106                query: "tx act weat",
1107                min_keyword_len_0: vec![],
1108                min_keyword_len_5: vec![],
1109            },
1110            Test {
1111                query: "tx act weath",
1112                min_keyword_len_0: vec![],
1113                min_keyword_len_5: vec![],
1114            },
1115            Test {
1116                query: "tx act weather",
1117                min_keyword_len_0: vec![geoname::tests::waco().into()],
1118                min_keyword_len_5: vec![geoname::tests::waco().into()],
1119            },
1120            Test {
1121                query: "act te",
1122                min_keyword_len_0: vec![],
1123                min_keyword_len_5: vec![],
1124            },
1125            Test {
1126                query: "act tex",
1127                min_keyword_len_0: vec![],
1128                min_keyword_len_5: vec![],
1129            },
1130            Test {
1131                query: "act texa",
1132                min_keyword_len_0: vec![],
1133                min_keyword_len_5: vec![],
1134            },
1135            Test {
1136                query: "act texas",
1137                min_keyword_len_0: vec![],
1138                min_keyword_len_5: vec![],
1139            },
1140            Test {
1141                query: "act texas w",
1142                min_keyword_len_0: vec![],
1143                min_keyword_len_5: vec![],
1144            },
1145            Test {
1146                query: "act texas weat",
1147                min_keyword_len_0: vec![],
1148                min_keyword_len_5: vec![],
1149            },
1150            Test {
1151                query: "act texas weath",
1152                min_keyword_len_0: vec![],
1153                min_keyword_len_5: vec![],
1154            },
1155            Test {
1156                query: "act texas weather",
1157                min_keyword_len_0: vec![geoname::tests::waco().into()],
1158                min_keyword_len_5: vec![geoname::tests::waco().into()],
1159            },
1160            Test {
1161                query: "texas a",
1162                min_keyword_len_0: vec![],
1163                min_keyword_len_5: vec![],
1164            },
1165            Test {
1166                query: "texas ac",
1167                min_keyword_len_0: vec![],
1168                min_keyword_len_5: vec![],
1169            },
1170            Test {
1171                query: "texas act",
1172                min_keyword_len_0: vec![],
1173                min_keyword_len_5: vec![],
1174            },
1175            Test {
1176                query: "texas act w",
1177                min_keyword_len_0: vec![],
1178                min_keyword_len_5: vec![],
1179            },
1180            Test {
1181                query: "texas act weat",
1182                min_keyword_len_0: vec![],
1183                min_keyword_len_5: vec![],
1184            },
1185            Test {
1186                query: "texas act weath",
1187                min_keyword_len_0: vec![],
1188                min_keyword_len_5: vec![],
1189            },
1190            Test {
1191                query: "texas act weather",
1192                min_keyword_len_0: vec![geoname::tests::waco().into()],
1193                min_keyword_len_5: vec![geoname::tests::waco().into()],
1194            },
1195
1196            Test {
1197                query: "ia w",
1198                min_keyword_len_0: vec![],
1199                min_keyword_len_5: vec![],
1200            },
1201            Test {
1202                query: "ia wa",
1203                min_keyword_len_0: vec![],
1204                min_keyword_len_5: vec![],
1205            },
1206            Test {
1207                query: "ia wat",
1208                min_keyword_len_0: vec![],
1209                min_keyword_len_5: vec![],
1210            },
1211            Test {
1212                query: "ia wate",
1213                min_keyword_len_0: vec![],
1214                min_keyword_len_5: vec![],
1215            },
1216            Test {
1217                query: "ia water",
1218                min_keyword_len_0: vec![],
1219                min_keyword_len_5: vec![],
1220            },
1221            Test {
1222                query: "ia waterl",
1223                min_keyword_len_0: vec![],
1224                min_keyword_len_5: vec![],
1225            },
1226            Test {
1227                query: "ia waterlo",
1228                min_keyword_len_0: vec![],
1229                min_keyword_len_5: vec![],
1230            },
1231            Test {
1232                query: "ia waterloo",
1233                min_keyword_len_0: vec![geoname::tests::waterloo_ia().into()],
1234                min_keyword_len_5: vec![geoname::tests::waterloo_ia().into()],
1235            },
1236
1237            Test {
1238                query: "w",
1239                min_keyword_len_0: vec![],
1240                min_keyword_len_5: vec![],
1241            },
1242            Test {
1243                query: "wa",
1244                min_keyword_len_0: vec![],
1245                min_keyword_len_5: vec![],
1246            },
1247            Test {
1248                query: "wat",
1249                min_keyword_len_0: vec![],
1250                min_keyword_len_5: vec![],
1251            },
1252            Test {
1253                query: "wate",
1254                min_keyword_len_0: vec![],
1255                min_keyword_len_5: vec![],
1256            },
1257            Test {
1258                query: "water",
1259                min_keyword_len_0: vec![],
1260                min_keyword_len_5: vec![],
1261            },
1262            Test {
1263                query: "waterl",
1264                min_keyword_len_0: vec![],
1265                min_keyword_len_5: vec![],
1266            },
1267            Test {
1268                query: "waterlo",
1269                min_keyword_len_0: vec![],
1270                min_keyword_len_5: vec![],
1271            },
1272            Test {
1273                query: "waterloo",
1274                // Matches should be returned by population descending.
1275                min_keyword_len_0: vec![
1276                    geoname::tests::waterloo_on().into(),
1277                    geoname::tests::waterloo_ia().into(),
1278                    geoname::tests::waterloo_al().into(),
1279                ],
1280                min_keyword_len_5: vec![
1281                    geoname::tests::waterloo_on().into(),
1282                    geoname::tests::waterloo_ia().into(),
1283                    geoname::tests::waterloo_al().into(),
1284                ],
1285            },
1286
1287            Test {
1288                query: "i",
1289                min_keyword_len_0: vec![],
1290                min_keyword_len_5: vec![],
1291            },
1292            Test {
1293                query: "ia",
1294                min_keyword_len_0: vec![],
1295                min_keyword_len_5: vec![],
1296            },
1297            Test {
1298                query: "io",
1299                min_keyword_len_0: vec![],
1300                min_keyword_len_5: vec![],
1301            },
1302            Test {
1303                query: "iow",
1304                min_keyword_len_0: vec![],
1305                min_keyword_len_5: vec![],
1306            },
1307            Test {
                // "iowa" is also a weather keyword.
1309                query: "iowa",
1310                min_keyword_len_0: vec![KW_SUGGESTION.clone()],
1311                min_keyword_len_5: vec![],
1312            },
1313
1314            Test {
1315                query: "waterloo al",
1316                min_keyword_len_0: vec![geoname::tests::waterloo_al().into()],
1317                min_keyword_len_5: vec![geoname::tests::waterloo_al().into()],
1318            },
1319            Test {
1320                query: "al waterloo",
1321                min_keyword_len_0: vec![geoname::tests::waterloo_al().into()],
1322                min_keyword_len_5: vec![geoname::tests::waterloo_al().into()],
1323            },
1324
1325            Test {
1326                query: "waterloo ia al",
1327                min_keyword_len_0: vec![],
1328                min_keyword_len_5: vec![],
1329            },
1330            Test {
1331                query: "waterloo ny",
1332                min_keyword_len_0: vec![],
1333                min_keyword_len_5: vec![],
1334            },
1335
1336            Test {
1337                query: "al",
1338                min_keyword_len_0: vec![],
1339                min_keyword_len_5: vec![],
1340            },
1341            Test {
1342                query: "alabama",
1343                min_keyword_len_0: vec![],
1344                min_keyword_len_5: vec![],
1345            },
1346
1347            Test {
1348                query: "new york",
1349                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1350                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1351            },
1352            Test {
1353                query: "new york new york",
1354                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1355                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1356            },
1357            Test {
1358                query: "ny ny",
1359                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1360                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1361            },
1362            Test {
1363                query: "ny ny ny",
1364                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1365                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1366            },
1367
1368            Test {
1369                query: "ny n",
1370                min_keyword_len_0: vec![],
1371                min_keyword_len_5: vec![],
1372            },
1373            Test {
1374                query: "ny ne",
1375                min_keyword_len_0: vec![],
1376                min_keyword_len_5: vec![],
1377            },
1378            Test {
1379                query: "ny new",
1380                min_keyword_len_0: vec![],
1381                min_keyword_len_5: vec![],
1382            },
1383            Test {
1384                query: "ny new ",
1385                min_keyword_len_0: vec![],
1386                min_keyword_len_5: vec![],
1387            },
1388
1389            // These shouldn't match anything. "ny" is an NYC abbreviation, and
1390            // without a weather keyword, abbreviations require another fully
1391            // typed related geoname. "york" is also a weather keyword but that
1392            // shouldn't matter here since "new" by itself is not a full name.
1393            Test {
1394                query: "ny new y",
1395                min_keyword_len_0: vec![],
1396                min_keyword_len_5: vec![],
1397            },
1398            Test {
1399                query: "ny new yo",
1400                min_keyword_len_0: vec![],
1401                min_keyword_len_5: vec![],
1402            },
1403            Test {
1404                query: "ny new yor",
1405                min_keyword_len_0: vec![],
1406                min_keyword_len_5: vec![],
1407            },
1408
1409            Test {
1410                query: "ny new york",
1411                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1412                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1413            },
1414            Test {
1415                query: "new york ny",
1416                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1417                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1418            },
1419
1420            Test {
1421                query: "weather ny",
1422                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1423                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1424            },
1425
1426            // "ny" is an NYC abbreviation. A suggestion should be returned once
1427            // the query ends with any prefix of "weather" (a keyword).
1428            Test {
1429                query: "ny w",
1430                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1431                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1432            },
1433            Test {
1434                query: "ny weat",
1435                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1436                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1437            },
1438            Test {
1439                query: "ny weath",
1440                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1441                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1442            },
1443            Test {
1444                query: "ny weather",
1445                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1446                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1447            },
1448
1449            Test {
1450                query: "weather ny ny",
1451                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1452                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1453            },
1454            Test {
1455                query: "ny weather ny",
1456                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1457                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1458            },
1459            Test {
1460                query: "ny ny weather",
1461                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1462                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1463            },
1464
1465            Test {
1466                query: "rochester ny",
1467                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1468                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1469            },
1470            Test {
1471                query: "ny rochester",
1472                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1473                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1474            },
1475            Test {
1476                query: "weather rochester ny",
1477                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1478                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1479            },
1480            Test {
1481                query: "rochester weather ny",
1482                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1483                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1484            },
1485            Test {
1486                query: "rochester ny weather",
1487                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1488                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1489            },
1490            Test {
1491                query: "weather ny rochester",
1492                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1493                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1494            },
1495            Test {
1496                query: "ny weather rochester",
1497                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1498                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1499            },
1500            Test {
1501                query: "ny rochester weather",
1502                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1503                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1504            },
1505
1506            Test {
1507                query: "weather new york",
1508                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1509                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1510            },
1511            Test {
1512                query: "new weather york",
1513                min_keyword_len_0: vec![],
1514                min_keyword_len_5: vec![],
1515            },
1516            Test {
1517                query: "new york weather",
1518                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1519                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1520            },
1521            Test {
1522                query: "weather new york new york",
1523                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1524                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1525            },
1526            Test {
1527                query: "new york weather new york",
1528                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1529                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1530            },
1531            Test {
1532                query: "new york new york weather",
1533                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1534                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1535            },
1536
1537            Test {
1538                query: "weather water",
1539                min_keyword_len_0: vec![
1540                    geoname::tests::waterloo_on().into(),
1541                    geoname::tests::waterloo_ia().into(),
1542                    geoname::tests::waterloo_al().into(),
1543                ],
1544                min_keyword_len_5: vec![
1545                    geoname::tests::waterloo_on().into(),
1546                    geoname::tests::waterloo_ia().into(),
1547                    geoname::tests::waterloo_al().into(),
1548                ],
1549            },
1550            Test {
1551                query: "waterloo w",
1552                min_keyword_len_0: vec![
1553                    geoname::tests::waterloo_on().into(),
1554                    geoname::tests::waterloo_ia().into(),
1555                    geoname::tests::waterloo_al().into(),
1556                ],
1557                min_keyword_len_5: vec![
1558                    geoname::tests::waterloo_on().into(),
1559                    geoname::tests::waterloo_ia().into(),
1560                    geoname::tests::waterloo_al().into(),
1561                ],
1562            },
1563            Test {
1564                query: "weather w w",
1565                min_keyword_len_0: vec![],
1566                min_keyword_len_5: vec![],
1567            },
1568            Test {
1569                query: "weather w water",
1570                min_keyword_len_0: vec![],
1571                min_keyword_len_5: vec![],
1572            },
1573            Test {
1574                query: "weather w waterloo",
1575                min_keyword_len_0: vec![],
1576                min_keyword_len_5: vec![],
1577            },
1578            Test {
1579                query: "weather water w",
1580                min_keyword_len_0: vec![],
1581                min_keyword_len_5: vec![],
1582            },
1583            Test {
1584                query: "weather waterloo water",
1585                min_keyword_len_0: vec![],
1586                min_keyword_len_5: vec![],
1587            },
1588            Test {
1589                query: "weather water water",
1590                min_keyword_len_0: vec![],
1591                min_keyword_len_5: vec![],
1592            },
1593            Test {
1594                query: "weather water waterloo",
1595                min_keyword_len_0: vec![],
1596                min_keyword_len_5: vec![],
1597            },
1598
1599            Test {
1600                query: "waterloo foo",
1601                min_keyword_len_0: vec![],
1602                min_keyword_len_5: vec![],
1603            },
1604            Test {
1605                query: "waterloo weather foo",
1606                min_keyword_len_0: vec![],
1607                min_keyword_len_5: vec![],
1608            },
1609            Test {
1610                query: "foo waterloo",
1611                min_keyword_len_0: vec![],
1612                min_keyword_len_5: vec![],
1613            },
1614            Test {
1615                query: "foo waterloo weather",
1616                min_keyword_len_0: vec![],
1617                min_keyword_len_5: vec![],
1618            },
1619            Test {
1620                query: "weather waterloo foo",
1621                min_keyword_len_0: vec![],
1622                min_keyword_len_5: vec![],
1623            },
1624            Test {
1625                query: "weather foo waterloo",
1626                min_keyword_len_0: vec![],
1627                min_keyword_len_5: vec![],
1628            },
1629            Test {
1630                query: "weather water foo",
1631                min_keyword_len_0: vec![],
1632                min_keyword_len_5: vec![],
1633            },
1634            Test {
1635                query: "weather foo water",
1636                min_keyword_len_0: vec![],
1637                min_keyword_len_5: vec![],
1638            },
1639
1640            Test {
1641                query: "waterloo on",
1642                min_keyword_len_0: vec![geoname::tests::waterloo_on().into()],
1643                min_keyword_len_5: vec![geoname::tests::waterloo_on().into()],
1644            },
1645            Test {
1646                query: "waterloo ont",
1647                min_keyword_len_0: vec![geoname::tests::waterloo_on().into()],
1648                min_keyword_len_5: vec![geoname::tests::waterloo_on().into()],
1649            },
1650            Test {
1651                query: "waterloo ont.",
1652                min_keyword_len_0: vec![geoname::tests::waterloo_on().into()],
1653                min_keyword_len_5: vec![geoname::tests::waterloo_on().into()],
1654            },
1655            Test {
1656                query: "waterloo ontario",
1657                min_keyword_len_0: vec![geoname::tests::waterloo_on().into()],
1658                min_keyword_len_5: vec![geoname::tests::waterloo_on().into()],
1659            },
1660            Test {
1661                query: "waterloo canada",
1662                min_keyword_len_0: vec![geoname::tests::waterloo_on().into()],
1663                min_keyword_len_5: vec![geoname::tests::waterloo_on().into()],
1664            },
1665            Test {
1666                query: "waterloo on canada",
1667                min_keyword_len_0: vec![geoname::tests::waterloo_on().into()],
1668                min_keyword_len_5: vec![geoname::tests::waterloo_on().into()],
1669            },
1670
1671            Test {
1672                query: "waterloo on us",
1673                min_keyword_len_0: vec![],
1674                min_keyword_len_5: vec![],
1675            },
1676            Test {
1677                query: "waterloo al canada",
1678                min_keyword_len_0: vec![],
1679                min_keyword_len_5: vec![],
1680            },
1681
1682            Test {
1683                query: "ny",
1684                min_keyword_len_0: vec![],
1685                min_keyword_len_5: vec![],
1686            },
1687            Test {
1688                query: "nyc",
1689                min_keyword_len_0: vec![],
1690                min_keyword_len_5: vec![],
1691            },
1692            Test {
1693                query: "roc",
1694                min_keyword_len_0: vec![],
1695                min_keyword_len_5: vec![],
1696            },
1697            Test {
1698                query: "roc ny",
1699                min_keyword_len_0: vec![],
1700                min_keyword_len_5: vec![],
1701            },
1702            Test {
1703                query: "ny roc",
1704                min_keyword_len_0: vec![],
1705                min_keyword_len_5: vec![],
1706            },
1707
1708            Test {
1709                query: "nyc ny",
1710                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1711                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1712            },
1713            Test {
1714                query: "ny nyc",
1715                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1716                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1717            },
1718            Test {
1719                query: "nyc weather",
1720                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1721                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1722            },
1723            Test {
1724                query: "weather nyc",
1725                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1726                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1727            },
1728            Test {
1729                query: "roc weather",
1730                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1731                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1732            },
1733            Test {
1734                query: "weather roc",
1735                min_keyword_len_0: vec![geoname::tests::rochester().into()],
1736                min_keyword_len_5: vec![geoname::tests::rochester().into()],
1737            },
1738
1739            Test {
1740                // full "weather" keyword + name prefix
1741                query: "weather new",
1742                min_keyword_len_0: vec![
1743                    geoname::tests::nyc().into(),
1744                    geoname::tests::new_orleans().into(),
1745                ],
1746                min_keyword_len_5: vec![
1747                    geoname::tests::nyc().into(),
1748                    geoname::tests::new_orleans().into(),
1749                ],
1750            },
1751            Test {
1752                // full "weather" keyword + name prefix + "xyz" keyword prefix,
1753                // invalid
1754                query: "weather new xy",
1755                min_keyword_len_0: vec![],
1756                min_keyword_len_5: vec![],
1757            },
1758            Test {
1759                // full "weather" keyword + name prefix + "weather" keyword
1760                // prefix, invalid
1761                query: "weather new we",
1762                min_keyword_len_0: vec![],
1763                min_keyword_len_5: vec![],
1764            },
1765
1766            // These should match New York even though there's also a weather
1767            // keyword called "york".
1768            Test {
1769                query: "weather new y",
1770                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1771                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1772            },
1773            Test {
1774                query: "weather new yo",
1775                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1776                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1777            },
1778            Test {
1779                query: "weather new yor",
1780                min_keyword_len_0: vec![geoname::tests::nyc().into()],
1781                min_keyword_len_5: vec![geoname::tests::nyc().into()],
1782            },
1783
1784            // These should match New Orleans even though there's also a weather
1785            // keyword "orleans".
1786            Test {
1787                query: "weather new o",
1788                min_keyword_len_0: vec![geoname::tests::new_orleans().into()],
1789                min_keyword_len_5: vec![geoname::tests::new_orleans().into()],
1790            },
1791            Test {
1792                query: "weather new or",
1793                min_keyword_len_0: vec![geoname::tests::new_orleans().into()],
1794                min_keyword_len_5: vec![geoname::tests::new_orleans().into()],
1795            },
1796            Test {
1797                query: "weather new orl",
1798                min_keyword_len_0: vec![geoname::tests::new_orleans().into()],
1799                min_keyword_len_5: vec![geoname::tests::new_orleans().into()],
1800            },
1801            Test {
1802                query: "weather new orle",
1803                min_keyword_len_0: vec![geoname::tests::new_orleans().into()],
1804                min_keyword_len_5: vec![geoname::tests::new_orleans().into()],
1805            },
1806            Test {
1807                query: "weather new orlea",
1808                min_keyword_len_0: vec![geoname::tests::new_orleans().into()],
1809                min_keyword_len_5: vec![geoname::tests::new_orleans().into()],
1810            },
1811            Test {
1812                query: "weather new orlean",
1813                min_keyword_len_0: vec![geoname::tests::new_orleans().into()],
1814                min_keyword_len_5: vec![geoname::tests::new_orleans().into()],
1815            },
1816            Test {
1817                query: "weather new orleans",
1818                min_keyword_len_0: vec![geoname::tests::new_orleans().into()],
1819                min_keyword_len_5: vec![geoname::tests::new_orleans().into()],
1820            },
1821
1822            Test {
1823                query: "new o",
1824                min_keyword_len_0: vec![],
1825                min_keyword_len_5: vec![],
1826            },
1827            Test {
1828                query: "new orlean",
1829                min_keyword_len_0: vec![],
1830                min_keyword_len_5: vec![],
1831            },
1832            Test {
1833                query: "new orleans",
1834                min_keyword_len_0: vec![geoname::tests::new_orleans().into()],
1835                min_keyword_len_5: vec![geoname::tests::new_orleans().into()],
1836            },
1837
1838            // Query with a weather keyword that's also an admin division name:
1839            // This should match only Waterloo, IA even though "iowa" is also a
1840            // weather keyword.
1841            Test {
1842                query: "weather waterloo iowa",
1843                min_keyword_len_0: vec![geoname::tests::waterloo_ia().into()],
1844                min_keyword_len_5: vec![geoname::tests::waterloo_ia().into()],
1845            },
1846
1847            Test {
1848                query: "weather san diego",
1849                min_keyword_len_0: vec![geoname::tests::san_diego().into()],
1850                min_keyword_len_5: vec![geoname::tests::san_diego().into()],
1851            },
1852
1853            // This should match "san diego" (the city) and "ca" (the state). It
1854            // should not match Carlsbad, CA even though Carlsbad starts with
1855            // "ca" and it's in San Diego County.
1856            Test {
1857                query: "weather san diego ca",
1858                min_keyword_len_0: vec![geoname::tests::san_diego().into()],
1859                min_keyword_len_5: vec![geoname::tests::san_diego().into()],
1860            },
1861
1862            // These should match Carlsbad since it's in San Diego County.
1863            Test {
1864                query: "weather san diego car",
1865                min_keyword_len_0: vec![geoname::tests::carlsbad().into()],
1866                min_keyword_len_5: vec![geoname::tests::carlsbad().into()],
1867            },
1868            Test {
1869                query: "weather san diego carlsbad",
1870                min_keyword_len_0: vec![geoname::tests::carlsbad().into()],
1871                min_keyword_len_5: vec![geoname::tests::carlsbad().into()],
1872            },
1873
1874            // In these next two, "san" is a prefix of San Diego, and "ca" is
1875            // both a prefix of Carlsbad and a California abbreviation.
1876            Test {
1877                // "san ca" is not a city or city prefix, and "san" is not the
1878                // last term in the query, so this shouldn't match anything.
1879                query: "weather san ca",
1880                min_keyword_len_0: vec![],
1881                min_keyword_len_5: vec![],
1882            },
1883            Test {
1884                // "ca" is a full abbreviation for California, and "san" should
1885                // prefix match on San Diego since it's the last term in the
1886                // query. San Diego is in California, so it should be returned.
1887                query: "weather ca san",
1888                min_keyword_len_0: vec![geoname::tests::san_diego().into()],
1889                min_keyword_len_5: vec![geoname::tests::san_diego().into()],
1890            },
1891
1892            // "san carl" isn't a prefix of any city, and "san" should not match
1893            // San Diego County since it's not at the end of the query.
1894            Test {
1895                query: "weather san carl",
1896                min_keyword_len_0: vec![],
1897                min_keyword_len_5: vec![],
1898            },
1899            Test {
1900                query: "san carl",
1901                min_keyword_len_0: vec![],
1902                min_keyword_len_5: vec![],
1903            },
1904
1905            Test {
1906                query: "liverpool",
1907                min_keyword_len_0: vec![geoname::tests::liverpool_city().into()],
1908                min_keyword_len_5: vec![geoname::tests::liverpool_city().into()],
1909            },
1910            Test {
1911                query: "liverpool",
1912                min_keyword_len_0: vec![geoname::tests::liverpool_city().into()],
1913                min_keyword_len_5: vec![geoname::tests::liverpool_city().into()],
1914            },
1915            Test {
1916                query: "liverpool eng",
1917                min_keyword_len_0: vec![geoname::tests::liverpool_city().into()],
1918                min_keyword_len_5: vec![geoname::tests::liverpool_city().into()],
1919            },
1920            Test {
1921                query: "liverpool england",
1922                min_keyword_len_0: vec![geoname::tests::liverpool_city().into()],
1923                min_keyword_len_5: vec![geoname::tests::liverpool_city().into()],
1924            },
1925            Test {
1926                query: "liverpool uk",
1927                min_keyword_len_0: vec![geoname::tests::liverpool_city().into()],
1928                min_keyword_len_5: vec![geoname::tests::liverpool_city().into()],
1929            },
1930            Test {
1931                query: "liverpool england uk",
1932                min_keyword_len_0: vec![geoname::tests::liverpool_city().into()],
1933                min_keyword_len_5: vec![geoname::tests::liverpool_city().into()],
1934            },
1935
1936            Test {
1937                query: "La Visitation-de-l'Île-Dupas",
1938                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1939                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1940            },
1941            Test {
1942                query: "la visitation-de-l'île-dupas",
1943                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1944                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1945            },
1946            Test {
1947                query: "la visitation-de-l'île-dupas wea",
1948                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1949                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1950            },
1951            Test {
1952                query: "la visitation-de-l'île-dupas weather",
1953                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1954                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1955            },
1956            Test {
1957                query: "weather la visitation-de-l'île-dupas",
1958                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1959                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1960            },
1961            Test {
1962                query: "weather la v",
1963                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1964                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1965            },
1966            Test {
1967                query: "weather la visitation",
1968                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1969                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1970            },
1971            Test {
1972                query: "weather la visitation de",
1973                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1974                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1975            },
1976            Test {
1977                query: "la visitation de lile dupas",
1978                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1979                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1980            },
1981            Test {
1982                query: "la visitation de lile dupas wea",
1983                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1984                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1985            },
1986            Test {
1987                query: "la visitation de lile dupas weather",
1988                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1989                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1990            },
1991            Test {
1992                query: "weather la visitation de lile dupas",
1993                min_keyword_len_0: vec![geoname::tests::la_visitation().into()],
1994                min_keyword_len_5: vec![geoname::tests::la_visitation().into()],
1995            },
1996
1997            Test {
1998                query: geoname::tests::LONG_NAME,
1999                min_keyword_len_0: vec![geoname::tests::long_name_city().into()],
2000                min_keyword_len_5: vec![geoname::tests::long_name_city().into()],
2001            },
2002
2003            Test {
2004                query: "     waterloo iowa",
2005                min_keyword_len_0: vec![geoname::tests::waterloo_ia().into()],
2006                min_keyword_len_5: vec![geoname::tests::waterloo_ia().into()],
2007            },
2008            Test {
2009                query: "     waterloo ia",
2010                min_keyword_len_0: vec![geoname::tests::waterloo_ia().into()],
2011                min_keyword_len_5: vec![geoname::tests::waterloo_ia().into()],
2012            },
2013
2014            Test {
2015                query: "waterloo     ia",
2016                min_keyword_len_0: vec![geoname::tests::waterloo_ia().into()],
2017                min_keyword_len_5: vec![geoname::tests::waterloo_ia().into()],
2018            },
2019            Test {
2020                query: "waterloo ia     ",
2021                min_keyword_len_0: vec![geoname::tests::waterloo_ia().into()],
2022                min_keyword_len_5: vec![geoname::tests::waterloo_ia().into()],
2023            },
2024            Test {
2025                query: "  waterloo   ia    ",
2026                min_keyword_len_0: vec![geoname::tests::waterloo_ia().into()],
2027                min_keyword_len_5: vec![geoname::tests::waterloo_ia().into()],
2028            },
2029            Test {
2030                query: "   WaTeRlOo   ",
2031                min_keyword_len_0: vec![
2032                    geoname::tests::waterloo_on().into(),
2033                    geoname::tests::waterloo_ia().into(),
2034                    geoname::tests::waterloo_al().into(),
2035                ],
2036                min_keyword_len_5: vec![
2037                    geoname::tests::waterloo_on().into(),
2038                    geoname::tests::waterloo_ia().into(),
2039                    geoname::tests::waterloo_al().into(),
2040                ],
2041            },
2042
2043            Test {
2044                query: "     new york weather",
2045                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2046                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2047            },
2048            Test {
2049                query: "new     york weather",
2050                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2051                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2052            },
2053            Test {
2054                query: "new york     weather",
2055                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2056                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2057            },
2058            Test {
2059                query: "new york weather     ",
2060                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2061                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2062            },
2063
2064            Test {
2065                query: "rochester",
2066                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2067                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2068            },
2069            Test {
2070                query: "rochester ,",
2071                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2072                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2073            },
2074            Test {
2075                query: "rochester , ",
2076                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2077                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2078            },
2079            Test {
2080                query: "rochester,ny",
2081                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2082                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2083            },
2084            Test {
2085                query: "rochester, ny",
2086                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2087                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2088            },
2089            Test {
2090                query: "rochester ,ny",
2091                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2092                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2093            },
2094            Test {
2095                query: "rochester , ny",
2096                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2097                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2098            },
2099            Test {
2100                query: "weather rochester,",
2101                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2102                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2103            },
2104            Test {
2105                query: "weather rochester, ",
2106                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2107                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2108            },
2109            Test {
2110                query: "weather rochester , ",
2111                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2112                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2113            },
2114            Test {
2115                query: "weather rochester,ny",
2116                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2117                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2118            },
2119            Test {
2120                query: "weather rochester, ny",
2121                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2122                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2123            },
2124            Test {
2125                query: "weather rochester ,ny",
2126                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2127                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2128            },
2129            Test {
2130                query: "weather rochester , ny",
2131                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2132                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2133            },
2134            Test {
2135                query: "rochester,weather",
2136                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2137                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2138            },
2139            Test {
2140                query: "rochester, weather",
2141                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2142                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2143            },
2144            Test {
2145                query: "rochester ,weather",
2146                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2147                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2148            },
2149            Test {
2150                query: "rochester , weather",
2151                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2152                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2153            },
2154            Test {
2155                query: "rochester,ny weather",
2156                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2157                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2158            },
2159            Test {
2160                query: "rochester, ny weather",
2161                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2162                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2163            },
2164            Test {
2165                query: "rochester ,ny weather",
2166                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2167                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2168            },
2169            Test {
2170                query: "rochester , ny weather",
2171                min_keyword_len_0: vec![geoname::tests::rochester().into()],
2172                min_keyword_len_5: vec![geoname::tests::rochester().into()],
2173            },
2174
2175            Test {
2176                query: "new york,",
2177                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2178                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2179            },
2180            Test {
2181                query: "new york ,",
2182                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2183                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2184            },
2185            Test {
2186                query: "new york , ",
2187                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2188                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2189            },
2190            Test {
2191                query: "new york,ny",
2192                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2193                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2194            },
2195            Test {
2196                query: "new york, ny",
2197                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2198                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2199            },
2200            Test {
2201                query: "new york ,ny",
2202                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2203                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2204            },
2205            Test {
2206                query: "new york , ny",
2207                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2208                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2209            },
2210            Test {
2211                query: "weather new york,ny",
2212                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2213                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2214            },
2215            Test {
2216                query: "weather new york, ny",
2217                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2218                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2219            },
2220            Test {
2221                query: "weather new york ,ny",
2222                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2223                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2224            },
2225            Test {
2226                query: "weather new york , ny",
2227                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2228                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2229            },
2230            Test {
2231                query: "new york,weather",
2232                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2233                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2234            },
2235            Test {
2236                query: "new york, weather",
2237                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2238                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2239            },
2240            Test {
2241                query: "new york ,weather",
2242                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2243                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2244            },
2245            Test {
2246                query: "new york , weather",
2247                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2248                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2249            },
2250            Test {
2251                query: "new york,ny weather",
2252                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2253                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2254            },
2255            Test {
2256                query: "new york, ny weather",
2257                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2258                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2259            },
2260            Test {
2261                query: "new york ,ny weather",
2262                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2263                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2264            },
2265            Test {
2266                query: "new york , ny weather",
2267                min_keyword_len_0: vec![geoname::tests::nyc().into()],
2268                min_keyword_len_5: vec![geoname::tests::nyc().into()],
2269            },
2270
2271            Test {
2272                query: &format!("{} weather", geoname::tests::LONG_NAME),
2273                min_keyword_len_0: vec![geoname::tests::long_name_city().into()],
2274                min_keyword_len_5: vec![geoname::tests::long_name_city().into()],
2275            },
2276            Test {
2277                query: &format!("weather {}", geoname::tests::LONG_NAME),
2278                min_keyword_len_0: vec![geoname::tests::long_name_city().into()],
2279                min_keyword_len_5: vec![geoname::tests::long_name_city().into()],
2280            },
2281
2282            Test {
2283                query: &format!("{} and some other words that don't match anything but that is neither here nor there", geoname::tests::LONG_NAME),
2284                min_keyword_len_0: vec![],
2285                min_keyword_len_5: vec![],
2286            },
2287            Test {
2288                query: &format!("and some other words that don't match anything {} but that is neither here nor there", geoname::tests::LONG_NAME),
2289                min_keyword_len_0: vec![],
2290                min_keyword_len_5: vec![],
2291            },
2292            Test {
2293                query: &format!("and some other words that don't match anything but that is neither here nor there {}", geoname::tests::LONG_NAME),
2294                min_keyword_len_0: vec![],
2295                min_keyword_len_5: vec![],
2296            },
2297            Test {
2298                query: &format!("weather {} and some other words that don't match anything but that is neither here nor there", geoname::tests::LONG_NAME),
2299                min_keyword_len_0: vec![],
2300                min_keyword_len_5: vec![],
2301            },
2302            Test {
2303                query: &format!("{} weather and some other words that don't match anything but that is neither here nor there", geoname::tests::LONG_NAME),
2304                min_keyword_len_0: vec![],
2305                min_keyword_len_5: vec![],
2306            },
2307            Test {
2308                query: &format!("{} and some other words that don't match anything weather but that is neither here nor there", geoname::tests::LONG_NAME),
2309                min_keyword_len_0: vec![],
2310                min_keyword_len_5: vec![],
2311            },
2312            Test {
2313                query: &format!("{} and some other words that don't match anything but that is neither here nor there weather", geoname::tests::LONG_NAME),
2314                min_keyword_len_0: vec![],
2315                min_keyword_len_5: vec![],
2316            },
2317            Test {
2318                query: &format!("weather and some other words that don't match anything {} but that is neither here nor there", geoname::tests::LONG_NAME),
2319                min_keyword_len_0: vec![],
2320                min_keyword_len_5: vec![],
2321            },
2322            Test {
2323                query: &format!("weather and some other words that don't match anything but that is neither here nor there {}", geoname::tests::LONG_NAME),
2324                min_keyword_len_0: vec![],
2325                min_keyword_len_5: vec![],
2326            },
2327            Test {
2328                query: &format!("and some other words that don't match anything weather {} but that is neither here nor there", geoname::tests::LONG_NAME),
2329                min_keyword_len_0: vec![],
2330                min_keyword_len_5: vec![],
2331            },
2332            Test {
2333                query: &format!("and some other words that don't match anything but that is neither here nor there weather {}", geoname::tests::LONG_NAME),
2334                min_keyword_len_0: vec![],
2335                min_keyword_len_5: vec![],
2336            },
2337            Test {
2338                query: &format!("{} weather and then this also doesn't match anything down here", geoname::tests::LONG_NAME),
2339                min_keyword_len_0: vec![],
2340                min_keyword_len_5: vec![],
2341            },
2342            Test {
2343                query: &format!("{} and then this also doesn't match anything down here weather", geoname::tests::LONG_NAME),
2344                min_keyword_len_0: vec![],
2345                min_keyword_len_5: vec![],
2346            },
2347            Test {
2348                query: &format!("and then this also doesn't match anything down here {} weather", geoname::tests::LONG_NAME),
2349                min_keyword_len_0: vec![],
2350                min_keyword_len_5: vec![],
2351            },
2352            Test {
2353                query: &format!("and then this also doesn't match anything down here weather {}", geoname::tests::LONG_NAME),
2354                min_keyword_len_0: vec![],
2355                min_keyword_len_5: vec![],
2356            },
2357        ];
2358
2359        for min_keyword_length in [0, 5] {
2360            let mut store = geoname::tests::new_test_store();
2361            store
2362                .client_mut()
2363                .add_record(SuggestionProvider::Weather.record(
2364                    "weather-1",
2365                    record_json.clone().merge(json!({
2366                        "min_keyword_length": min_keyword_length,
2367                    })),
2368                ));
2369
2370            store.ingest(SuggestIngestionConstraints {
2371                providers: Some(vec![SuggestionProvider::Weather]),
2372                ..SuggestIngestionConstraints::all_providers()
2373            });
2374
2375            for test in tests {
2376                assert_eq!(
2377                    &store.fetch_suggestions(SuggestionQuery::weather(test.query)),
2378                    match min_keyword_length {
2379                        0 => &test.min_keyword_len_0,
2380                        5 => &test.min_keyword_len_5,
2381                        _ => std::unreachable!(),
2382                    },
2383                    "Query: {:?}, min_keyword_length={}",
2384                    test.query,
2385                    min_keyword_length
2386                );
2387            }
2388        }
2389
2390        Ok(())
2391    }
2392
2393    #[test]
2394    fn keywords_metrics() -> anyhow::Result<()> {
2395        before_each();
2396
2397        // Add a couple of records with different metrics. We're just testing
2398        // metrics so the other values don't matter.
2399        let mut store = TestStore::new(
2400            MockRemoteSettingsClient::default()
2401                .with_record(SuggestionProvider::Weather.record(
2402                    "weather-0",
2403                    json!({
2404                        "min_keyword_length": 3,
2405                        "score": 0.24,
2406                        "keywords": [
2407                            "a b c d ef"
2408                        ],
2409                    }),
2410                ))
2411                .with_record(SuggestionProvider::Weather.record(
2412                    "weather-1",
2413                    json!({
2414                        "min_keyword_length": 3,
2415                        "score": 0.24,
2416                        "keywords": [
2417                            "abcdefghik lmnopqrst"
2418                        ],
2419                    }),
2420                )),
2421        );
2422
2423        store.ingest(SuggestIngestionConstraints {
2424            providers: Some(vec![SuggestionProvider::Weather]),
2425            ..SuggestIngestionConstraints::all_providers()
2426        });
2427
2428        store.read(|dao| {
2429            let cache = dao.weather_cache();
2430            assert_eq!(cache.keywords_metrics.max_len, 20);
2431            assert_eq!(cache.keywords_metrics.max_word_count, 5);
2432            Ok(())
2433        })?;
2434
2435        // Delete the first record. The metrics should change.
2436        store
2437            .client_mut()
2438            .delete_record(SuggestionProvider::Weather.empty_record("weather-0"));
2439        store.ingest(SuggestIngestionConstraints {
2440            providers: Some(vec![SuggestionProvider::Weather]),
2441            ..SuggestIngestionConstraints::all_providers()
2442        });
2443        store.read(|dao| {
2444            let cache = dao.weather_cache();
2445            assert_eq!(cache.keywords_metrics.max_len, 20);
2446            assert_eq!(cache.keywords_metrics.max_word_count, 2);
2447            Ok(())
2448        })?;
2449
2450        // Add a new record. The metrics should change again.
2451        store
2452            .client_mut()
2453            .add_record(SuggestionProvider::Weather.record(
2454                "weather-3",
2455                json!({
2456                    "min_keyword_length": 3,
2457                    "score": 0.24,
2458                    "keywords": [
2459                        "abcde fghij klmno"
2460                    ]
2461                }),
2462            ));
2463        store.ingest(SuggestIngestionConstraints {
2464            providers: Some(vec![SuggestionProvider::Weather]),
2465            ..SuggestIngestionConstraints::all_providers()
2466        });
2467        store.read(|dao| {
2468            let cache = dao.weather_cache();
2469            assert_eq!(cache.keywords_metrics.max_len, 20);
2470            assert_eq!(cache.keywords_metrics.max_word_count, 3);
2471            Ok(())
2472        })?;
2473
2474        Ok(())
2475    }
2476}