suggest/
query.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5use std::collections::HashSet;
6
7use crate::{LabeledTimingSample, Suggestion, SuggestionProvider, SuggestionProviderConstraints};
8
9/// A query for suggestions to show in the address bar.
10#[derive(Clone, Debug, Default, uniffi::Record)]
11pub struct SuggestionQuery {
12    pub keyword: String,
13    pub providers: Vec<SuggestionProvider>,
14    #[uniffi(default = None)]
15    pub provider_constraints: Option<SuggestionProviderConstraints>,
16    #[uniffi(default = None)]
17    pub limit: Option<i32>,
18}
19
20#[derive(uniffi::Record)]
21pub struct QueryWithMetricsResult {
22    pub suggestions: Vec<Suggestion>,
23    /// Samples for the `suggest.query_time` metric
24    pub query_times: Vec<LabeledTimingSample>,
25}
26
27impl SuggestionQuery {
28    // Builder style methods for creating queries (mostly used by the test code)
29
30    pub fn all_providers(keyword: &str) -> Self {
31        Self {
32            keyword: keyword.to_string(),
33            providers: Vec::from(SuggestionProvider::all()),
34            ..Self::default()
35        }
36    }
37
38    pub fn with_providers(keyword: &str, providers: Vec<SuggestionProvider>) -> Self {
39        Self {
40            keyword: keyword.to_string(),
41            providers,
42            ..Self::default()
43        }
44    }
45
46    pub fn all_providers_except(keyword: &str, provider: SuggestionProvider) -> Self {
47        Self::with_providers(
48            keyword,
49            SuggestionProvider::all()
50                .into_iter()
51                .filter(|p| *p != provider)
52                .collect(),
53        )
54    }
55
56    pub fn amp(keyword: &str) -> Self {
57        Self {
58            keyword: keyword.into(),
59            providers: vec![SuggestionProvider::Amp],
60            ..Self::default()
61        }
62    }
63
64    pub fn wikipedia(keyword: &str) -> Self {
65        Self {
66            keyword: keyword.into(),
67            providers: vec![SuggestionProvider::Wikipedia],
68            ..Self::default()
69        }
70    }
71
72    pub fn amo(keyword: &str) -> Self {
73        Self {
74            keyword: keyword.into(),
75            providers: vec![SuggestionProvider::Amo],
76            ..Self::default()
77        }
78    }
79
80    pub fn yelp(keyword: &str) -> Self {
81        Self {
82            keyword: keyword.into(),
83            providers: vec![SuggestionProvider::Yelp],
84            ..Self::default()
85        }
86    }
87
88    pub fn mdn(keyword: &str) -> Self {
89        Self {
90            keyword: keyword.into(),
91            providers: vec![SuggestionProvider::Mdn],
92            ..Self::default()
93        }
94    }
95
96    pub fn fakespot(keyword: &str) -> Self {
97        Self {
98            keyword: keyword.into(),
99            providers: vec![SuggestionProvider::Fakespot],
100            ..Self::default()
101        }
102    }
103
104    pub fn weather(keyword: &str) -> Self {
105        Self {
106            keyword: keyword.into(),
107            providers: vec![SuggestionProvider::Weather],
108            ..Self::default()
109        }
110    }
111
112    pub fn dynamic(keyword: &str, suggestion_types: &[&str]) -> Self {
113        Self {
114            keyword: keyword.into(),
115            providers: vec![SuggestionProvider::Dynamic],
116            provider_constraints: Some(SuggestionProviderConstraints {
117                dynamic_suggestion_types: Some(
118                    suggestion_types.iter().map(|s| s.to_string()).collect(),
119                ),
120                ..SuggestionProviderConstraints::default()
121            }),
122            ..Self::default()
123        }
124    }
125
126    pub fn limit(self, limit: i32) -> Self {
127        Self {
128            limit: Some(limit),
129            ..self
130        }
131    }
132
133    /// Create an FTS query term for our keyword(s)
134    pub(crate) fn fts_query(&self) -> FtsQuery<'_> {
135        FtsQuery::new(&self.keyword)
136    }
137}
138
139pub struct FtsQuery<'a> {
140    pub match_arg: String,
141    pub match_arg_without_prefix_match: String,
142    pub is_prefix_query: bool,
143    keyword_terms: Vec<&'a str>,
144}
145
146impl<'a> FtsQuery<'a> {
147    fn new(keyword: &'a str) -> Self {
148        // Parse the `keyword` field into a set of keywords.
149        //
150        // This is used when passing the keywords into an FTS search.  It:
151        //   - Strips out any `():^*"` chars.  These are typically used for advanced searches, which
152        //     we don't support and it would be weird to only support for FTS searches.
153        //   - splits on whitespace to get a list of individual keywords
154        let keywords = Self::split_terms(keyword);
155        if keywords.is_empty() {
156            return Self {
157                keyword_terms: keywords,
158                match_arg: String::from(r#""""#),
159                match_arg_without_prefix_match: String::from(r#""""#),
160                is_prefix_query: false,
161            };
162        }
163        // Quote each term from `query` and join them together
164        let mut sqlite_match = keywords
165            .iter()
166            .map(|keyword| format!(r#""{keyword}""#))
167            .collect::<Vec<_>>()
168            .join(" ");
169        // If the input is > 3 characters, and there's no whitespace at the end.
170        // We want to append a `*` char to the end to do a prefix match on it.
171        let total_chars = keywords.iter().fold(0, |count, s| count + s.len());
172        let query_ends_in_whitespace = keyword.ends_with(' ');
173        let prefix_match = (total_chars > 3) && !query_ends_in_whitespace;
174        let sqlite_match_without_prefix_match = sqlite_match.clone();
175        if prefix_match {
176            sqlite_match.push('*');
177        }
178        Self {
179            keyword_terms: keywords,
180            is_prefix_query: prefix_match,
181            match_arg: sqlite_match,
182            match_arg_without_prefix_match: sqlite_match_without_prefix_match,
183        }
184    }
185
186    /// Try to figure out if a FTS match required stemming
187    ///
188    /// To test this, we have to try to mimic the SQLite FTS logic. This code doesn't do it
189    /// perfectly, but it should return the correct result most of the time.
190    pub fn match_required_stemming(&self, title: &str) -> bool {
191        let title = title.to_lowercase();
192        let split_title = Self::split_terms(&title);
193
194        !self.keyword_terms.iter().enumerate().all(|(i, keyword)| {
195            split_title.iter().any(|title_word| {
196                let last_keyword = i == self.keyword_terms.len() - 1;
197
198                if last_keyword && self.is_prefix_query {
199                    title_word.starts_with(keyword)
200                } else {
201                    title_word == keyword
202                }
203            })
204        })
205    }
206
207    fn split_terms(phrase: &str) -> Vec<&str> {
208        phrase
209            .split([' ', '(', ')', ':', '^', '*', '"', ','])
210            .filter(|s| !s.is_empty())
211            .collect()
212    }
213}
214
215/// Given a list of full keywords, create an FTS string to match against.
216///
217/// Creates a string with de-duped keywords.
218pub fn full_keywords_to_fts_content<'a>(
219    full_keywords: impl IntoIterator<Item = &'a str>,
220) -> String {
221    let parts: HashSet<_> = full_keywords
222        .into_iter()
223        .flat_map(str::split_whitespace)
224        .map(str::to_lowercase)
225        .collect();
226    let mut result = String::new();
227    for (i, part) in parts.into_iter().enumerate() {
228        if i != 0 {
229            result.push(' ');
230        }
231        result.push_str(&part);
232    }
233    result
234}
235
236#[cfg(test)]
237mod test {
238    use super::*;
239    use std::collections::HashMap;
240
241    fn check_parse_keywords(input: &str, expected: Vec<&str>) {
242        let query = SuggestionQuery::all_providers(input);
243        assert_eq!(query.fts_query().keyword_terms, expected);
244    }
245
246    #[test]
247    fn test_quote() {
248        check_parse_keywords("foo", vec!["foo"]);
249        check_parse_keywords("foo bar", vec!["foo", "bar"]);
250        // Special chars should be stripped
251        check_parse_keywords("\"foo()* ^bar:\"", vec!["foo", "bar"]);
252        // test some corner cases
253        check_parse_keywords("", vec![]);
254        check_parse_keywords(" ", vec![]);
255        check_parse_keywords("   foo     bar       ", vec!["foo", "bar"]);
256        check_parse_keywords("foo:bar", vec!["foo", "bar"]);
257    }
258
259    fn check_fts_query(input: &str, expected: &str) {
260        let query = SuggestionQuery::all_providers(input);
261        assert_eq!(query.fts_query().match_arg, expected);
262    }
263
264    #[test]
265    fn test_fts_query() {
266        // String with < 3 chars shouldn't get a prefix query
267        check_fts_query("r", r#""r""#);
268        check_fts_query("ru", r#""ru""#);
269        check_fts_query("run", r#""run""#);
270        // After 3 chars, we should append `*` to the last term to make it a prefix query
271        check_fts_query("runn", r#""runn"*"#);
272        check_fts_query("running", r#""running"*"#);
273        // The total number of chars is counted, not the number of chars in the last term
274        check_fts_query("running s", r#""running" "s"*"#);
275        // if the input ends in whitespace, then don't do a prefix query
276        check_fts_query("running ", r#""running""#);
277        // Special chars are filtered out
278        check_fts_query("running*\"()^: s", r#""running" "s"*"#);
279        check_fts_query("running *\"()^: s", r#""running" "s"*"#);
280        // Special chars shouldn't count towards the input size when deciding whether to do a
281        // prefix query or not
282        check_fts_query("r():", r#""r""#);
283        // Test empty strings
284        check_fts_query("", r#""""#);
285        check_fts_query(" ", r#""""#);
286        check_fts_query("()", r#""""#);
287    }
288
289    #[test]
290    fn test_fts_query_match_required_stemming() {
291        // These don't require stemming, since each keyword matches a term in the title
292        assert!(!FtsQuery::new("running shoes").match_required_stemming("running shoes"));
293        assert!(
294            !FtsQuery::new("running shoes").match_required_stemming("new balance running shoes")
295        );
296        // Case changes shouldn't matter
297        assert!(!FtsQuery::new("running shoes").match_required_stemming("Running Shoes"));
298        // This doesn't require stemming, since `:` is not part of the word
299        assert!(!FtsQuery::new("running shoes").match_required_stemming("Running: Shoes"));
300        // This requires the keywords to be stemmed in order to match
301        assert!(FtsQuery::new("run shoes").match_required_stemming("running shoes"));
302        // This didn't require stemming, since the last keyword was a prefix match
303        assert!(!FtsQuery::new("running sh").match_required_stemming("running shoes"));
304        // This does require stemming (we know it wasn't a prefix match since there's not enough
305        // characters).
306        assert!(FtsQuery::new("run").match_required_stemming("running shoes"));
307    }
308
309    #[test]
310    fn test_full_keywords_to_fts_content() {
311        check_full_keywords_to_fts_content(["a", "b", "c"], "a b c");
312        check_full_keywords_to_fts_content(["a", "b c"], "a b c");
313        check_full_keywords_to_fts_content(["a", "b c a"], "a b c");
314        check_full_keywords_to_fts_content(["a", "b C A"], "a b c");
315    }
316
317    fn check_full_keywords_to_fts_content<const N: usize>(input: [&str; N], expected: &str) {
318        let mut expected_counts = HashMap::<&str, usize>::new();
319        let mut actual_counts = HashMap::<&str, usize>::new();
320        for term in expected.split_whitespace() {
321            *expected_counts.entry(term).or_default() += 1;
322        }
323        let fts_content = full_keywords_to_fts_content(input);
324        for term in fts_content.split_whitespace() {
325            *actual_counts.entry(term).or_default() += 1;
326        }
327        assert_eq!(actual_counts, expected_counts);
328    }
329}