suggest/
fakespot.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
/// Score used to order Fakespot suggestions
///
/// This type holds the Fakespot-specific ranking logic.  A `FakespotScore` contains several
/// components, each intended to be in the range of [0, 1].  `FakespotScore::new` computes the
/// first two from the user's query; `FakespotScore::as_suggest_score` folds all three into the
/// single float used to rank the suggestion.
#[derive(Debug, Clone, PartialEq)]
pub struct FakespotScore {
    /// Did the query match the `keywords` field exactly?
    ///
    /// 1.0 for an exact term-for-term match, 0.0 for anything else.
    keywords_score: f64,
    /// How well did the query match the `product_type` field?
    ///
    /// Fraction of the `product_type` terms that are also present in the query.
    product_type_score: f64,
    /// Fakespot score from the RS data, this reflects the average review, number of reviews,
    /// Fakespot grade, etc.
    ///
    /// Stored exactly as passed to `FakespotScore::new`; nothing in this type checks that the
    /// value really lies in [0, 1].
    fakespot_score: f64,
}
19
20impl FakespotScore {
21    pub fn new(query: &str, keywords: String, product_type: String, fakespot_score: f64) -> Self {
22        let query = query.to_lowercase();
23        let query_terms = split_terms(&query);
24        Self {
25            keywords_score: calc_keywords_score(&query_terms, &keywords),
26            product_type_score: calc_product_type_score(&query_terms, &product_type),
27            fakespot_score,
28        }
29    }
30
31    /// Convert a FakespotScore into the value to use in `Sugggestion::Fakespot::score`
32    ///
33    /// This converts FakespotScore into a single float that:
34    ///   - Is > 0.3 so that Fakespot suggestions are preferred to AMP ones
35    ///   - Reflects the Fakespot ordering:
36    ///     - Suggestions with higher keywords_score are greater
37    ///     - If keywords_score is tied, then suggestions with higher product_type_scores are greater
38    ///     - If both are tied, then suggestions with higher fakespot_score are greater
39    pub fn as_suggest_score(&self) -> f64 {
40        0.30 + (0.01 * self.keywords_score)
41            + (0.001 * self.product_type_score)
42            + (0.0001 * self.fakespot_score)
43    }
44}
45
/// Split a string containing terms into a list of individual terms
///
/// Terms are separated by any run of whitespace; leading and trailing whitespace is ignored, so
/// an empty or all-whitespace string yields an empty list.  No case folding happens here: callers
/// that need case-insensitive matching must lowercase the input first.
fn split_terms(string: &str) -> Vec<&str> {
    let mut terms = Vec::new();
    terms.extend(string.split_whitespace());
    terms
}
50
51fn calc_keywords_score(query_terms: &[&str], keywords: &str) -> f64 {
52    // Note: We can assume keywords is lower-case, since we do that during ingestion
53    let keyword_terms = split_terms(keywords);
54    if keyword_terms.is_empty() {
55        return 0.0;
56    }
57
58    if query_terms == keyword_terms {
59        1.0
60    } else {
61        0.0
62    }
63}
64
65fn calc_product_type_score(query_terms: &[&str], product_type: &str) -> f64 {
66    // Note: We can assume product_type is lower-case, since we do that during ingestion
67    let product_type_terms = split_terms(product_type);
68    if product_type_terms.is_empty() {
69        return 0.0;
70    }
71    let count = product_type_terms
72        .iter()
73        .filter(|t| query_terms.contains(t))
74        .count() as f64;
75    count / product_type_terms.len() as f64
76}
77
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `query` scores `expected` against `keywords`.
    ///
    /// The query is lowercased and split into terms before scoring; `keywords` is passed through
    /// as-is.
    fn check_keywords_score(keywords: &str, query: &str, expected: f64) {
        let lowercased = query.to_lowercase();
        let actual = calc_keywords_score(&split_terms(&lowercased), keywords);
        assert_eq!(
            actual, expected,
            "keywords: {} query: {} expected: {} actual: {actual}",
            keywords, query, expected,
        );
    }

    #[test]
    fn test_keywords_score() {
        // Arguments are (keywords, query, expected).

        // Keyword score 1.0 on exact matches, 0.0 otherwise
        check_keywords_score("apple", "apple", 1.0);
        check_keywords_score("apple", "android", 0.0);
        check_keywords_score("apple", "apple phone", 0.0);
        // Empty keywords should always score 0.0
        check_keywords_score("", "", 0.0);
        check_keywords_score("", "apple", 0.0);
        // Matching should be case insensitive
        check_keywords_score("apple", "Apple", 1.0);
    }

    /// Assert that `query` scores `expected` against `product_type`.
    ///
    /// The query is lowercased and split into terms before scoring; `product_type` is passed
    /// through as-is.
    fn check_product_type_score(product_type: &str, query: &str, expected: f64) {
        let lowercased = query.to_lowercase();
        let actual = calc_product_type_score(&split_terms(&lowercased), product_type);
        assert_eq!(
            actual, expected,
            "product_type: {} query: {} expected: {} actual: {actual}",
            product_type, query, expected,
        );
    }

    #[test]
    fn test_product_type_score() {
        // Arguments are (product_type, query, expected).

        // Product type scores based on the percentage of terms in the product type that are also
        // present in the query
        check_product_type_score("standing desk", "standing desk", 1.0);
        check_product_type_score("standing desk", "desk", 0.5);
        check_product_type_score("standing desk", "desk desk desk", 0.5);
        check_product_type_score("standing desk", "standing", 0.5);
        check_product_type_score("standing desk", "phone", 0.0);
        // Extra terms in the query are ignored
        check_product_type_score("standing desk", "standing desk for my office", 1.0);
        // Empty product_type should always score 0.0
        check_product_type_score("", "", 0.0);
        // Matching should be case insensitive
        check_product_type_score("desk", "Desk", 1.0);
        // Extra spaces are ignored
        check_product_type_score("desk", "  desk  ", 1.0);
    }
}