// suggest/fakespot.rs
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
// Fakespot-specific logic

/// Score used to order Fakespot suggestions.
///
/// A `FakespotScore` is made of several components, each expected to be in the range [0, 1].
/// `keywords_score` and `product_type_score` are computed from the query by
/// `FakespotScore::new`; `fakespot_score` is stored exactly as passed to `new` (it is not
/// clamped or validated there).
#[derive(Debug, Clone, PartialEq)]
pub struct FakespotScore {
    /// Did the query match the `keywords` field exactly?  1.0 when it did, 0.0 otherwise.
    keywords_score: f64,
    /// How well did the query match the `product_type` field?  This is the fraction of
    /// product-type terms that also appear in the query.
    product_type_score: f64,
    /// Fakespot score from the RS data, this reflects the average review, number of reviews,
    /// Fakespot grade, etc.
    fakespot_score: f64,
}
impl FakespotScore {
    /// Build the score for a single Fakespot suggestion.
    ///
    /// `query` is lowercased and split on whitespace, then compared against `keywords` (exact
    /// term-for-term match) and `product_type` (fraction of its terms found in the query).
    /// `keywords` and `product_type` are used as given, without any case conversion here.
    /// `fakespot_score` is stored unchanged.
    ///
    /// NOTE(review): `keywords` and `product_type` are taken by value but only borrowed; the
    /// signature is kept as-is for existing callers.
    pub fn new(query: &str, keywords: String, product_type: String, fakespot_score: f64) -> Self {
        let lowered_query = query.to_lowercase();
        let terms = split_terms(&lowered_query);
        let keywords_score = calc_keywords_score(&terms, &keywords);
        let product_type_score = calc_product_type_score(&terms, &product_type);
        Self {
            keywords_score,
            product_type_score,
            fakespot_score,
        }
    }

    /// Convert a FakespotScore into the value to use in `Suggestion::Fakespot::score`
    ///
    /// The components are collapsed into a single float that:
    ///   - Is at least 0.3 (and above it as soon as any component is positive), with the intent
    ///     that Fakespot suggestions are preferred to AMP ones.  When every component is 0 the
    ///     result is exactly 0.30.
    ///   - Reflects the Fakespot ordering by giving each component a weight ten times smaller
    ///     than the previous one:
    ///     - `keywords_score` is weighted 0.01
    ///     - `product_type_score` is weighted 0.001
    ///     - `fakespot_score` is weighted 0.0001
    pub fn as_suggest_score(&self) -> f64 {
        const BASE: f64 = 0.30;
        const KEYWORDS_WEIGHT: f64 = 0.01;
        const PRODUCT_TYPE_WEIGHT: f64 = 0.001;
        const FAKESPOT_WEIGHT: f64 = 0.0001;

        let weighted_components = [
            (KEYWORDS_WEIGHT, self.keywords_score),
            (PRODUCT_TYPE_WEIGHT, self.product_type_score),
            (FAKESPOT_WEIGHT, self.fakespot_score),
        ];
        // Accumulate left to right, starting from the base, so the floating-point result is
        // identical to writing the sum out as one expression.
        weighted_components
            .iter()
            .fold(BASE, |total, &(weight, value)| total + weight * value)
    }
}
/// Split a string containing terms into a list of individual terms.
///
/// Terms are separated by any run of whitespace; leading and trailing whitespace is ignored, so
/// an empty or all-whitespace string yields an empty list.  The returned slices borrow from
/// `string` and keep its casing: no lowercasing happens here, callers that need
/// case-insensitive matching must lowercase the input first.
fn split_terms(string: &str) -> Vec<&str> {
    let terms = string.split_whitespace();
    terms.collect::<Vec<&str>>()
}
/// Score how well the query matches a suggestion's `keywords` field.
///
/// Returns 1.0 when `keywords`, split on whitespace, is non-empty and equal term-for-term (same
/// terms, same order) to `query_terms`; returns 0.0 in every other case, including when
/// `keywords` contains no terms at all.
fn calc_keywords_score(query_terms: &[&str], keywords: &str) -> f64 {
    // Note: keywords is compared as-is; per the ingestion code it is expected to already be
    // lower-case, so only the query side needs lowercasing by the caller.
    let keyword_terms = split_terms(keywords);
    let is_exact_match = !keyword_terms.is_empty() && query_terms == keyword_terms.as_slice();
    if is_exact_match {
        1.0
    } else {
        0.0
    }
}
/// Score how well the query matches a suggestion's `product_type` field.
///
/// The score is the fraction of whitespace-separated terms in `product_type` that also appear
/// somewhere in `query_terms`.  Extra or repeated query terms do not affect the result.  A
/// `product_type` with no terms scores 0.0.
fn calc_product_type_score(query_terms: &[&str], product_type: &str) -> f64 {
    // Note: product_type is compared as-is; per the ingestion code it is expected to already
    // be lower-case, so only the query side needs lowercasing by the caller.
    let product_type_terms = split_terms(product_type);
    match product_type_terms.len() {
        // Nothing to match against (this also avoids dividing by zero).
        0 => 0.0,
        total => {
            let mut matched = 0usize;
            for term in &product_type_terms {
                if query_terms.contains(term) {
                    matched += 1;
                }
            }
            // Both values are small term counts, so the conversions to f64 are exact.
            matched as f64 / total as f64
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lowercase and split `query`, score it against `keywords`, and assert the score equals
    /// `expected`.
    fn check_keywords_score(keywords: &str, query: &str, expected: f64) {
        let lowered_query = query.to_lowercase();
        let query_terms = split_terms(&lowered_query);
        let actual = calc_keywords_score(&query_terms, keywords);
        assert_eq!(
            actual, expected,
            "keywords: {} query: {} expected: {} actual: {actual}",
            keywords, query, expected,
        );
    }

    #[test]
    fn test_keywords_score() {
        // Each case is (keywords, query, expected score)
        let cases: &[(&str, &str, f64)] = &[
            // Keyword score 1.0 on exact matches, 0.0 otherwise
            ("apple", "apple", 1.0),
            ("apple", "android", 0.0),
            ("apple", "apple phone", 0.0),
            // Empty keywords should always score 0.0
            ("", "", 0.0),
            ("", "apple", 0.0),
            // Matching should be case insensitive
            ("apple", "Apple", 1.0),
        ];
        for &(keywords, query, expected) in cases.iter() {
            check_keywords_score(keywords, query, expected);
        }
    }

    /// Lowercase and split `query`, score it against `product_type`, and assert the score
    /// equals `expected`.
    fn check_product_type_score(product_type: &str, query: &str, expected: f64) {
        let lowered_query = query.to_lowercase();
        let query_terms = split_terms(&lowered_query);
        let actual = calc_product_type_score(&query_terms, product_type);
        assert_eq!(
            actual, expected,
            "product_type: {} query: {} expected: {} actual: {actual}",
            product_type, query, expected,
        );
    }

    #[test]
    fn test_product_type_score() {
        // Each case is (product_type, query, expected score)
        let cases: &[(&str, &str, f64)] = &[
            // Product type scores based on the percentage of terms in the product type that
            // are also present in the query
            ("standing desk", "standing desk", 1.0),
            ("standing desk", "desk", 0.5),
            ("standing desk", "desk desk desk", 0.5),
            ("standing desk", "standing", 0.5),
            ("standing desk", "phone", 0.0),
            // Extra terms in the query are ignored
            ("standing desk", "standing desk for my office", 1.0),
            // Empty product_type should always score 0.0
            ("", "", 0.0),
            // Matching should be case insensitive
            ("desk", "Desk", 1.0),
            // Extra spaces are ignored
            ("desk", " desk ", 1.0),
        ];
        for &(product_type, query, expected) in cases.iter() {
            check_product_type_score(product_type, query, expected);
        }
    }
}