use rusqlite::types::ToSqlOutput;
use rusqlite::{named_params, Result as RusqliteResult, ToSql};
use sql_support::ConnExt;
use url::form_urlencoded;
use crate::{
db::SuggestDao,
provider::SuggestionProvider,
rs::{DownloadedYelpSuggestion, SuggestRecordId},
suggestion::Suggestion,
Result, SuggestionQuery,
};
/// Kind of modifier keyword stored in the `yelp_modifiers` table.
///
/// The enum is `repr(u8)` and each variant carries an explicit discriminant,
/// because that number is the value bound to the `type` column when rows are
/// inserted or looked up.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Modifier {
    /// Modifier that is looked up in front of the subject.
    Pre = 0,
    /// Modifier that is looked up right after the subject.
    Post = 1,
    /// Yelp-specific modifier, looked up at the very start and the very end
    /// of the query.
    Yelp = 2,
}
/// Lets a [`Modifier`] be used directly as a SQL parameter.
impl ToSql for Modifier {
    /// Binds the modifier as its `u8` discriminant.
    fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
        let discriminant = *self as u8;
        Ok(ToSqlOutput::from(discriminant))
    }
}
/// Which end of the word list a modifier lookup should take its candidate
/// words from.
#[derive(Copy, Clone, PartialEq, Eq)]
enum FindFrom {
    /// Take the candidate from the leading words.
    First,
    /// Take the candidate from the trailing words.
    Last,
}
// Queries whose keyword is longer than this (compared against `str::len`, i.e.
// a byte count) produce no Yelp suggestion at all.
const MAX_QUERY_LENGTH: usize = 150;
// Largest number of consecutive query words that are tried as one modifier.
const MAX_MODIFIER_WORDS_NUMBER: usize = 2;
// Largest number of consecutive query words that are tried as one location sign.
const MAX_LOCATION_SIGN_WORDS_NUMBER: usize = 2;
// Minimum length (in bytes) the remaining query must have before it is
// completed to a longer subject keyword by prefix match.
const SUBJECT_PREFIX_MATCH_THRESHOLD: usize = 2;
impl SuggestDao<'_> {
pub(crate) fn insert_yelp_suggestions(
&mut self,
record_id: &SuggestRecordId,
suggestion: &DownloadedYelpSuggestion,
) -> Result<()> {
for keyword in &suggestion.subjects {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_subjects(record_id, keyword) VALUES(:record_id, :keyword)",
named_params! {
":record_id": record_id.as_str(),
":keyword": keyword,
},
)?;
}
for keyword in &suggestion.pre_modifiers {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
named_params! {
":record_id": record_id.as_str(),
":type": Modifier::Pre,
":keyword": keyword,
},
)?;
}
for keyword in &suggestion.post_modifiers {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
named_params! {
":record_id": record_id.as_str(),
":type": Modifier::Post,
":keyword": keyword,
},
)?;
}
for keyword in &suggestion.yelp_modifiers {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
named_params! {
":record_id": record_id.as_str(),
":type": Modifier::Yelp,
":keyword": keyword,
},
)?;
}
for sign in &suggestion.location_signs {
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_location_signs(record_id, keyword, need_location) VALUES(:record_id, :keyword, :need_location)",
named_params! {
":record_id": record_id.as_str(),
":keyword": sign.keyword,
":need_location": sign.need_location,
},
)?;
}
self.scope.err_if_interrupted()?;
self.conn.execute_cached(
"INSERT INTO yelp_custom_details(record_id, icon_id, score) VALUES(:record_id, :icon_id, :score)",
named_params! {
":record_id": record_id.as_str(),
":icon_id": suggestion.icon_id,
":score": suggestion.score,
},
)?;
Ok(())
}
/// Builds the Yelp suggestion for `query`; the result holds at most one item.
///
/// The keyword is split on whitespace and consumed in this order:
///
/// 1. an optional Yelp modifier at the front,
/// 2. an optional pre-modifier,
/// 3. the subject (required),
/// 4. an optional post-modifier,
/// 5. an optional location sign,
/// 6. an optional Yelp modifier taken from the end of what is left.
///
/// Whatever words remain after these steps are joined with single spaces and
/// used as the location.
///
/// Returns an empty vector when the Yelp provider was not requested, when the
/// keyword is longer than `MAX_QUERY_LENGTH` bytes, or when no subject matches.
///
/// # Errors
///
/// Propagates any error from the keyword lookups or from fetching the custom
/// details (icon and score).
pub(crate) fn fetch_yelp_suggestions(
    &self,
    query: &SuggestionQuery,
) -> Result<Vec<Suggestion>> {
    if !query.providers.contains(&SuggestionProvider::Yelp) {
        return Ok(vec![]);
    }
    // `len()` is a byte count, so the limit is on UTF-8 bytes, not characters.
    if query.keyword.len() > MAX_QUERY_LENGTH {
        return Ok(vec![]);
    }

    let query_vec: Vec<_> = query.keyword.split_whitespace().collect();
    // Shrinks as each recognised part is consumed.
    let mut query_words: &[&str] = &query_vec;

    // A leading Yelp modifier is only skipped over; it is not kept.
    let pre_yelp_modifier_tuple =
        self.find_modifier(query_words, Modifier::Yelp, FindFrom::First)?;
    if let Some((_, rest)) = pre_yelp_modifier_tuple {
        query_words = rest;
    }

    let pre_modifier_tuple = self.find_modifier(query_words, Modifier::Pre, FindFrom::First)?;
    if let Some((_, rest)) = pre_modifier_tuple {
        query_words = rest;
    }

    // Without a subject there is nothing to suggest.
    let Some(subject_tuple) = self.find_subject(query_words)? else {
        return Ok(vec![]);
    };
    query_words = subject_tuple.2;

    let post_modifier_tuple =
        self.find_modifier(query_words, Modifier::Post, FindFrom::First)?;
    if let Some((_, rest)) = post_modifier_tuple {
        query_words = rest;
    }

    let location_sign_tuple = self.find_location_sign(query_words)?;
    if let Some((_, rest)) = location_sign_tuple {
        query_words = rest;
    }

    // A trailing Yelp modifier is likewise only dropped from the location.
    let post_yelp_modifier_tuple =
        self.find_modifier(query_words, Modifier::Yelp, FindFrom::Last)?;
    if let Some((_, rest)) = post_yelp_modifier_tuple {
        query_words = rest;
    }

    let location = if query_words.is_empty() {
        None
    } else {
        Some(query_words.join(" "))
    };

    let (icon, icon_mimetype, score) = self.fetch_custom_details()?;
    let builder = SuggestionBuilder {
        subject: &subject_tuple.0,
        subject_exact_match: subject_tuple.1,
        // The lookups already return owned `String`s, so they are moved into
        // the builder instead of being cloned again.
        pre_modifier: pre_modifier_tuple.map(|(words, _)| words),
        post_modifier: post_modifier_tuple.map(|(words, _)| words),
        // Evaluated before `location_sign_tuple` and `location` are moved below.
        need_location: location_sign_tuple.is_some() || location.is_some(),
        location_sign: location_sign_tuple.map(|(words, _)| words),
        location,
        icon,
        icon_mimetype,
        score,
    };
    Ok(vec![builder.into()])
}
/// Looks for a modifier of kind `modifier_type` at one end of `query_words`.
///
/// Candidates of `MAX_MODIFIER_WORDS_NUMBER` words down to one word are tried,
/// longest first, taken from the front (`FindFrom::First`) or the back
/// (`FindFrom::Last`). A candidate matches when its lowercased form equals a
/// stored keyword of the requested type.
///
/// Returns the matched words exactly as they appear in the query (joined by a
/// single space) together with the words that were not consumed, or `None`
/// when nothing matches.
fn find_modifier<'a>(
    &self,
    query_words: &'a [&'a str],
    modifier_type: Modifier,
    find_from: FindFrom,
) -> Result<Option<(String, &'a [&'a str])>> {
    // For an empty slice the range below is empty and we fall through to `None`.
    let longest = query_words.len().min(MAX_MODIFIER_WORDS_NUMBER);

    for word_count in (1..=longest).rev() {
        let split = match find_from {
            FindFrom::First => query_words.split_at_checked(word_count),
            FindFrom::Last => query_words
                .split_at_checked(query_words.len() - word_count)
                .map(|(front, back)| (back, front)),
        };
        let Some((chunk, remainder)) = split else {
            continue;
        };

        let phrase = chunk.join(" ");
        let needle = phrase.to_lowercase();
        let is_known = self.conn.query_row_and_then_cachable(
            "
SELECT EXISTS (
SELECT 1 FROM yelp_modifiers WHERE type = :type AND keyword = :word LIMIT 1
)
",
            named_params! {
                ":type": modifier_type,
                ":word": needle,
            },
            |row| row.get::<_, bool>(0),
            true,
        )?;

        if is_known {
            return Ok(Some((phrase, remainder)));
        }
    }

    Ok(None)
}
/// Looks for a subject keyword at the start of `query_words`.
///
/// Two lookups are made against `yelp_subjects`, both on the lowercased query:
///
/// 1. A stored keyword that the query equals or starts with (followed by a
///    space). The returned subject is the matching slice of the query as
///    typed, and the flag is `true`.
/// 2. If that fails and the query is at least
///    `SUBJECT_PREFIX_MATCH_THRESHOLD` bytes long, a stored keyword that starts
///    with the query. The returned subject is the typed query with the rest of
///    the keyword appended, and the flag is `false`.
///
/// In both lookups the shortest keyword wins, ties broken alphabetically.
///
/// The third tuple element holds the query words not consumed by the subject.
/// `None` is returned when no keyword matches, or when the byte offsets of the
/// lowercased keyword do not line up with the typed text.
fn find_subject<'a>(
    &self,
    query_words: &'a [&'a str],
) -> Result<Option<(String, bool, &'a [&'a str])>> {
    if query_words.is_empty() {
        return Ok(None);
    }

    let typed = query_words.join(" ");
    let lowered = typed.to_lowercase();

    // First: is a stored keyword a whole-word prefix of the query?
    let whole_keyword = self.conn.try_query_one::<String, _>(
        "SELECT keyword
FROM yelp_subjects
WHERE :query BETWEEN keyword AND keyword || ' ' || x'FFFF'
ORDER BY LENGTH(keyword) ASC, keyword ASC
LIMIT 1",
        named_params! {
            ":query": lowered,
        },
        true,
    )?;
    if let Some(keyword_lowercase) = whole_keyword {
        // Slice the typed text so the user's casing is preserved.
        let Some(typed_keyword) = typed.get(..keyword_lowercase.len()) else {
            return Ok(None);
        };
        let consumed = typed_keyword.split_whitespace().count();
        return Ok(Some((
            typed_keyword.to_string(),
            true,
            query_words.get(consumed..).unwrap_or_default(),
        )));
    }

    if typed.len() < SUBJECT_PREFIX_MATCH_THRESHOLD {
        return Ok(None);
    }

    // Second: is the query a prefix of a stored keyword?
    let completed_keyword = self.conn.try_query_one::<String, _>(
        "SELECT keyword
FROM yelp_subjects
WHERE keyword BETWEEN :query AND :query || x'FFFF'
ORDER BY LENGTH(keyword) ASC, keyword ASC
LIMIT 1",
        named_params! {
            ":query": lowered,
        },
        true,
    )?;
    let Some(keyword_lowercase) = completed_keyword else {
        return Ok(None);
    };
    let Some(completion) = keyword_lowercase.get(typed.len()..) else {
        return Ok(None);
    };

    // Keep what the user typed and append only the missing tail of the keyword.
    let mut completed = typed;
    completed.push_str(completion);
    let consumed = completed.split_whitespace().count().min(query_words.len());
    Ok(Some((
        completed,
        false,
        query_words.get(consumed..).unwrap_or_default(),
    )))
}
/// Looks for a location sign at the front of `query_words`.
///
/// Candidates of `MAX_LOCATION_SIGN_WORDS_NUMBER` words down to one word are
/// tried, longest first. A candidate matches when its lowercased form equals a
/// keyword in `yelp_location_signs`.
///
/// Returns the matched words as typed (joined by a single space) and the
/// remaining words, or `None` when nothing matches.
fn find_location_sign<'a>(
    &self,
    query_words: &'a [&'a str],
) -> Result<Option<(String, &'a [&'a str])>> {
    // For an empty slice the range below is empty and we fall through to `None`.
    let longest = query_words.len().min(MAX_LOCATION_SIGN_WORDS_NUMBER);

    for word_count in (1..=longest).rev() {
        let Some((chunk, remainder)) = query_words.split_at_checked(word_count) else {
            continue;
        };

        let phrase = chunk.join(" ");
        let needle = phrase.to_lowercase();
        let is_known = self.conn.query_row_and_then_cachable(
            "
SELECT EXISTS (
SELECT 1 FROM yelp_location_signs WHERE keyword = :word LIMIT 1
)
",
            named_params! {
                ":word": needle,
            },
            |row| row.get::<_, bool>(0),
            true,
        )?;

        if is_known {
            return Ok(Some((phrase, remainder)));
        }
    }

    Ok(None)
}
/// Reads the icon bytes, icon MIME type and score for Yelp suggestions.
///
/// Takes one row of `yelp_custom_details`, left-joined with `icons` on the
/// icon id, so both icon values are `None` when no icon row matches.
///
/// # Errors
///
/// Fails if the query fails or a column cannot be read as the expected type.
fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, Option<String>, f64)> {
    self.conn.query_row_and_then_cachable(
        r#"
SELECT
i.data, i.mimetype, y.score
FROM
yelp_custom_details y
LEFT JOIN
icons i
ON y.icon_id = i.id
LIMIT
1
"#,
        (),
        |row| -> Result<_> {
            let icon = row.get::<_, Option<Vec<u8>>>(0)?;
            let icon_mimetype = row.get::<_, Option<String>>(1)?;
            let score = row.get::<_, f64>(2)?;
            Ok((icon, icon_mimetype, score))
        },
        true,
    )
}
}
/// Parsed pieces of a Yelp query, collected before they are turned into a
/// `Suggestion`.
struct SuggestionBuilder<'a> {
    /// Subject text; always part of the title and the search description.
    subject: &'a str,
    /// Passed through unchanged to the resulting suggestion.
    subject_exact_match: bool,

    /// Words placed in front of the subject, if any.
    pre_modifier: Option<String>,
    /// Words placed right after the subject, if any.
    post_modifier: Option<String>,

    /// When `true`, `location` is sent as the location URL parameter and the
    /// location sign is kept out of the search description.
    need_location: bool,
    /// Location sign words, if any.
    location_sign: Option<String>,
    /// Location text, if any.
    location: Option<String>,

    /// Icon bytes, passed through unchanged.
    icon: Option<Vec<u8>>,
    /// Icon MIME type, passed through unchanged.
    icon_mimetype: Option<String>,
    /// Suggestion score, passed through unchanged.
    score: f64,
}
/// Turns the parsed query pieces into a `Suggestion::Yelp`.
impl<'a> From<SuggestionBuilder<'a>> for Suggestion {
    /// Builds the search URL and the title from `builder`.
    ///
    /// * The URL is `https://www.yelp.com/search?` followed by a form-encoded
    ///   `find_desc` (pre-modifier, subject, post-modifier, and the location
    ///   sign when no location is needed) and, when a location is both present
    ///   and needed, `find_loc`.
    /// * The title is every present piece joined by single spaces, in the
    ///   order: pre-modifier, subject, post-modifier, location sign, location.
    /// * `has_location_sign` is set when a location sign exists and it was not
    ///   folded into the search description.
    fn from(builder: SuggestionBuilder<'a>) -> Suggestion {
        // The location sign goes into the description only when no location
        // parameter is going to be sent.
        let location_modifier = if !builder.need_location {
            builder.location_sign.as_deref()
        } else {
            None
        };

        let description = [
            builder.pre_modifier.as_deref(),
            Some(builder.subject),
            builder.post_modifier.as_deref(),
            location_modifier,
        ]
        .iter()
        .flatten()
        .copied()
        .collect::<Vec<_>>()
        .join(" ");

        let mut url = String::from("https://www.yelp.com/search?");
        let mut parameters = form_urlencoded::Serializer::new(String::new());
        parameters.append_pair("find_desc", &description);
        if let (Some(location), true) = (&builder.location, builder.need_location) {
            parameters.append_pair("find_loc", location);
        }
        // Append the encoded query string to the base URL.
        url.push_str(&parameters.finish());

        let title = [
            builder.pre_modifier.as_deref(),
            Some(builder.subject),
            builder.post_modifier.as_deref(),
            builder.location_sign.as_deref(),
            builder.location.as_deref(),
        ]
        .iter()
        .flatten()
        .copied()
        .collect::<Vec<_>>()
        .join(" ");

        Suggestion::Yelp {
            url,
            title,
            icon: builder.icon,
            icon_mimetype: builder.icon_mimetype,
            score: builder.score,
            has_location_sign: location_modifier.is_none() && builder.location_sign.is_some(),
            subject_exact_match: builder.subject_exact_match,
            location_param: "find_loc".to_string(),
        }
    }
}
// Unit tests for the private lookup helpers on `SuggestDao`.
#[cfg(test)]
mod tests {
use super::*;
use crate::{store::tests::TestStore, testing::*, SuggestIngestionConstraints};
// Exercises `find_modifier`, `find_subject` and `find_location_sign` against a
// store that has ingested one Yelp record and its icon.
#[test]
fn yelp_functions() -> anyhow::Result<()> {
before_each();
// The keyword data comes from the `ramen_yelp()` fixture, which is defined
// outside this file; the expectations below depend on its contents.
let store = TestStore::new(
MockRemoteSettingsClient::default()
.with_record(SuggestionProvider::Yelp.record("data-4", json!([ramen_yelp()])))
.with_record(SuggestionProvider::Yelp.icon(yelp_favicon())),
);
store.ingest(SuggestIngestionConstraints {
providers: Some(vec![SuggestionProvider::Yelp]),
..SuggestIngestionConstraints::all_providers()
});
store.read(|dao| {
// Each case: (query, modifier kind, search direction, expected result).
// The expected result is the matched text as typed plus the leftover words.
type FindModifierTestCase<'a> =
(&'a str, Modifier, FindFrom, Option<(String, &'a [&'a str])>);
// NOTE(review): `find_modifer_tests` is misspelled (should be "modifier").
let find_modifer_tests: &[FindModifierTestCase] = &[
// Empty query and partial words never match.
("", Modifier::Pre, FindFrom::First, None),
("", Modifier::Post, FindFrom::First, None),
("", Modifier::Yelp, FindFrom::First, None),
("b", Modifier::Pre, FindFrom::First, None),
("be", Modifier::Pre, FindFrom::First, None),
("bes", Modifier::Pre, FindFrom::First, None),
// Single-word modifiers.
(
"best",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &[])),
),
(
"best ",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &[])),
),
(
"best r",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &["r"])),
),
(
"best ramen",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &["ramen"])),
),
(
"best spicy ramen",
Modifier::Pre,
FindFrom::First,
Some(("best".to_string(), &["spicy", "ramen"])),
),
(
"delivery",
Modifier::Post,
FindFrom::First,
Some(("delivery".to_string(), &[])),
),
(
"yelp",
Modifier::Yelp,
FindFrom::First,
Some(("yelp".to_string(), &[])),
),
// The same keyword matches as Pre and as Post, but not as Yelp.
(
"same_modifier",
Modifier::Pre,
FindFrom::First,
Some(("same_modifier".to_string(), &[])),
),
(
"same_modifier",
Modifier::Post,
FindFrom::First,
Some(("same_modifier".to_string(), &[])),
),
("same_modifier", Modifier::Yelp, FindFrom::First, None),
// Two-word modifiers only match once both words are complete.
("super", Modifier::Pre, FindFrom::First, None),
("super b", Modifier::Pre, FindFrom::First, None),
("super be", Modifier::Pre, FindFrom::First, None),
("super bes", Modifier::Pre, FindFrom::First, None),
(
"super best",
Modifier::Pre,
FindFrom::First,
Some(("super best".to_string(), &[])),
),
(
"super best ramen",
Modifier::Pre,
FindFrom::First,
Some(("super best".to_string(), &["ramen"])),
),
(
"super delivery",
Modifier::Post,
FindFrom::First,
Some(("super delivery".to_string(), &[])),
),
(
"yelp keyword",
Modifier::Yelp,
FindFrom::First,
Some(("yelp keyword".to_string(), &[])),
),
// A keyword of one kind is not found when another kind is requested.
("best ramen", Modifier::Post, FindFrom::First, None),
("best ramen", Modifier::Yelp, FindFrom::First, None),
// Searching from the end looks at the trailing words only.
("best ramen", Modifier::Pre, FindFrom::Last, None),
(
"ramen best",
Modifier::Pre,
FindFrom::Last,
Some(("best".to_string(), &["ramen"])),
),
// A word that merely starts with a keyword does not match.
("bestabc", Modifier::Post, FindFrom::First, None),
("bestabc ramen", Modifier::Post, FindFrom::First, None),
// Matching ignores case, but the result keeps the typed casing.
(
"BeSt SpIcY rAmEn",
Modifier::Pre,
FindFrom::First,
Some(("BeSt".to_string(), &["SpIcY", "rAmEn"])),
),
(
"SpIcY rAmEn DeLiVeRy",
Modifier::Post,
FindFrom::Last,
Some(("DeLiVeRy".to_string(), &["SpIcY", "rAmEn"])),
),
];
for (query, modifier, findfrom, expected) in find_modifer_tests {
assert_eq!(
dao.find_modifier(
&query.split_whitespace().collect::<Vec<_>>(),
*modifier,
*findfrom
)?,
*expected
);
}
// Each case: (query, expected (subject, exact-match flag, leftover words)).
type FindSubjectTestCase<'a> = (&'a str, Option<(String, bool, &'a [&'a str])>);
let find_subject_tests: &[FindSubjectTestCase] = &[
("", None),
// A single character is below the prefix-match threshold.
("r", None),
// Prefix matches complete the subject and report `false`.
("ra", Some(("rats".to_string(), false, &[]))),
("ram", Some(("ramen".to_string(), false, &[]))),
("rame", Some(("ramen".to_string(), false, &[]))),
("ramen", Some(("ramen".to_string(), true, &[]))),
("spi", Some(("spicy ramen".to_string(), false, &[]))),
("spicy ra ", Some(("spicy ramen".to_string(), false, &[]))),
("spicy ramen", Some(("spicy ramen".to_string(), true, &[]))),
// Words after a full subject are returned as leftovers.
(
"spicy ramen gogo",
Some(("spicy ramen".to_string(), true, &["gogo"])),
),
// Matching ignores case, but the result keeps the typed casing.
(
"SpIcY rAmEn GoGo",
Some(("SpIcY rAmEn".to_string(), true, &["GoGo"])),
),
// A word that merely starts with a subject does not match.
("ramenabc", None),
("ramenabc xyz", None),
("spicy ramenabc", None),
("spicy ramenabc xyz", None),
("ramen abc", Some(("ramen".to_string(), true, &["abc"]))),
];
for (query, expected) in find_subject_tests {
assert_eq!(
dao.find_subject(&query.split_whitespace().collect::<Vec<_>>())?,
*expected
);
}
// Each case: (query, expected (location sign as typed, leftover words)).
type FindLocationSignTestCase<'a> = (&'a str, Option<(String, &'a [&'a str])>);
let find_location_sign_tests: &[FindLocationSignTestCase] = &[
("", None),
// Partial words never match.
("n", None),
("ne", None),
("nea", None),
("near", Some(("near".to_string(), &[]))),
("near ", Some(("near".to_string(), &[]))),
("near b", Some(("near".to_string(), &["b"]))),
// The longer two-word sign wins over the one-word sign.
("near by", Some(("near by".to_string(), &[]))),
("near by a", Some(("near by".to_string(), &["a"]))),
];
for (query, expected) in find_location_sign_tests {
assert_eq!(
dao.find_location_sign(&query.split_whitespace().collect::<Vec<_>>())?,
*expected
);
}
Ok(())
})?;
Ok(())
}
}