1use rusqlite::types::{FromSql, FromSqlResult, ToSqlOutput, ValueRef};
7use rusqlite::{named_params, Result as RusqliteResult, ToSql};
8use sql_support::ConnExt;
9use url::form_urlencoded;
10
11use crate::{
12 db::SuggestDao,
13 provider::SuggestionProvider,
14 rs::{DownloadedYelpSuggestion, SuggestRecordId},
15 suggestion::Suggestion,
16 suggestion::YelpSubjectType,
17 Result, SuggestionQuery,
18};
19
/// Kind of modifier keyword stored in the `type` column of `yelp_modifiers`.
///
/// The explicit discriminants are what the `ToSql` implementation writes to the
/// database, so they must stay stable.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
enum Modifier {
    /// Keywords ingested from `pre_modifiers`.
    Pre = 0,
    /// Keywords ingested from `post_modifiers`.
    Post = 1,
    /// Keywords ingested from `yelp_modifiers`.
    Yelp = 2,
    /// Keywords ingested from `location_signs`.
    LocationSign = 3,
}
28
29impl ToSql for Modifier {
30 fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
31 Ok(ToSqlOutput::from(*self as u8))
32 }
33}
34
35impl ToSql for YelpSubjectType {
36 fn to_sql(&self) -> RusqliteResult<ToSqlOutput<'_>> {
37 Ok(ToSqlOutput::from(*self as u8))
38 }
39}
40
41impl FromSql for YelpSubjectType {
42 fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
43 if value.as_i64().unwrap_or_default() == 0 {
44 Ok(YelpSubjectType::Service)
45 } else {
46 Ok(YelpSubjectType::Business)
47 }
48 }
49}
50
/// End of the query from which `find_modifier` takes its candidate words.
#[derive(PartialEq, Eq, Clone, Copy)]
enum FindFrom {
    /// Take the candidate from the leading words of the query.
    First,
    /// Take the candidate from the trailing words of the query.
    Last,
}
56
/// Longest query, measured with `len()` on the query keyword, that
/// `fetch_yelp_suggestions` will process. Longer queries yield no suggestions.
const MAX_QUERY_LENGTH: usize = 150;

/// Largest number of query words that `find_modifier` joins into a single
/// modifier candidate.
const MAX_MODIFIER_WORDS_NUMBER: usize = 2;

/// Minimum length of the joined query (via `len()`) before `find_subject`
/// attempts a prefix match against the subjects table.
const SUBJECT_PREFIX_MATCH_THRESHOLD: usize = 2;
76
/// Result of a successful `find_subject` lookup.
#[derive(Debug, PartialEq)]
struct FindSubjectData<'a> {
    /// The matched subject. It keeps the casing typed in the query; for a
    /// prefix match the missing tail is appended from the stored keyword.
    subject: String,
    /// `true` when the query contains the whole keyword, `false` when the
    /// subject was completed from a prefix.
    exact_match: bool,
    /// Subject type read from the matching `yelp_subjects` row.
    subject_type: YelpSubjectType,
    /// Query words left over after the words consumed by the subject.
    rest: &'a [&'a str],
}
88
89impl SuggestDao<'_> {
90 pub(crate) fn insert_yelp_suggestions(
92 &mut self,
93 record_id: &SuggestRecordId,
94 suggestion: &DownloadedYelpSuggestion,
95 ) -> Result<()> {
96 for keyword in &suggestion.subjects {
97 self.scope.err_if_interrupted()?;
98 self.conn.execute_cached(
99 "INSERT INTO yelp_subjects(record_id, keyword, subject_type) VALUES(:record_id, :keyword, :subject_type)",
100 named_params! {
101 ":record_id": record_id.as_str(),
102 ":keyword": keyword,
103 ":subject_type": YelpSubjectType::Service,
104 },
105 )?;
106 }
107
108 for keyword in suggestion.business_subjects.as_ref().unwrap_or(&vec![]) {
109 self.scope.err_if_interrupted()?;
110 self.conn.execute_cached(
111 "INSERT INTO yelp_subjects(record_id, keyword, subject_type) VALUES(:record_id, :keyword, :subject_type)",
112 named_params! {
113 ":record_id": record_id.as_str(),
114 ":keyword": keyword,
115 ":subject_type": YelpSubjectType::Business,
116 },
117 )?;
118 }
119
120 for keyword in &suggestion.pre_modifiers {
121 self.scope.err_if_interrupted()?;
122 self.conn.execute_cached(
123 "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
124 named_params! {
125 ":record_id": record_id.as_str(),
126 ":type": Modifier::Pre,
127 ":keyword": keyword,
128 },
129 )?;
130 }
131
132 for keyword in &suggestion.post_modifiers {
133 self.scope.err_if_interrupted()?;
134 self.conn.execute_cached(
135 "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
136 named_params! {
137 ":record_id": record_id.as_str(),
138 ":type": Modifier::Post,
139 ":keyword": keyword,
140 },
141 )?;
142 }
143
144 for keyword in &suggestion.yelp_modifiers {
145 self.scope.err_if_interrupted()?;
146 self.conn.execute_cached(
147 "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
148 named_params! {
149 ":record_id": record_id.as_str(),
150 ":type": Modifier::Yelp,
151 ":keyword": keyword,
152 },
153 )?;
154 }
155
156 for keyword in &suggestion.location_signs {
157 self.scope.err_if_interrupted()?;
158 self.conn.execute_cached(
159 "INSERT INTO yelp_modifiers(record_id, type, keyword) VALUES(:record_id, :type, :keyword)",
160 named_params! {
161 ":record_id": record_id.as_str(),
162 ":type": Modifier::LocationSign,
163 ":keyword": keyword,
164 },
165 )?;
166 }
167
168 self.scope.err_if_interrupted()?;
169 self.conn.execute_cached(
170 "INSERT INTO yelp_custom_details(record_id, icon_id, score) VALUES(:record_id, :icon_id, :score)",
171 named_params! {
172 ":record_id": record_id.as_str(),
173 ":icon_id": suggestion.icon_id,
174 ":score": suggestion.score,
175 },
176 )?;
177
178 Ok(())
179 }
180
181 pub(crate) fn fetch_yelp_suggestions(
183 &self,
184 query: &SuggestionQuery,
185 ) -> Result<Vec<Suggestion>> {
186 if !query.providers.contains(&SuggestionProvider::Yelp) {
187 return Ok(vec![]);
188 }
189
190 if query.keyword.len() > MAX_QUERY_LENGTH {
191 return Ok(vec![]);
192 }
193
194 let query_vec: Vec<_> = query.keyword.split_whitespace().collect();
195 let mut query_words: &[&str] = &query_vec;
196
197 let pre_yelp_modifier_tuple =
198 self.find_modifier(query_words, Modifier::Yelp, FindFrom::First)?;
199 if let Some((_, rest)) = pre_yelp_modifier_tuple {
200 query_words = rest;
201 }
202
203 let pre_modifier_tuple = self.find_modifier(query_words, Modifier::Pre, FindFrom::First)?;
204 if let Some((_, rest)) = pre_modifier_tuple {
205 query_words = rest;
206 }
207
208 let Some(subject_data) = self.find_subject(query_words)? else {
209 return Ok(vec![]);
210 };
211 query_words = subject_data.rest;
212
213 let post_modifier_tuple =
214 self.find_modifier(query_words, Modifier::Post, FindFrom::First)?;
215 if let Some((_, rest)) = post_modifier_tuple {
216 query_words = rest;
217 }
218
219 let location_sign_tuple =
220 self.find_modifier(query_words, Modifier::LocationSign, FindFrom::First)?;
221 if let Some((_, rest)) = location_sign_tuple {
222 query_words = rest;
223 }
224
225 let post_yelp_modifier_tuple =
226 self.find_modifier(query_words, Modifier::Yelp, FindFrom::Last)?;
227 if let Some((_, rest)) = post_yelp_modifier_tuple {
228 query_words = rest;
229 }
230
231 let location = if query_words.is_empty() {
232 None
233 } else {
234 Some(query_words.join(" "))
235 };
236
237 let (icon, icon_mimetype, score) = self.fetch_custom_details()?;
238 let builder = SuggestionBuilder {
239 subject: &subject_data.subject,
240 subject_exact_match: subject_data.exact_match,
241 subject_type: subject_data.subject_type,
242 pre_modifier: pre_modifier_tuple.map(|(words, _)| words.to_string()),
243 post_modifier: post_modifier_tuple.map(|(words, _)| words.to_string()),
244 location_sign: location_sign_tuple.map(|(words, _)| words.to_string()),
245 location,
246 icon,
247 icon_mimetype,
248 score,
249 };
250 Ok(vec![builder.into()])
251 }
252
253 fn find_modifier<'a>(
261 &self,
262 query_words: &'a [&'a str],
263 modifier_type: Modifier,
264 find_from: FindFrom,
265 ) -> Result<Option<(String, &'a [&'a str])>> {
266 if query_words.is_empty() {
267 return Ok(None);
268 }
269
270 for n in (1..=std::cmp::min(MAX_MODIFIER_WORDS_NUMBER, query_words.len())).rev() {
271 let Some((candidate_chunk, rest)) = (match find_from {
272 FindFrom::First => query_words.split_at_checked(n),
273 FindFrom::Last => query_words
274 .split_at_checked(query_words.len() - n)
275 .map(|(front, back)| (back, front)),
276 }) else {
277 continue;
278 };
279
280 let mut candidate = candidate_chunk.join(" ");
281
282 if let Some(keyword_lowercase) = self.conn.try_query_one::<String, _>(
283 if n == query_words.len() {
284 "
285 SELECT keyword FROM yelp_modifiers
286 WHERE type = :type AND keyword BETWEEN :word AND :word || x'FFFF'
287 LIMIT 1
288 "
289 } else {
290 "
291 SELECT keyword FROM yelp_modifiers
292 WHERE type = :type AND keyword = :word
293 LIMIT 1
294 "
295 },
296 named_params! {
297 ":type": modifier_type,
298 ":word": candidate.to_lowercase(),
299 },
300 true,
301 )? {
302 candidate.push_str(keyword_lowercase.get(candidate.len()..).unwrap_or_default());
304 return Ok(Some((candidate, rest)));
305 }
306 }
307
308 Ok(None)
309 }
310
311 fn find_subject<'a>(&self, query_words: &'a [&'a str]) -> Result<Option<FindSubjectData<'a>>> {
314 if query_words.is_empty() {
315 return Ok(None);
316 }
317
318 let mut query_string = query_words.join(" ");
319
320 if let Ok((keyword_lowercase, subject_type)) = self.conn.query_row_and_then_cachable(
322 "SELECT keyword, subject_type
323 FROM yelp_subjects
324 WHERE :query BETWEEN keyword AND keyword || ' ' || x'FFFF'
325 ORDER BY LENGTH(keyword) ASC, keyword ASC
326 LIMIT 1",
327 named_params! {
328 ":query": query_string.to_lowercase(),
329 },
330 |row| -> Result<_> {
331 Ok((row.get::<_, String>(0)?, row.get::<_, YelpSubjectType>(1)?))
332 },
333 true,
334 ) {
335 return Ok(query_string.get(0..keyword_lowercase.len()).map(|keyword| {
337 let count = keyword.split_whitespace().count();
338 FindSubjectData {
339 subject: keyword.to_string(),
340 exact_match: true,
341 subject_type,
342 rest: query_words.get(count..).unwrap_or_default(),
343 }
344 }));
345 };
346
347 if query_string.len() < SUBJECT_PREFIX_MATCH_THRESHOLD {
348 return Ok(None);
349 }
350
351 if let Ok((keyword_lowercase, subject_type)) = self.conn.query_row_and_then_cachable(
353 "SELECT keyword, subject_type
354 FROM yelp_subjects
355 WHERE keyword BETWEEN :query AND :query || x'FFFF'
356 ORDER BY LENGTH(keyword) ASC, keyword ASC
357 LIMIT 1",
358 named_params! {
359 ":query": query_string.to_lowercase(),
360 },
361 |row| -> Result<_> {
362 Ok((row.get::<_, String>(0)?, row.get::<_, YelpSubjectType>(1)?))
363 },
364 true,
365 ) {
366 return Ok(keyword_lowercase
368 .get(query_string.len()..)
369 .map(|keyword_rest| {
370 query_string.push_str(keyword_rest);
371 let count =
372 std::cmp::min(query_words.len(), query_string.split_whitespace().count());
373 FindSubjectData {
374 subject: query_string,
375 exact_match: false,
376 subject_type,
377 rest: query_words.get(count..).unwrap_or_default(),
378 }
379 }));
380 };
381
382 Ok(None)
383 }
384
385 fn fetch_custom_details(&self) -> Result<(Option<Vec<u8>>, Option<String>, f64)> {
399 let result = self.conn.query_row_and_then_cachable(
400 r#"
401 SELECT
402 i.data, i.mimetype, y.score
403 FROM
404 yelp_custom_details y
405 LEFT JOIN
406 icons i
407 ON y.icon_id = i.id
408 LIMIT
409 1
410 "#,
411 (),
412 |row| -> Result<_> {
413 Ok((
414 row.get::<_, Option<Vec<u8>>>(0)?,
415 row.get::<_, Option<String>>(1)?,
416 row.get::<_, f64>(2)?,
417 ))
418 },
419 true,
420 )?;
421
422 Ok(result)
423 }
424}
425
/// Everything parsed from a query that is needed to assemble a
/// `Suggestion::Yelp` through the `From` conversion.
struct SuggestionBuilder<'a> {
    /// Matched subject, as returned by `find_subject`.
    subject: &'a str,
    /// Whether the subject was an exact match rather than a completed prefix.
    subject_exact_match: bool,
    /// Type of the matched subject.
    subject_type: YelpSubjectType,
    /// Modifier found before the subject, if any.
    pre_modifier: Option<String>,
    /// Modifier found after the subject, if any.
    post_modifier: Option<String>,
    /// Location sign found after the post modifier, if any.
    location_sign: Option<String>,
    /// Remaining query words joined with spaces, if any.
    location: Option<String>,
    /// Icon data from `fetch_custom_details`.
    icon: Option<Vec<u8>>,
    /// Icon mimetype from `fetch_custom_details`.
    icon_mimetype: Option<String>,
    /// Score from `fetch_custom_details`.
    score: f64,
}
438
439impl<'a> From<SuggestionBuilder<'a>> for Suggestion {
440 fn from(builder: SuggestionBuilder<'a>) -> Suggestion {
441 let description = [
442 builder.pre_modifier.as_deref(),
443 Some(builder.subject),
444 builder.post_modifier.as_deref(),
445 ]
446 .iter()
447 .flatten()
448 .copied()
449 .collect::<Vec<_>>()
450 .join(" ");
451
452 let mut url = String::from("https://www.yelp.com/search?");
454 let mut parameters = form_urlencoded::Serializer::new(String::new());
455 parameters.append_pair("find_desc", &description);
456 if let Some(location) = &builder.location {
457 parameters.append_pair("find_loc", location);
458 }
459 url.push_str(¶meters.finish());
460
461 let title = [
462 builder.pre_modifier.as_deref(),
463 Some(builder.subject),
464 builder.post_modifier.as_deref(),
465 builder.location_sign.as_deref(),
466 builder.location.as_deref(),
467 ]
468 .iter()
469 .flatten()
470 .copied()
471 .collect::<Vec<_>>()
472 .join(" ");
473
474 Suggestion::Yelp {
475 url,
476 title,
477 icon: builder.icon,
478 icon_mimetype: builder.icon_mimetype,
479 score: builder.score,
480 has_location_sign: builder.location_sign.is_some(),
481 subject_exact_match: builder.subject_exact_match,
482 subject_type: builder.subject_type,
483 location_param: "find_loc".to_string(),
484 }
485 }
486}
487
#[cfg(test)]
mod tests {
    use super::*;

    use crate::{store::tests::TestStore, testing::*, SuggestIngestionConstraints};

    /// Expected `find_modifier` hit: the completed modifier and the words left over.
    fn modifier_hit(
        modifier: &str,
        rest: &'static [&'static str],
    ) -> Option<(String, &'static [&'static str])> {
        Some((modifier.to_string(), rest))
    }

    /// Expected `find_subject` hit.
    fn subject_hit(
        subject: &str,
        exact_match: bool,
        subject_type: YelpSubjectType,
        rest: &'static [&'static str],
    ) -> Option<FindSubjectData<'static>> {
        Some(FindSubjectData {
            subject: subject.to_string(),
            exact_match,
            subject_type,
            rest,
        })
    }

    #[test]
    fn yelp_functions() -> anyhow::Result<()> {
        before_each();

        let store = TestStore::new(
            MockRemoteSettingsClient::default()
                .with_record(SuggestionProvider::Yelp.record("data-4", json!([ramen_yelp()])))
                .with_record(SuggestionProvider::Yelp.icon(yelp_favicon())),
        );

        store.ingest(SuggestIngestionConstraints {
            providers: Some(vec![SuggestionProvider::Yelp]),
            ..SuggestIngestionConstraints::all_providers()
        });

        store.read(|dao| {
            // Columns: query, modifier type, search direction, expected result.
            type FindModifierTestCase<'a> =
                (&'a str, Modifier, FindFrom, Option<(String, &'a [&'a str])>);
            let modifier_cases: &[FindModifierTestCase] = &[
                // Empty query.
                ("", Modifier::Pre, FindFrom::First, None),
                ("", Modifier::Post, FindFrom::First, None),
                ("", Modifier::Yelp, FindFrom::First, None),
                // Single-word modifiers.
                ("b", Modifier::Pre, FindFrom::First, modifier_hit("best", &[])),
                ("be", Modifier::Pre, FindFrom::First, modifier_hit("best", &[])),
                ("bes", Modifier::Pre, FindFrom::First, modifier_hit("best", &[])),
                ("best", Modifier::Pre, FindFrom::First, modifier_hit("best", &[])),
                ("best ", Modifier::Pre, FindFrom::First, modifier_hit("best", &[])),
                ("best r", Modifier::Pre, FindFrom::First, modifier_hit("best", &["r"])),
                (
                    "best ramen",
                    Modifier::Pre,
                    FindFrom::First,
                    modifier_hit("best", &["ramen"]),
                ),
                (
                    "best spicy ramen",
                    Modifier::Pre,
                    FindFrom::First,
                    modifier_hit("best", &["spicy", "ramen"]),
                ),
                ("delivery", Modifier::Post, FindFrom::First, modifier_hit("delivery", &[])),
                ("yelp", Modifier::Yelp, FindFrom::First, modifier_hit("yelp", &[])),
                (
                    "same_modifier",
                    Modifier::Pre,
                    FindFrom::First,
                    modifier_hit("same_modifier", &[]),
                ),
                (
                    "same_modifier",
                    Modifier::Post,
                    FindFrom::First,
                    modifier_hit("same_modifier", &[]),
                ),
                ("same_modifier", Modifier::Yelp, FindFrom::First, None),
                // Multi-word modifiers.
                ("s", Modifier::Pre, FindFrom::First, modifier_hit("same_modifier", &[])),
                ("su", Modifier::Pre, FindFrom::First, modifier_hit("super best", &[])),
                ("super", Modifier::Pre, FindFrom::First, modifier_hit("super best", &[])),
                ("super b", Modifier::Pre, FindFrom::First, modifier_hit("super best", &[])),
                ("super be", Modifier::Pre, FindFrom::First, modifier_hit("super best", &[])),
                ("super bes", Modifier::Pre, FindFrom::First, modifier_hit("super best", &[])),
                ("super best", Modifier::Pre, FindFrom::First, modifier_hit("super best", &[])),
                (
                    "super best ramen",
                    Modifier::Pre,
                    FindFrom::First,
                    modifier_hit("super best", &["ramen"]),
                ),
                (
                    "super delivery",
                    Modifier::Post,
                    FindFrom::First,
                    modifier_hit("super delivery", &[]),
                ),
                (
                    "yelp keyword",
                    Modifier::Yelp,
                    FindFrom::First,
                    modifier_hit("yelp keyword", &[]),
                ),
                // Wrong modifier type or wrong end of the query.
                ("best ramen", Modifier::Post, FindFrom::First, None),
                ("best ramen", Modifier::Yelp, FindFrom::First, None),
                ("best ramen", Modifier::Pre, FindFrom::Last, None),
                (
                    "ramen best",
                    Modifier::Pre,
                    FindFrom::Last,
                    modifier_hit("best", &["ramen"]),
                ),
                // Keyword followed by extra characters.
                ("bestabc", Modifier::Post, FindFrom::First, None),
                ("bestabc ramen", Modifier::Post, FindFrom::First, None),
                // Matching ignores case, but the result keeps the query's casing.
                (
                    "BeSt SpIcY rAmEn",
                    Modifier::Pre,
                    FindFrom::First,
                    modifier_hit("BeSt", &["SpIcY", "rAmEn"]),
                ),
                (
                    "SpIcY rAmEn DeLiVeRy",
                    Modifier::Post,
                    FindFrom::Last,
                    modifier_hit("DeLiVeRy", &["SpIcY", "rAmEn"]),
                ),
                // A partial modifier followed by more words does not match.
                ("be ramen", Modifier::Pre, FindFrom::First, None),
                ("bes ramen", Modifier::Pre, FindFrom::First, None),
            ];
            for (query, modifier, find_from, expected) in modifier_cases {
                let words: Vec<_> = query.split_whitespace().collect();
                assert_eq!(dao.find_modifier(&words, *modifier, *find_from)?, *expected);
            }

            // Columns: query, expected result.
            type FindSubjectTestCase<'a> = (&'a str, Option<FindSubjectData<'a>>);
            let subject_cases: &[FindSubjectTestCase] = &[
                ("", None),
                // Shorter than the prefix-match threshold.
                ("r", None),
                ("ra", subject_hit("rats", false, YelpSubjectType::Service, &[])),
                ("ram", subject_hit("ramen", false, YelpSubjectType::Service, &[])),
                ("rame", subject_hit("ramen", false, YelpSubjectType::Service, &[])),
                ("ramen", subject_hit("ramen", true, YelpSubjectType::Service, &[])),
                ("spi", subject_hit("spicy ramen", false, YelpSubjectType::Service, &[])),
                ("spicy ra ", subject_hit("spicy ramen", false, YelpSubjectType::Service, &[])),
                ("spicy ramen", subject_hit("spicy ramen", true, YelpSubjectType::Service, &[])),
                (
                    "spicy ramen gogo",
                    subject_hit("spicy ramen", true, YelpSubjectType::Service, &["gogo"]),
                ),
                (
                    "SpIcY rAmEn GoGo",
                    subject_hit("SpIcY rAmEn", true, YelpSubjectType::Service, &["GoGo"]),
                ),
                ("ramenabc", None),
                ("ramenabc xyz", None),
                ("spicy ramenabc", None),
                ("spicy ramenabc xyz", None),
                ("ramen abc", subject_hit("ramen", true, YelpSubjectType::Service, &["abc"])),
                ("the shop", subject_hit("the shop", true, YelpSubjectType::Business, &[])),
            ];
            for (query, expected) in subject_cases {
                let words: Vec<_> = query.split_whitespace().collect();
                assert_eq!(dao.find_subject(&words)?, *expected);
            }

            // Columns: query, expected result (always searched from the front).
            type FindLocationSignTestCase<'a> = (&'a str, Option<(String, &'a [&'a str])>);
            let location_sign_cases: &[FindLocationSignTestCase] = &[
                ("", None),
                ("n", modifier_hit("near", &[])),
                ("ne", modifier_hit("near", &[])),
                ("nea", modifier_hit("near", &[])),
                ("near", modifier_hit("near", &[])),
                ("near ", modifier_hit("near", &[])),
                ("near b", modifier_hit("near by", &[])),
                ("near by", modifier_hit("near by", &[])),
                ("near by a", modifier_hit("near by", &["a"])),
                ("nea r", None),
            ];
            for (query, expected) in location_sign_cases {
                let words: Vec<_> = query.split_whitespace().collect();
                assert_eq!(
                    dao.find_modifier(&words, Modifier::LocationSign, FindFrom::First)?,
                    *expected
                );
            }

            Ok(())
        })?;

        Ok(())
    }
}