1use icu_casemap::CaseMapperBorrowed;
7use icu_normalizer::DecomposingNormalizerBorrowed;
8use icu_properties::props::GeneralCategory;
9use icu_properties::props::GeneralCategoryGroup;
10use icu_properties::CodePointMapDataBorrowed;
11use std::borrow::Cow;
12
13use crate::Result;
14
// NFKD decomposing normalizer; `i18n_transform` feeds every input through it.
static NFKD: DecomposingNormalizerBorrowed = DecomposingNormalizerBorrowed::new_nfkd();
// Code point -> `GeneralCategory` lookup, consulted by `is_combining_mark`.
static GENERAL_CATEGORY: CodePointMapDataBorrowed<'static, GeneralCategory> =
    icu_properties::CodePointMapData::<GeneralCategory>::new();
// Case mapper whose `fold_string` is applied by `i18n_cmp` before comparing.
static CASE_MAPPER: CaseMapperBorrowed = CaseMapperBorrowed::new();
19
20fn is_combining_mark(c: char) -> bool {
21 GeneralCategoryGroup::Mark.contains(GENERAL_CATEGORY.get(c))
22}
23
/// Picks the phrase from `keywords` that best completes `query`.
///
/// Only keywords that start with `query` are considered. Two heuristics are
/// tried in order:
///
/// 1. Take the first keyword that has more whitespace-separated words than
///    the query is asking for, and return it truncated to that many words
///    (joined with single spaces). A query ending in whitespace asks for one
///    word more than it contains, so the word after the space is included.
/// 2. Otherwise return the keyword that is strictly longer than `query` and
///    has the greatest trimmed length (the last one wins on ties). If there is
///    no such keyword, `query` itself is returned.
pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
    let query_words_len = query.split_whitespace().count();
    let min_phrase_words_len = if query.ends_with(char::is_whitespace) {
        query_words_len + 1
    } else {
        query_words_len
    };

    // `nth(n).is_some()` is "has more than n words" without collecting the
    // words of every candidate; it also stops scanning as soon as it knows.
    let first_longer_phrase = keywords
        .iter()
        .map(AsRef::as_ref)
        .filter(|phrase| phrase.starts_with(query))
        .find(|phrase| {
            phrase
                .split_whitespace()
                .nth(min_phrase_words_len)
                .is_some()
        });

    if let Some(phrase) = first_longer_phrase {
        // Only the winning keyword gets its words materialised.
        return phrase
            .split_whitespace()
            .take(min_phrase_words_len)
            .collect::<Vec<_>>()
            .join(" ");
    }

    keywords
        .iter()
        .map(AsRef::as_ref)
        .filter(|phrase| phrase.starts_with(query) && query.len() < phrase.len())
        .max_by_key(|phrase| phrase.trim().len())
        .unwrap_or(query)
        .to_owned()
}
58
59pub fn filter_map_chunks<T: Clone>(
180 words: &[&str],
181 max_chunk_size: usize,
182 f: impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
183) -> Result<Vec<Vec<T>>> {
184 let normalized_query = words.join(" ");
185 filter_map_chunks_recurse(words, &normalized_query, &mut vec![], 0, max_chunk_size, &f)
186}
187
/// Recursive worker behind `filter_map_chunks`.
///
/// Tries every chunk of 1..=`max_chunk_size` leading words of
/// `remaining_words`, asks `f` to map it, and recurses on the words that
/// follow. Returns every complete path (one value per chunk) from this point
/// to the end of the words, ordered by chunk size first and then by the order
/// in which `f` returned its values.
///
/// * `remaining_words` - words not yet consumed.
/// * `remaining_query` - must be exactly `remaining_words` joined with single
///   spaces; chunks are sliced out of it by byte length.
/// * `path` - values chosen for the chunks before this point. It is pushed to
///   and popped from during recursion and is back at its entry length on a
///   successful return.
/// * `chunk_index` - index, in the full word list, of the first remaining word.
/// * `f` - called as `f(chunk, chunk_index, is_last_chunk, path)`. Returning
///   `Ok(None)` prunes that chunk; an `Err` aborts the whole traversal.
fn filter_map_chunks_recurse<T: Clone>(
    remaining_words: &[&str],
    remaining_query: &str,
    path: &mut Vec<T>,
    chunk_index: usize,
    max_chunk_size: usize,
    f: &impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
) -> Result<Vec<Vec<T>>> {
    let mut this_step_paths: Vec<Vec<T>> = vec![];

    // A chunk can never hold more words than are left.
    let largest_chunk_size = max_chunk_size.min(remaining_words.len());

    for chunk_size in 1..=largest_chunk_size {
        // Byte length of the chunk inside `remaining_query`: the words
        // themselves plus one separating space between each pair.
        let chunk_byte_len = remaining_words[..chunk_size]
            .iter()
            .map(|word| word.len())
            .sum::<usize>()
            + (chunk_size - 1);
        let chunk = &remaining_query[..chunk_byte_len];
        let is_last_chunk = chunk_size == remaining_words.len();

        let mapped_values = match f(chunk, chunk_index, is_last_chunk, &path[..])? {
            Some(values) => values,
            // `f` rejected this chunk: no path may go through it.
            None => continue,
        };

        if is_last_chunk {
            // End of the words: each value completes a path on its own. The
            // values are owned here, so they are moved rather than cloned.
            this_step_paths.extend(mapped_values.into_iter().map(|value| vec![value]));
            continue;
        }

        for value in mapped_values {
            // Expose the value to `f` while exploring the rest of the words.
            path.push(value.clone());
            let subtree_paths = filter_map_chunks_recurse(
                &remaining_words[chunk_size..],
                // Skip the space that follows the chunk.
                &remaining_query[(chunk_byte_len + 1)..],
                path,
                chunk_index + chunk_size,
                max_chunk_size,
                f,
            )?;
            path.pop();

            // Prefix every completed tail with this chunk's value.
            this_step_paths.extend(subtree_paths.into_iter().map(|tail| {
                std::iter::once(value.clone())
                    .chain(tail)
                    .collect::<Vec<T>>()
            }));
        }
    }

    Ok(this_step_paths)
}
257
/// Splits `keyword` at its first ASCII space into `(head, rest)`.
///
/// The space itself is dropped. When there is no space, the whole keyword is
/// the head and the rest is the empty string.
pub fn split_keyword(keyword: &str) -> (&str, &str) {
    match keyword.split_once(' ') {
        Some(parts) => parts,
        None => (keyword, ""),
    }
}
264
265pub fn i18n_cmp(a: &str, b: &str) -> std::cmp::Ordering {
271 CASE_MAPPER
272 .fold_string(&i18n_transform(a))
273 .cmp(&CASE_MAPPER.fold_string(&i18n_transform(b)))
274}
275
276pub fn i18n_transform(s: &str) -> Cow<'_, str> {
282 macro_rules! pattern_remove {
287 () => {
288 '.' | ',' | '\'' | '’'
289 };
290 }
291
292 macro_rules! pattern_replace_with_space {
296 () => {
297 '-'
298 };
299 }
300
301 macro_rules! pattern_all {
302 () => {
303 pattern_remove!() | pattern_replace_with_space!()
304 };
305 }
306
307 let borrowable = !NFKD
308 .normalize_iter(s.chars())
309 .any(|c| is_combining_mark(c) || matches!(c, pattern_all!()));
310
311 if borrowable {
312 Cow::from(s)
316 } else {
317 NFKD.normalize_iter(s.chars())
318 .filter_map(|c| {
319 if is_combining_mark(c) {
320 None
323 } else {
324 match c {
325 pattern_remove!() => None,
326 pattern_replace_with_space!() => Some(' '),
327 _ => Some(c),
328 }
329 }
330 })
331 .collect::<_>()
332 }
333}
334
335#[cfg(test)]
336mod tests {
337 use super::*;
338 use itertools::Itertools;
339
340 #[test]
341 fn keywords_with_more_words() {
342 assert_eq!(
343 full_keyword(
344 "moz",
345 &[
346 "moz",
347 "mozi",
348 "mozil",
349 "mozill",
350 "mozilla",
351 "mozilla firefox"
352 ]
353 ),
354 "mozilla".to_owned(),
355 );
356 assert_eq!(
357 full_keyword(
358 "mozilla",
359 &[
360 "moz",
361 "mozi",
362 "mozil",
363 "mozill",
364 "mozilla",
365 "mozilla firefox"
366 ]
367 ),
368 "mozilla".to_owned(),
369 );
370 }
371
372 #[test]
373 fn keywords_with_longer_phrase() {
374 assert_eq!(
375 full_keyword("moz", &["moz", "mozi", "mozil", "mozill", "mozilla"]),
376 "mozilla".to_owned()
377 );
378 assert_eq!(
379 full_keyword(
380 "mozilla f",
381 &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
382 ),
383 "mozilla firefox".to_owned()
384 );
385 }
386
387 #[test]
388 fn query_ends_with_space() {
389 assert_eq!(
390 full_keyword(
391 "mozilla ",
392 &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
393 ),
394 "mozilla firefox".to_owned()
395 );
396 }
397
398 fn fmc<T: Clone>(
399 query: &str,
400 max_chunk_size: usize,
401 f: impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
402 ) -> Result<Vec<Vec<T>>> {
403 let words: Vec<_> = query.split_whitespace().collect();
404 filter_map_chunks(&words, max_chunk_size, f)
405 }
406
407 fn check_paths(actual: Vec<Vec<(String, usize)>>, expected: Vec<Vec<(&str, usize)>>) {
408 assert_eq!(
409 actual,
410 expected
411 .into_iter()
412 .map(|p| p
413 .into_iter()
414 .map(|(w, i)| (w.to_string(), i))
415 .collect::<Vec<_>>())
416 .collect::<Vec<Vec<_>>>()
417 );
418 }
419
420 #[test]
421 fn filter_map_chunks_1() -> anyhow::Result<()> {
422 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| {
423 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
424 })?;
425 check_paths(
426 paths,
427 vec![vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)]],
428 );
429 Ok(())
430 }
431
432 #[test]
433 fn filter_map_chunks_2() -> anyhow::Result<()> {
434 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| {
435 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
436 })?;
437 check_paths(
438 paths,
439 vec![
440 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
441 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
442 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
443 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
444 vec![("a", 0), ("b c", 1), ("d e", 3)],
445 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
446 vec![("a b", 0), ("c", 2), ("d e", 3)],
447 vec![("a b", 0), ("c d", 2), ("e", 4)],
448 ],
449 );
450 Ok(())
451 }
452
453 #[test]
454 fn filter_map_chunks_3() -> anyhow::Result<()> {
455 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
456 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
457 })?;
458 check_paths(
459 paths,
460 vec![
461 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
462 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
463 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
464 vec![("a", 0), ("b", 1), ("c d e", 2)],
465 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
466 vec![("a", 0), ("b c", 1), ("d e", 3)],
467 vec![("a", 0), ("b c d", 1), ("e", 4)],
468 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
469 vec![("a b", 0), ("c", 2), ("d e", 3)],
470 vec![("a b", 0), ("c d", 2), ("e", 4)],
471 vec![("a b", 0), ("c d e", 2)],
472 vec![("a b c", 0), ("d", 3), ("e", 4)],
473 vec![("a b c", 0), ("d e", 3)],
474 ],
475 );
476 Ok(())
477 }
478
479 #[test]
480 fn filter_map_chunks_4() -> anyhow::Result<()> {
481 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
482 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
483 })?;
484 check_paths(
485 paths,
486 vec![
487 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
488 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
489 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
490 vec![("a", 0), ("b", 1), ("c d e", 2)],
491 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
492 vec![("a", 0), ("b c", 1), ("d e", 3)],
493 vec![("a", 0), ("b c d", 1), ("e", 4)],
494 vec![("a", 0), ("b c d e", 1)],
495 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
496 vec![("a b", 0), ("c", 2), ("d e", 3)],
497 vec![("a b", 0), ("c d", 2), ("e", 4)],
498 vec![("a b", 0), ("c d e", 2)],
499 vec![("a b c", 0), ("d", 3), ("e", 4)],
500 vec![("a b c", 0), ("d e", 3)],
501 vec![("a b c d", 0), ("e", 4)],
502 ],
503 );
504 Ok(())
505 }
506
507 #[test]
508 fn filter_map_chunks_5() -> anyhow::Result<()> {
509 let paths = fmc("a b c d e", 5, |chunk, chunk_index, _, _| {
510 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
511 })?;
512 check_paths(
513 paths,
514 vec![
515 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
516 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
517 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
518 vec![("a", 0), ("b", 1), ("c d e", 2)],
519 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
520 vec![("a", 0), ("b c", 1), ("d e", 3)],
521 vec![("a", 0), ("b c d", 1), ("e", 4)],
522 vec![("a", 0), ("b c d e", 1)],
523 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
524 vec![("a b", 0), ("c", 2), ("d e", 3)],
525 vec![("a b", 0), ("c d", 2), ("e", 4)],
526 vec![("a b", 0), ("c d e", 2)],
527 vec![("a b c", 0), ("d", 3), ("e", 4)],
528 vec![("a b c", 0), ("d e", 3)],
529 vec![("a b c d", 0), ("e", 4)],
530 vec![("a b c d e", 0)],
531 ],
532 );
533 Ok(())
534 }
535
536 #[test]
537 fn filter_map_chunks_1_map_many() -> anyhow::Result<()> {
538 let paths = fmc("a b c", 1, |chunk, _, _, _| {
539 Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
540 })?;
541 assert_eq!(
542 paths,
543 vec![
544 vec!["a0", "b0", "c0"],
545 vec!["a0", "b0", "c1"],
546 vec!["a0", "b0", "c2"],
547 vec!["a0", "b1", "c0"],
548 vec!["a0", "b1", "c1"],
549 vec!["a0", "b1", "c2"],
550 vec!["a0", "b2", "c0"],
551 vec!["a0", "b2", "c1"],
552 vec!["a0", "b2", "c2"],
553 vec!["a1", "b0", "c0"],
554 vec!["a1", "b0", "c1"],
555 vec!["a1", "b0", "c2"],
556 vec!["a1", "b1", "c0"],
557 vec!["a1", "b1", "c1"],
558 vec!["a1", "b1", "c2"],
559 vec!["a1", "b2", "c0"],
560 vec!["a1", "b2", "c1"],
561 vec!["a1", "b2", "c2"],
562 vec!["a2", "b0", "c0"],
563 vec!["a2", "b0", "c1"],
564 vec!["a2", "b0", "c2"],
565 vec!["a2", "b1", "c0"],
566 vec!["a2", "b1", "c1"],
567 vec!["a2", "b1", "c2"],
568 vec!["a2", "b2", "c0"],
569 vec!["a2", "b2", "c1"],
570 vec!["a2", "b2", "c2"]
571 ]
572 );
573 Ok(())
574 }
575
576 #[test]
577 fn filter_map_chunks_2_map_many() -> anyhow::Result<()> {
578 let paths = fmc("a b c", 2, |chunk, _, _, _| {
579 Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
580 })?;
581 assert_eq!(
582 paths,
583 vec![
584 vec!["a0", "b0", "c0"],
585 vec!["a0", "b0", "c1"],
586 vec!["a0", "b0", "c2"],
587 vec!["a0", "b1", "c0"],
588 vec!["a0", "b1", "c1"],
589 vec!["a0", "b1", "c2"],
590 vec!["a0", "b2", "c0"],
591 vec!["a0", "b2", "c1"],
592 vec!["a0", "b2", "c2"],
593 vec!["a0", "b c0"],
594 vec!["a0", "b c1"],
595 vec!["a0", "b c2"],
596 vec!["a1", "b0", "c0"],
597 vec!["a1", "b0", "c1"],
598 vec!["a1", "b0", "c2"],
599 vec!["a1", "b1", "c0"],
600 vec!["a1", "b1", "c1"],
601 vec!["a1", "b1", "c2"],
602 vec!["a1", "b2", "c0"],
603 vec!["a1", "b2", "c1"],
604 vec!["a1", "b2", "c2"],
605 vec!["a1", "b c0"],
606 vec!["a1", "b c1"],
607 vec!["a1", "b c2"],
608 vec!["a2", "b0", "c0"],
609 vec!["a2", "b0", "c1"],
610 vec!["a2", "b0", "c2"],
611 vec!["a2", "b1", "c0"],
612 vec!["a2", "b1", "c1"],
613 vec!["a2", "b1", "c2"],
614 vec!["a2", "b2", "c0"],
615 vec!["a2", "b2", "c1"],
616 vec!["a2", "b2", "c2"],
617 vec!["a2", "b c0"],
618 vec!["a2", "b c1"],
619 vec!["a2", "b c2"],
620 vec!["a b0", "c0"],
621 vec!["a b0", "c1"],
622 vec!["a b0", "c2"],
623 vec!["a b1", "c0"],
624 vec!["a b1", "c1"],
625 vec!["a b1", "c2"],
626 vec!["a b2", "c0"],
627 vec!["a b2", "c1"],
628 vec!["a b2", "c2"]
629 ]
630 );
631 Ok(())
632 }
633
634 #[test]
635 fn filter_map_chunks_1_prune_a() -> anyhow::Result<()> {
636 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
637 "a" => Ok(None),
638 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
639 })?;
640 check_paths(paths, vec![]);
641 Ok(())
642 }
643
644 #[test]
645 fn filter_map_chunks_1_prune_b() -> anyhow::Result<()> {
646 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
647 "b" => Ok(None),
648 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
649 })?;
650 check_paths(paths, vec![]);
651 Ok(())
652 }
653
654 #[test]
655 fn filter_map_chunks_1_prune_c() -> anyhow::Result<()> {
656 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
657 "c" => Ok(None),
658 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
659 })?;
660 check_paths(paths, vec![]);
661 Ok(())
662 }
663
664 #[test]
665 fn filter_map_chunks_1_prune_d() -> anyhow::Result<()> {
666 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
667 "d" => Ok(None),
668 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
669 })?;
670 check_paths(paths, vec![]);
671 Ok(())
672 }
673
674 #[test]
675 fn filter_map_chunks_1_prune_e() -> anyhow::Result<()> {
676 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
677 "e" => Ok(None),
678 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
679 })?;
680 check_paths(paths, vec![]);
681 Ok(())
682 }
683
684 #[test]
685 fn filter_map_chunks_2_prune_a() -> anyhow::Result<()> {
686 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
687 "a" => Ok(None),
688 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
689 })?;
690 check_paths(
691 paths,
692 vec![
693 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
694 vec![("a b", 0), ("c", 2), ("d e", 3)],
695 vec![("a b", 0), ("c d", 2), ("e", 4)],
696 ],
697 );
698 Ok(())
699 }
700
701 #[test]
702 fn filter_map_chunks_2_prune_b() -> anyhow::Result<()> {
703 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
704 "b" => Ok(None),
705 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
706 })?;
707 check_paths(
708 paths,
709 vec![
710 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
711 vec![("a", 0), ("b c", 1), ("d e", 3)],
712 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
713 vec![("a b", 0), ("c", 2), ("d e", 3)],
714 vec![("a b", 0), ("c d", 2), ("e", 4)],
715 ],
716 );
717 Ok(())
718 }
719
720 #[test]
721 fn filter_map_chunks_2_prune_c() -> anyhow::Result<()> {
722 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
723 "c" => Ok(None),
724 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
725 })?;
726 check_paths(
727 paths,
728 vec![
729 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
730 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
731 vec![("a", 0), ("b c", 1), ("d e", 3)],
732 vec![("a b", 0), ("c d", 2), ("e", 4)],
733 ],
734 );
735 Ok(())
736 }
737
738 #[test]
739 fn filter_map_chunks_2_prune_d() -> anyhow::Result<()> {
740 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
741 "d" => Ok(None),
742 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
743 })?;
744 check_paths(
745 paths,
746 vec![
747 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
748 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
749 vec![("a", 0), ("b c", 1), ("d e", 3)],
750 vec![("a b", 0), ("c", 2), ("d e", 3)],
751 vec![("a b", 0), ("c d", 2), ("e", 4)],
752 ],
753 );
754 Ok(())
755 }
756
757 #[test]
758 fn filter_map_chunks_2_prune_e() -> anyhow::Result<()> {
759 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
760 "e" => Ok(None),
761 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
762 })?;
763 check_paths(
764 paths,
765 vec![
766 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
767 vec![("a", 0), ("b c", 1), ("d e", 3)],
768 vec![("a b", 0), ("c", 2), ("d e", 3)],
769 ],
770 );
771 Ok(())
772 }
773
774 #[test]
775 fn filter_map_chunks_2_prune_ab() -> anyhow::Result<()> {
776 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
777 "a b" => Ok(None),
778 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
779 })?;
780 check_paths(
781 paths,
782 vec![
783 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
784 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
785 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
786 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
787 vec![("a", 0), ("b c", 1), ("d e", 3)],
788 ],
789 );
790 Ok(())
791 }
792
793 #[test]
794 fn filter_map_chunks_2_prune_bc() -> anyhow::Result<()> {
795 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
796 "b c" => Ok(None),
797 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
798 })?;
799 check_paths(
800 paths,
801 vec![
802 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
803 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
804 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
805 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
806 vec![("a b", 0), ("c", 2), ("d e", 3)],
807 vec![("a b", 0), ("c d", 2), ("e", 4)],
808 ],
809 );
810 Ok(())
811 }
812
813 #[test]
814 fn filter_map_chunks_2_prune_cd() -> anyhow::Result<()> {
815 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
816 "c d" => Ok(None),
817 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
818 })?;
819 check_paths(
820 paths,
821 vec![
822 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
823 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
824 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
825 vec![("a", 0), ("b c", 1), ("d e", 3)],
826 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
827 vec![("a b", 0), ("c", 2), ("d e", 3)],
828 ],
829 );
830 Ok(())
831 }
832
833 #[test]
834 fn filter_map_chunks_2_prune_de() -> anyhow::Result<()> {
835 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
836 "d e" => Ok(None),
837 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
838 })?;
839 check_paths(
840 paths,
841 vec![
842 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
843 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
844 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
845 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
846 vec![("a b", 0), ("c d", 2), ("e", 4)],
847 ],
848 );
849 Ok(())
850 }
851
852 #[test]
853 fn filter_map_chunks_2_prune_a_bc() -> anyhow::Result<()> {
854 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
855 "a" | "b c" => Ok(None),
856 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
857 })?;
858 check_paths(
859 paths,
860 vec![
861 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
862 vec![("a b", 0), ("c", 2), ("d e", 3)],
863 vec![("a b", 0), ("c d", 2), ("e", 4)],
864 ],
865 );
866 Ok(())
867 }
868
869 #[test]
870 fn filter_map_chunks_2_prune_a_cd() -> anyhow::Result<()> {
871 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
872 "a" | "c d" => Ok(None),
873 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
874 })?;
875 check_paths(
876 paths,
877 vec![
878 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
879 vec![("a b", 0), ("c", 2), ("d e", 3)],
880 ],
881 );
882 Ok(())
883 }
884
885 #[test]
886 fn filter_map_chunks_2_prune_bc_cd() -> anyhow::Result<()> {
887 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
888 "b c" | "c d" => Ok(None),
889 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
890 })?;
891 check_paths(
892 paths,
893 vec![
894 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
895 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
896 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
897 vec![("a b", 0), ("c", 2), ("d e", 3)],
898 ],
899 );
900 Ok(())
901 }
902
903 #[test]
904 fn filter_map_chunks_2_prune_bc_de() -> anyhow::Result<()> {
905 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
906 "b c" | "d e" => Ok(None),
907 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
908 })?;
909 check_paths(
910 paths,
911 vec![
912 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
913 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
914 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
915 vec![("a b", 0), ("c d", 2), ("e", 4)],
916 ],
917 );
918 Ok(())
919 }
920
921 #[test]
922 fn filter_map_chunks_3_prune_a() -> anyhow::Result<()> {
923 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
924 "a" => Ok(None),
925 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
926 })?;
927 check_paths(
928 paths,
929 vec![
930 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
931 vec![("a b", 0), ("c", 2), ("d e", 3)],
932 vec![("a b", 0), ("c d", 2), ("e", 4)],
933 vec![("a b", 0), ("c d e", 2)],
934 vec![("a b c", 0), ("d", 3), ("e", 4)],
935 vec![("a b c", 0), ("d e", 3)],
936 ],
937 );
938 Ok(())
939 }
940
941 #[test]
942 fn filter_map_chunks_3_prune_b() -> anyhow::Result<()> {
943 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
944 "b" => Ok(None),
945 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
946 })?;
947 check_paths(
948 paths,
949 vec![
950 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
951 vec![("a", 0), ("b c", 1), ("d e", 3)],
952 vec![("a", 0), ("b c d", 1), ("e", 4)],
953 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
954 vec![("a b", 0), ("c", 2), ("d e", 3)],
955 vec![("a b", 0), ("c d", 2), ("e", 4)],
956 vec![("a b", 0), ("c d e", 2)],
957 vec![("a b c", 0), ("d", 3), ("e", 4)],
958 vec![("a b c", 0), ("d e", 3)],
959 ],
960 );
961 Ok(())
962 }
963
964 #[test]
965 fn filter_map_chunks_3_prune_c() -> anyhow::Result<()> {
966 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
967 "c" => Ok(None),
968 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
969 })?;
970 check_paths(
971 paths,
972 vec![
973 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
974 vec![("a", 0), ("b", 1), ("c d e", 2)],
975 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
976 vec![("a", 0), ("b c", 1), ("d e", 3)],
977 vec![("a", 0), ("b c d", 1), ("e", 4)],
978 vec![("a b", 0), ("c d", 2), ("e", 4)],
979 vec![("a b", 0), ("c d e", 2)],
980 vec![("a b c", 0), ("d", 3), ("e", 4)],
981 vec![("a b c", 0), ("d e", 3)],
982 ],
983 );
984 Ok(())
985 }
986
987 #[test]
988 fn filter_map_chunks_3_prune_d() -> anyhow::Result<()> {
989 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
990 "d" => Ok(None),
991 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
992 })?;
993 check_paths(
994 paths,
995 vec![
996 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
997 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
998 vec![("a", 0), ("b", 1), ("c d e", 2)],
999 vec![("a", 0), ("b c", 1), ("d e", 3)],
1000 vec![("a", 0), ("b c d", 1), ("e", 4)],
1001 vec![("a b", 0), ("c", 2), ("d e", 3)],
1002 vec![("a b", 0), ("c d", 2), ("e", 4)],
1003 vec![("a b", 0), ("c d e", 2)],
1004 vec![("a b c", 0), ("d e", 3)],
1005 ],
1006 );
1007 Ok(())
1008 }
1009
1010 #[test]
1011 fn filter_map_chunks_3_prune_e() -> anyhow::Result<()> {
1012 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1013 "e" => Ok(None),
1014 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1015 })?;
1016 check_paths(
1017 paths,
1018 vec![
1019 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1020 vec![("a", 0), ("b", 1), ("c d e", 2)],
1021 vec![("a", 0), ("b c", 1), ("d e", 3)],
1022 vec![("a b", 0), ("c", 2), ("d e", 3)],
1023 vec![("a b", 0), ("c d e", 2)],
1024 vec![("a b c", 0), ("d e", 3)],
1025 ],
1026 );
1027 Ok(())
1028 }
1029
1030 #[test]
1031 fn filter_map_chunks_3_prune_ab() -> anyhow::Result<()> {
1032 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1033 "a b" => Ok(None),
1034 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1035 })?;
1036 check_paths(
1037 paths,
1038 vec![
1039 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1040 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1041 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1042 vec![("a", 0), ("b", 1), ("c d e", 2)],
1043 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1044 vec![("a", 0), ("b c", 1), ("d e", 3)],
1045 vec![("a", 0), ("b c d", 1), ("e", 4)],
1046 vec![("a b c", 0), ("d", 3), ("e", 4)],
1047 vec![("a b c", 0), ("d e", 3)],
1048 ],
1049 );
1050 Ok(())
1051 }
1052
1053 #[test]
1054 fn filter_map_chunks_3_prune_bc() -> anyhow::Result<()> {
1055 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1056 "b c" => Ok(None),
1057 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1058 })?;
1059 check_paths(
1060 paths,
1061 vec![
1062 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1063 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1064 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1065 vec![("a", 0), ("b", 1), ("c d e", 2)],
1066 vec![("a", 0), ("b c d", 1), ("e", 4)],
1067 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1068 vec![("a b", 0), ("c", 2), ("d e", 3)],
1069 vec![("a b", 0), ("c d", 2), ("e", 4)],
1070 vec![("a b", 0), ("c d e", 2)],
1071 vec![("a b c", 0), ("d", 3), ("e", 4)],
1072 vec![("a b c", 0), ("d e", 3)],
1073 ],
1074 );
1075 Ok(())
1076 }
1077
1078 #[test]
1079 fn filter_map_chunks_3_prune_cd() -> anyhow::Result<()> {
1080 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1081 "c d" => Ok(None),
1082 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1083 })?;
1084 check_paths(
1085 paths,
1086 vec![
1087 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1088 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1089 vec![("a", 0), ("b", 1), ("c d e", 2)],
1090 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1091 vec![("a", 0), ("b c", 1), ("d e", 3)],
1092 vec![("a", 0), ("b c d", 1), ("e", 4)],
1093 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1094 vec![("a b", 0), ("c", 2), ("d e", 3)],
1095 vec![("a b", 0), ("c d e", 2)],
1096 vec![("a b c", 0), ("d", 3), ("e", 4)],
1097 vec![("a b c", 0), ("d e", 3)],
1098 ],
1099 );
1100 Ok(())
1101 }
1102
1103 #[test]
1104 fn filter_map_chunks_3_prune_de() -> anyhow::Result<()> {
1105 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1106 "d e" => Ok(None),
1107 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1108 })?;
1109 check_paths(
1110 paths,
1111 vec![
1112 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1113 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1114 vec![("a", 0), ("b", 1), ("c d e", 2)],
1115 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1116 vec![("a", 0), ("b c d", 1), ("e", 4)],
1117 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1118 vec![("a b", 0), ("c d", 2), ("e", 4)],
1119 vec![("a b", 0), ("c d e", 2)],
1120 vec![("a b c", 0), ("d", 3), ("e", 4)],
1121 ],
1122 );
1123 Ok(())
1124 }
1125
1126 #[test]
1127 fn filter_map_chunks_3_prune_abc() -> anyhow::Result<()> {
1128 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1129 "a b c" => Ok(None),
1130 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1131 })?;
1132 check_paths(
1133 paths,
1134 vec![
1135 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1136 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1137 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1138 vec![("a", 0), ("b", 1), ("c d e", 2)],
1139 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1140 vec![("a", 0), ("b c", 1), ("d e", 3)],
1141 vec![("a", 0), ("b c d", 1), ("e", 4)],
1142 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1143 vec![("a b", 0), ("c", 2), ("d e", 3)],
1144 vec![("a b", 0), ("c d", 2), ("e", 4)],
1145 vec![("a b", 0), ("c d e", 2)],
1146 ],
1147 );
1148 Ok(())
1149 }
1150
1151 #[test]
1152 fn filter_map_chunks_3_prune_bcd() -> anyhow::Result<()> {
1153 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1154 "b c d" => Ok(None),
1155 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1156 })?;
1157 check_paths(
1158 paths,
1159 vec![
1160 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1161 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1162 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1163 vec![("a", 0), ("b", 1), ("c d e", 2)],
1164 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1165 vec![("a", 0), ("b c", 1), ("d e", 3)],
1166 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1167 vec![("a b", 0), ("c", 2), ("d e", 3)],
1168 vec![("a b", 0), ("c d", 2), ("e", 4)],
1169 vec![("a b", 0), ("c d e", 2)],
1170 vec![("a b c", 0), ("d", 3), ("e", 4)],
1171 vec![("a b c", 0), ("d e", 3)],
1172 ],
1173 );
1174 Ok(())
1175 }
1176
1177 #[test]
1178 fn filter_map_chunks_3_prune_cde() -> anyhow::Result<()> {
1179 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1180 "c d e" => Ok(None),
1181 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1182 })?;
1183 check_paths(
1184 paths,
1185 vec![
1186 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1187 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1188 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1189 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1190 vec![("a", 0), ("b c", 1), ("d e", 3)],
1191 vec![("a", 0), ("b c d", 1), ("e", 4)],
1192 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1193 vec![("a b", 0), ("c", 2), ("d e", 3)],
1194 vec![("a b", 0), ("c d", 2), ("e", 4)],
1195 vec![("a b c", 0), ("d", 3), ("e", 4)],
1196 vec![("a b c", 0), ("d e", 3)],
1197 ],
1198 );
1199 Ok(())
1200 }
1201
1202 #[test]
1203 fn filter_map_chunks_3_prune_a_bc_cde() -> anyhow::Result<()> {
1204 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1205 "a" | "b c" | "c d e" => Ok(None),
1206 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1207 })?;
1208 check_paths(
1209 paths,
1210 vec![
1211 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1212 vec![("a b", 0), ("c", 2), ("d e", 3)],
1213 vec![("a b", 0), ("c d", 2), ("e", 4)],
1214 vec![("a b c", 0), ("d", 3), ("e", 4)],
1215 vec![("a b c", 0), ("d e", 3)],
1216 ],
1217 );
1218 Ok(())
1219 }
1220
1221 #[test]
1222 fn filter_map_chunks_4_prune_a() -> anyhow::Result<()> {
1223 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1224 "a" => Ok(None),
1225 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1226 })?;
1227 check_paths(
1228 paths,
1229 vec![
1230 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1231 vec![("a b", 0), ("c", 2), ("d e", 3)],
1232 vec![("a b", 0), ("c d", 2), ("e", 4)],
1233 vec![("a b", 0), ("c d e", 2)],
1234 vec![("a b c", 0), ("d", 3), ("e", 4)],
1235 vec![("a b c", 0), ("d e", 3)],
1236 vec![("a b c d", 0), ("e", 4)],
1237 ],
1238 );
1239 Ok(())
1240 }
1241
1242 #[test]
1243 fn filter_map_chunks_4_prune_b() -> anyhow::Result<()> {
1244 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1245 "b" => Ok(None),
1246 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1247 })?;
1248 check_paths(
1249 paths,
1250 vec![
1251 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1252 vec![("a", 0), ("b c", 1), ("d e", 3)],
1253 vec![("a", 0), ("b c d", 1), ("e", 4)],
1254 vec![("a", 0), ("b c d e", 1)],
1255 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1256 vec![("a b", 0), ("c", 2), ("d e", 3)],
1257 vec![("a b", 0), ("c d", 2), ("e", 4)],
1258 vec![("a b", 0), ("c d e", 2)],
1259 vec![("a b c", 0), ("d", 3), ("e", 4)],
1260 vec![("a b c", 0), ("d e", 3)],
1261 vec![("a b c d", 0), ("e", 4)],
1262 ],
1263 );
1264 Ok(())
1265 }
1266
/// Max chunk size 4; the callback rejects the chunk `"c"`.
///
/// Expects exactly the listed paths, none of which contains `"c"` as a chunk
/// on its own.
#[test]
fn filter_map_chunks_4_prune_c() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "c").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1291
/// Max chunk size 4; the callback rejects the chunk `"d"`.
///
/// Expects exactly the listed paths, none of which contains `"d"` as a chunk
/// on its own.
#[test]
fn filter_map_chunks_4_prune_d() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "d").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1316
/// Max chunk size 4; the callback rejects the chunk `"e"`.
///
/// Expects exactly the listed paths, none of which contains `"e"` as a chunk
/// on its own.
#[test]
fn filter_map_chunks_4_prune_e() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "e").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1337
/// Max chunk size 4; the callback rejects the two-word chunk `"a b"`.
///
/// Expects exactly the listed paths, none of which contains the chunk `"a b"`.
#[test]
fn filter_map_chunks_4_prune_ab() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "a b").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1362
/// Max chunk size 4; the callback rejects the two-word chunk `"b c"`.
///
/// Expects exactly the listed paths, none of which contains the chunk `"b c"`.
#[test]
fn filter_map_chunks_4_prune_bc() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "b c").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1389
/// Max chunk size 4; the callback rejects the two-word chunk `"c d"`.
///
/// Expects exactly the listed paths, none of which contains the chunk `"c d"`.
#[test]
fn filter_map_chunks_4_prune_cd() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "c d").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1416
/// Max chunk size 4; the callback rejects the two-word chunk `"d e"`.
///
/// Expects exactly the listed paths, none of which contains the chunk `"d e"`.
#[test]
fn filter_map_chunks_4_prune_de() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "d e").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1441
/// Max chunk size 4; the callback rejects the three-word chunk `"a b c"`.
///
/// Expects exactly the listed paths, none of which contains the chunk
/// `"a b c"`.
#[test]
fn filter_map_chunks_4_prune_abc() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "a b c").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1468
/// Max chunk size 4; the callback rejects the three-word chunk `"b c d"`.
///
/// Expects exactly the listed paths, none of which contains the chunk
/// `"b c d"`.
#[test]
fn filter_map_chunks_4_prune_bcd() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "b c d").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1496
/// Max chunk size 4; the callback rejects the three-word chunk `"c d e"`.
///
/// Expects exactly the listed paths, none of which contains the chunk
/// `"c d e"`.
#[test]
fn filter_map_chunks_4_prune_cde() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "c d e").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1523
/// Max chunk size 4; the callback rejects the four-word chunk `"a b c d"`.
///
/// Expects exactly the listed paths, none of which contains the chunk
/// `"a b c d"`.
#[test]
fn filter_map_chunks_4_prune_abcd() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "a b c d").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1551
/// Max chunk size 4; the callback rejects the four-word chunk `"b c d e"`.
///
/// Expects exactly the listed paths, none of which contains the chunk
/// `"b c d e"`.
#[test]
fn filter_map_chunks_4_prune_bcde() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        // `then` yields `None` for the rejected chunk and `Some(..)` otherwise.
        Ok((chunk != "b c d e").then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1579
/// Max chunk size 4; the callback rejects three chunks at once: `"a"`,
/// `"b c"` and `"d e"`.
///
/// Expects exactly the listed paths, none of which contains any of the
/// rejected chunks.
#[test]
fn filter_map_chunks_4_prune_a_bc_de() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        let rejected = matches!(chunk, "a" | "b c" | "d e");
        // `then` yields `None` for a rejected chunk and `Some(..)` otherwise.
        Ok((!rejected).then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1598
/// Max chunk size 4; the callback rejects three chunks at once: `"a"`,
/// `"b c"` and `"c d e"`.
///
/// Expects exactly the listed paths, none of which contains any of the
/// rejected chunks.
#[test]
fn filter_map_chunks_4_prune_a_bc_cde() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        let rejected = matches!(chunk, "a" | "b c" | "c d e");
        // `then` yields `None` for a rejected chunk and `Some(..)` otherwise.
        Ok((!rejected).then(|| vec![(chunk.to_owned(), chunk_index)]))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1618
/// Max chunk size 2 on a query with leading and trailing whitespace; the
/// callback accepts every chunk.
///
/// Expects the listed paths, whose chunks carry no surrounding whitespace and
/// whose indexes start at 0 for `"a"`.
#[test]
fn filter_map_chunks_spaces() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
    ];
    let actual = fmc(" a b c d e ", 2, |chunk, chunk_index, _, _| {
        let recorded = vec![(chunk.to_owned(), chunk_index)];
        Ok(Some(recorded))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1639
/// Max chunk size 1; records the `is_last` flag (as 0 or 1) next to each
/// chunk instead of the chunk index.
///
/// Expects a single path in which only the final chunk `"e"` is flagged.
#[test]
fn filter_map_chunks_is_last_1() -> anyhow::Result<()> {
    let expected = vec![vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)]];
    let actual = fmc("a b c d e", 1, |chunk, _, is_last, _| {
        // `usize::from(bool)` maps false to 0 and true to 1.
        let recorded = vec![(chunk.to_owned(), usize::from(is_last))];
        Ok(Some(recorded))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1651
/// Max chunk size 2; records the `is_last` flag (as 0 or 1) next to each
/// chunk instead of the chunk index.
///
/// Expects the listed paths, in each of which only the final chunk is flagged.
#[test]
fn filter_map_chunks_is_last_2() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
        vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
        vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
        vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
        vec![("a", 0), ("b c", 0), ("d e", 1)],
        vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
        vec![("a b", 0), ("c", 0), ("d e", 1)],
        vec![("a b", 0), ("c d", 0), ("e", 1)],
    ];
    let actual = fmc("a b c d e", 2, |chunk, _, is_last, _| {
        // `usize::from(bool)` maps false to 0 and true to 1.
        let recorded = vec![(chunk.to_owned(), usize::from(is_last))];
        Ok(Some(recorded))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1672
/// Max chunk size 3; records the `is_last` flag (as 0 or 1) next to each
/// chunk instead of the chunk index.
///
/// Expects the listed paths, in each of which only the final chunk is flagged.
#[test]
fn filter_map_chunks_is_last_3() -> anyhow::Result<()> {
    let expected = vec![
        vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
        vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
        vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
        vec![("a", 0), ("b", 0), ("c d e", 1)],
        vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
        vec![("a", 0), ("b c", 0), ("d e", 1)],
        vec![("a", 0), ("b c d", 0), ("e", 1)],
        vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
        vec![("a b", 0), ("c", 0), ("d e", 1)],
        vec![("a b", 0), ("c d", 0), ("e", 1)],
        vec![("a b", 0), ("c d e", 1)],
        vec![("a b c", 0), ("d", 0), ("e", 1)],
        vec![("a b c", 0), ("d e", 1)],
    ];
    let actual = fmc("a b c d e", 3, |chunk, _, is_last, _| {
        // `usize::from(bool)` maps false to 0 and true to 1.
        let recorded = vec![(chunk.to_owned(), usize::from(is_last))];
        Ok(Some(recorded))
    })?;
    check_paths(actual, expected);
    Ok(())
}
1698
/// Checks `split_keyword` on a single-word keyword and on a three-word
/// keyword, comparing the returned pair against the expected one.
#[test]
fn test_split_keyword() {
    let cases = [
        ("foo", ("foo", "")),
        ("foo bar baz", ("foo", "bar baz")),
    ];
    for (keyword, expected) in cases {
        assert_eq!(split_keyword(keyword), expected);
    }
}
1704
/// Table-driven check of `super::i18n_transform`: each pair is
/// `(input, expected output)`.
///
/// The table covers plain ASCII, accented letters (both precomposed and with
/// a combining mark), `ß`, and punctuation such as periods, commas, hyphens
/// and apostrophes.
#[test]
fn i18n_transform() -> anyhow::Result<()> {
    let cases = [
        ("AbC", "AbC"),
        ("AbC dEf", "AbC dEf"),
        ("Àęí", "Aei"),
        ("Qu\u{00e9}bec", "Quebec"),
        ("Que\u{0301}bec", "Quebec"),
        ("Gößnitz", "Goßnitz"),
        ("St. Louis", "St Louis"),
        ("Washington, D.C.", "Washington DC"),
        ("U.S.A.", "USA"),
        ("Carmel-by-the-Sea", "Carmel by the Sea"),
        ("Val-d'Or", "Val dOr"),
        ("Val-d’Or", "Val dOr"),
        (".,-'()[]?<>", " ()[]?<>"),
    ];
    for &(input, want) in &cases {
        let got = super::i18n_transform(input);
        assert_eq!(got, want, "Transform test str: {:?}", input);
    }
    Ok(())
}
1735
/// Checks that `super::i18n_cmp` reports `Ordering::Equal` for every ordered
/// pair of distinct entries within each group below.
///
/// Each group lists spellings that differ only in case, accents, `ß` vs `ss`,
/// or punctuation.
#[test]
fn i18n_cmp() -> anyhow::Result<()> {
    use std::cmp::Ordering;

    let groups: &[&[&str]] = &[
        &["AbC xYz", "ABC XYZ", "abc xyz"],
        &["Àęí", "Aei", "àęí", "aei"],
        &["Qu\u{00e9}bec", "Que\u{0301}bec", "Quebec", "quebec"],
        &[
            "Gößnitz",
            "Gössnitz",
            "Goßnitz",
            "Gossnitz",
            "gößnitz",
            "gössnitz",
            "goßnitz",
            "gossnitz",
        ],
        &["St. Louis", "St... Louis", "St Louis", "st louis"],
        &[
            "Washington, D.C.",
            "Washington, DC",
            "Washington D.C.",
            "Washington DC",
            "washington dc",
        ],
        &[
            "U.S.A.", "US.A.", "U.SA.", "U.S.A", "USA.", "U.SA", "USA", "usa",
        ],
        &[
            "Val-d'Or",
            "Val-d’Or",
            "Val-dOr",
            "Val d'Or",
            "Val d’Or",
            "Val dOr",
            "val dor",
        ],
        &[
            "Carmel-by-the-Sea",
            "Carmel by the Sea",
            "carmel by the sea",
        ],
        &[".,-'()[]?<>", " ()[]?<>"],
    ];
    for group in groups {
        // `permutations(2)` yields every ordered pair, so both (a, b) and
        // (b, a) are compared.
        for pair in group.iter().permutations(2) {
            let ordering = super::i18n_cmp(pair[0], pair[1]);
            assert_eq!(ordering, Ordering::Equal, "Comparing: {:?}", pair);
        }
    }
    Ok(())
}
1798}