1use std::borrow::Cow;
7use unicase::UniCase;
8use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
9
10use crate::Result;
11
/// Picks the phrase from `keywords` that best completes `query`.
///
/// Two heuristics are tried in order:
///
/// 1. Take the first keyword that starts with `query` and has more
///    whitespace-separated words than required, and return its leading
///    words joined by single spaces. The required word count is the number
///    of words in `query`, plus one when `query` ends with whitespace.
/// 2. Otherwise return the keyword that starts with `query`, is strictly
///    longer than it (in bytes), and has the greatest trimmed length. When
///    several keywords tie, the one appearing last in `keywords` wins.
///
/// If neither heuristic matches anything, `query` itself is returned.
pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
    // A trailing space means the user already started the next word, so the
    // completion has to include one word more than the query contains.
    let typed_words = query.split_whitespace().count();
    let wanted_words = typed_words + usize::from(query.ends_with(char::is_whitespace));

    // Heuristic 1: first matching keyword that has words to spare.
    for keyword in keywords {
        let phrase = keyword.as_ref();
        if !phrase.starts_with(query) {
            continue;
        }
        let words: Vec<&str> = phrase.split_whitespace().collect();
        if words.len() > wanted_words {
            return words[..wanted_words].join(" ");
        }
    }

    // Heuristic 2: longest strictly-longer keyword that extends the query.
    let mut longest: Option<(usize, &str)> = None;
    for keyword in keywords {
        let phrase = keyword.as_ref();
        if query.len() < phrase.len() && phrase.starts_with(query) {
            let trimmed_len = phrase.trim().len();
            // `>=` keeps the later candidate on ties, matching `max_by_key`.
            if longest.map_or(true, |(best_len, _)| trimmed_len >= best_len) {
                longest = Some((trimmed_len, phrase));
            }
        }
    }

    longest.map_or(query, |(_, phrase)| phrase).to_owned()
}
46
47pub fn filter_map_chunks<T: Clone>(
168 words: &[&str],
169 max_chunk_size: usize,
170 f: impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
171) -> Result<Vec<Vec<T>>> {
172 let normalized_query = words.join(" ");
173 filter_map_chunks_recurse(words, &normalized_query, &mut vec![], 0, max_chunk_size, &f)
174}
175
/// Recursive worker for `filter_map_chunks`.
///
/// Tries every chunk size from 1 to `max_chunk_size` at the front of
/// `remaining_words`, maps the chunk through `f`, and for each mapped value
/// recurses into the words that follow the chunk.
///
/// * `remaining_words` - the words not yet consumed on the current path.
/// * `remaining_query` - the same words as one string. Chunks are sliced out
///   of it by byte length, so it is expected to be exactly `remaining_words`
///   joined by single one-byte separators; otherwise slicing may panic or
///   yield the wrong text.
/// * `path` - mapped values chosen so far on the current path; passed to `f`
///   and restored before this function returns successfully.
/// * `chunk_index` - index of `remaining_words[0]` in the original word list.
/// * `max_chunk_size` - maximum number of words per chunk.
/// * `f` - the filter-map function; `Ok(None)` prunes the chunk.
///
/// Returns every complete path that starts at this position. Errors from `f`
/// are propagated immediately.
fn filter_map_chunks_recurse<T: Clone>(
    remaining_words: &[&str],
    remaining_query: &str,
    path: &mut Vec<T>,
    chunk_index: usize,
    max_chunk_size: usize,
    f: &impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
) -> Result<Vec<Vec<T>>> {
    // Paths accepted (mapped to `Some`) starting from this position.
    let mut this_step_paths: Vec<Vec<T>> = vec![];

    for chunk_size in 1..=max_chunk_size {
        if remaining_words.len() < chunk_size {
            // Not enough words left for this or any larger chunk size.
            break;
        }

        // Byte length of the chunk: the bytes of its words plus one
        // separator between each adjacent pair of words.
        let chunk_byte_len = remaining_words[..chunk_size]
            .iter()
            .fold(chunk_size - 1, |memo, w| memo + w.len());
        let chunk = &remaining_query[..chunk_byte_len];
        let is_last_chunk = chunk_size == remaining_words.len();

        if let Some(mapped_values) = f(chunk, chunk_index, is_last_chunk, &path[..])? {
            for value in mapped_values {
                if is_last_chunk {
                    // End of the path: `value` is not needed afterwards, so
                    // move it instead of cloning.
                    this_step_paths.push(vec![value]);
                } else {
                    // More words remain: expose `value` to `f` through
                    // `path` while exploring everything that can follow it.
                    path.push(value.clone());
                    let subtree_paths = filter_map_chunks_recurse(
                        &remaining_words[chunk_size..],
                        // `+ 1` skips the separator after this chunk.
                        &remaining_query[(chunk_byte_len + 1)..],
                        path,
                        chunk_index + chunk_size,
                        max_chunk_size,
                        f,
                    )?;
                    path.pop();
                    // Prefix each completed tail with this chunk's value.
                    this_step_paths.extend(subtree_paths.into_iter().map(|mut tail| {
                        tail.insert(0, value.clone());
                        tail
                    }));
                }
            }
        }
    }

    Ok(this_step_paths)
}
245
/// Splits `keyword` at its first ASCII space into `(before, after)`.
///
/// The space itself is dropped. When `keyword` contains no space the whole
/// string is returned as the first element and the second is empty.
pub fn split_keyword(keyword: &str) -> (&str, &str) {
    match keyword.find(' ') {
        // A space is one byte long, so the remainder starts at `at + 1`.
        Some(at) => (&keyword[..at], &keyword[at + 1..]),
        None => (keyword, ""),
    }
}
252
253pub fn i18n_cmp(a: &str, b: &str) -> std::cmp::Ordering {
257 UniCase::new(i18n_transform(a)).cmp(&UniCase::new(i18n_transform(b)))
258}
259
260pub fn i18n_transform(s: &str) -> Cow<'_, str> {
266 macro_rules! pattern_remove {
271 () => {
272 '.' | ',' | '\'' | '’'
273 };
274 }
275
276 macro_rules! pattern_replace_with_space {
280 () => {
281 '-'
282 };
283 }
284
285 macro_rules! pattern_all {
286 () => {
287 pattern_remove!() | pattern_replace_with_space!()
288 };
289 }
290
291 let borrowable = !s
292 .nfkd()
293 .any(|c| is_combining_mark(c) || matches!(c, pattern_all!()));
294
295 if borrowable {
296 Cow::from(s)
297 } else {
298 s.nfkd()
299 .filter_map(|c| {
300 if is_combining_mark(c) {
301 None
304 } else {
305 match c {
306 pattern_remove!() => None,
307 pattern_replace_with_space!() => Some(' '),
308 _ => Some(c),
309 }
310 }
311 })
312 .collect::<_>()
313 }
314}
315
316#[cfg(test)]
317mod tests {
318 use super::*;
319 use itertools::Itertools;
320
321 #[test]
322 fn keywords_with_more_words() {
323 assert_eq!(
324 full_keyword(
325 "moz",
326 &[
327 "moz",
328 "mozi",
329 "mozil",
330 "mozill",
331 "mozilla",
332 "mozilla firefox"
333 ]
334 ),
335 "mozilla".to_owned(),
336 );
337 assert_eq!(
338 full_keyword(
339 "mozilla",
340 &[
341 "moz",
342 "mozi",
343 "mozil",
344 "mozill",
345 "mozilla",
346 "mozilla firefox"
347 ]
348 ),
349 "mozilla".to_owned(),
350 );
351 }
352
353 #[test]
354 fn keywords_with_longer_phrase() {
355 assert_eq!(
356 full_keyword("moz", &["moz", "mozi", "mozil", "mozill", "mozilla"]),
357 "mozilla".to_owned()
358 );
359 assert_eq!(
360 full_keyword(
361 "mozilla f",
362 &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
363 ),
364 "mozilla firefox".to_owned()
365 );
366 }
367
368 #[test]
369 fn query_ends_with_space() {
370 assert_eq!(
371 full_keyword(
372 "mozilla ",
373 &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
374 ),
375 "mozilla firefox".to_owned()
376 );
377 }
378
379 fn fmc<T: Clone>(
380 query: &str,
381 max_chunk_size: usize,
382 f: impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
383 ) -> Result<Vec<Vec<T>>> {
384 let words: Vec<_> = query.split_whitespace().collect();
385 filter_map_chunks(&words, max_chunk_size, f)
386 }
387
388 fn check_paths(actual: Vec<Vec<(String, usize)>>, expected: Vec<Vec<(&str, usize)>>) {
389 assert_eq!(
390 actual,
391 expected
392 .into_iter()
393 .map(|p| p
394 .into_iter()
395 .map(|(w, i)| (w.to_string(), i))
396 .collect::<Vec<_>>())
397 .collect::<Vec<Vec<_>>>()
398 );
399 }
400
401 #[test]
402 fn filter_map_chunks_1() -> anyhow::Result<()> {
403 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| {
404 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
405 })?;
406 check_paths(
407 paths,
408 vec![vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)]],
409 );
410 Ok(())
411 }
412
413 #[test]
414 fn filter_map_chunks_2() -> anyhow::Result<()> {
415 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| {
416 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
417 })?;
418 check_paths(
419 paths,
420 vec![
421 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
422 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
423 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
424 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
425 vec![("a", 0), ("b c", 1), ("d e", 3)],
426 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
427 vec![("a b", 0), ("c", 2), ("d e", 3)],
428 vec![("a b", 0), ("c d", 2), ("e", 4)],
429 ],
430 );
431 Ok(())
432 }
433
434 #[test]
435 fn filter_map_chunks_3() -> anyhow::Result<()> {
436 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
437 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
438 })?;
439 check_paths(
440 paths,
441 vec![
442 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
443 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
444 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
445 vec![("a", 0), ("b", 1), ("c d e", 2)],
446 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
447 vec![("a", 0), ("b c", 1), ("d e", 3)],
448 vec![("a", 0), ("b c d", 1), ("e", 4)],
449 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
450 vec![("a b", 0), ("c", 2), ("d e", 3)],
451 vec![("a b", 0), ("c d", 2), ("e", 4)],
452 vec![("a b", 0), ("c d e", 2)],
453 vec![("a b c", 0), ("d", 3), ("e", 4)],
454 vec![("a b c", 0), ("d e", 3)],
455 ],
456 );
457 Ok(())
458 }
459
460 #[test]
461 fn filter_map_chunks_4() -> anyhow::Result<()> {
462 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
463 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
464 })?;
465 check_paths(
466 paths,
467 vec![
468 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
469 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
470 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
471 vec![("a", 0), ("b", 1), ("c d e", 2)],
472 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
473 vec![("a", 0), ("b c", 1), ("d e", 3)],
474 vec![("a", 0), ("b c d", 1), ("e", 4)],
475 vec![("a", 0), ("b c d e", 1)],
476 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
477 vec![("a b", 0), ("c", 2), ("d e", 3)],
478 vec![("a b", 0), ("c d", 2), ("e", 4)],
479 vec![("a b", 0), ("c d e", 2)],
480 vec![("a b c", 0), ("d", 3), ("e", 4)],
481 vec![("a b c", 0), ("d e", 3)],
482 vec![("a b c d", 0), ("e", 4)],
483 ],
484 );
485 Ok(())
486 }
487
488 #[test]
489 fn filter_map_chunks_5() -> anyhow::Result<()> {
490 let paths = fmc("a b c d e", 5, |chunk, chunk_index, _, _| {
491 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
492 })?;
493 check_paths(
494 paths,
495 vec![
496 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
497 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
498 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
499 vec![("a", 0), ("b", 1), ("c d e", 2)],
500 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
501 vec![("a", 0), ("b c", 1), ("d e", 3)],
502 vec![("a", 0), ("b c d", 1), ("e", 4)],
503 vec![("a", 0), ("b c d e", 1)],
504 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
505 vec![("a b", 0), ("c", 2), ("d e", 3)],
506 vec![("a b", 0), ("c d", 2), ("e", 4)],
507 vec![("a b", 0), ("c d e", 2)],
508 vec![("a b c", 0), ("d", 3), ("e", 4)],
509 vec![("a b c", 0), ("d e", 3)],
510 vec![("a b c d", 0), ("e", 4)],
511 vec![("a b c d e", 0)],
512 ],
513 );
514 Ok(())
515 }
516
517 #[test]
518 fn filter_map_chunks_1_map_many() -> anyhow::Result<()> {
519 let paths = fmc("a b c", 1, |chunk, _, _, _| {
520 Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
521 })?;
522 assert_eq!(
523 paths,
524 vec![
525 vec!["a0", "b0", "c0"],
526 vec!["a0", "b0", "c1"],
527 vec!["a0", "b0", "c2"],
528 vec!["a0", "b1", "c0"],
529 vec!["a0", "b1", "c1"],
530 vec!["a0", "b1", "c2"],
531 vec!["a0", "b2", "c0"],
532 vec!["a0", "b2", "c1"],
533 vec!["a0", "b2", "c2"],
534 vec!["a1", "b0", "c0"],
535 vec!["a1", "b0", "c1"],
536 vec!["a1", "b0", "c2"],
537 vec!["a1", "b1", "c0"],
538 vec!["a1", "b1", "c1"],
539 vec!["a1", "b1", "c2"],
540 vec!["a1", "b2", "c0"],
541 vec!["a1", "b2", "c1"],
542 vec!["a1", "b2", "c2"],
543 vec!["a2", "b0", "c0"],
544 vec!["a2", "b0", "c1"],
545 vec!["a2", "b0", "c2"],
546 vec!["a2", "b1", "c0"],
547 vec!["a2", "b1", "c1"],
548 vec!["a2", "b1", "c2"],
549 vec!["a2", "b2", "c0"],
550 vec!["a2", "b2", "c1"],
551 vec!["a2", "b2", "c2"]
552 ]
553 );
554 Ok(())
555 }
556
557 #[test]
558 fn filter_map_chunks_2_map_many() -> anyhow::Result<()> {
559 let paths = fmc("a b c", 2, |chunk, _, _, _| {
560 Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
561 })?;
562 assert_eq!(
563 paths,
564 vec![
565 vec!["a0", "b0", "c0"],
566 vec!["a0", "b0", "c1"],
567 vec!["a0", "b0", "c2"],
568 vec!["a0", "b1", "c0"],
569 vec!["a0", "b1", "c1"],
570 vec!["a0", "b1", "c2"],
571 vec!["a0", "b2", "c0"],
572 vec!["a0", "b2", "c1"],
573 vec!["a0", "b2", "c2"],
574 vec!["a0", "b c0"],
575 vec!["a0", "b c1"],
576 vec!["a0", "b c2"],
577 vec!["a1", "b0", "c0"],
578 vec!["a1", "b0", "c1"],
579 vec!["a1", "b0", "c2"],
580 vec!["a1", "b1", "c0"],
581 vec!["a1", "b1", "c1"],
582 vec!["a1", "b1", "c2"],
583 vec!["a1", "b2", "c0"],
584 vec!["a1", "b2", "c1"],
585 vec!["a1", "b2", "c2"],
586 vec!["a1", "b c0"],
587 vec!["a1", "b c1"],
588 vec!["a1", "b c2"],
589 vec!["a2", "b0", "c0"],
590 vec!["a2", "b0", "c1"],
591 vec!["a2", "b0", "c2"],
592 vec!["a2", "b1", "c0"],
593 vec!["a2", "b1", "c1"],
594 vec!["a2", "b1", "c2"],
595 vec!["a2", "b2", "c0"],
596 vec!["a2", "b2", "c1"],
597 vec!["a2", "b2", "c2"],
598 vec!["a2", "b c0"],
599 vec!["a2", "b c1"],
600 vec!["a2", "b c2"],
601 vec!["a b0", "c0"],
602 vec!["a b0", "c1"],
603 vec!["a b0", "c2"],
604 vec!["a b1", "c0"],
605 vec!["a b1", "c1"],
606 vec!["a b1", "c2"],
607 vec!["a b2", "c0"],
608 vec!["a b2", "c1"],
609 vec!["a b2", "c2"]
610 ]
611 );
612 Ok(())
613 }
614
615 #[test]
616 fn filter_map_chunks_1_prune_a() -> anyhow::Result<()> {
617 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
618 "a" => Ok(None),
619 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
620 })?;
621 check_paths(paths, vec![]);
622 Ok(())
623 }
624
625 #[test]
626 fn filter_map_chunks_1_prune_b() -> anyhow::Result<()> {
627 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
628 "b" => Ok(None),
629 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
630 })?;
631 check_paths(paths, vec![]);
632 Ok(())
633 }
634
635 #[test]
636 fn filter_map_chunks_1_prune_c() -> anyhow::Result<()> {
637 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
638 "c" => Ok(None),
639 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
640 })?;
641 check_paths(paths, vec![]);
642 Ok(())
643 }
644
645 #[test]
646 fn filter_map_chunks_1_prune_d() -> anyhow::Result<()> {
647 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
648 "d" => Ok(None),
649 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
650 })?;
651 check_paths(paths, vec![]);
652 Ok(())
653 }
654
655 #[test]
656 fn filter_map_chunks_1_prune_e() -> anyhow::Result<()> {
657 let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
658 "e" => Ok(None),
659 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
660 })?;
661 check_paths(paths, vec![]);
662 Ok(())
663 }
664
665 #[test]
666 fn filter_map_chunks_2_prune_a() -> anyhow::Result<()> {
667 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
668 "a" => Ok(None),
669 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
670 })?;
671 check_paths(
672 paths,
673 vec![
674 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
675 vec![("a b", 0), ("c", 2), ("d e", 3)],
676 vec![("a b", 0), ("c d", 2), ("e", 4)],
677 ],
678 );
679 Ok(())
680 }
681
682 #[test]
683 fn filter_map_chunks_2_prune_b() -> anyhow::Result<()> {
684 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
685 "b" => Ok(None),
686 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
687 })?;
688 check_paths(
689 paths,
690 vec![
691 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
692 vec![("a", 0), ("b c", 1), ("d e", 3)],
693 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
694 vec![("a b", 0), ("c", 2), ("d e", 3)],
695 vec![("a b", 0), ("c d", 2), ("e", 4)],
696 ],
697 );
698 Ok(())
699 }
700
701 #[test]
702 fn filter_map_chunks_2_prune_c() -> anyhow::Result<()> {
703 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
704 "c" => Ok(None),
705 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
706 })?;
707 check_paths(
708 paths,
709 vec![
710 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
711 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
712 vec![("a", 0), ("b c", 1), ("d e", 3)],
713 vec![("a b", 0), ("c d", 2), ("e", 4)],
714 ],
715 );
716 Ok(())
717 }
718
719 #[test]
720 fn filter_map_chunks_2_prune_d() -> anyhow::Result<()> {
721 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
722 "d" => Ok(None),
723 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
724 })?;
725 check_paths(
726 paths,
727 vec![
728 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
729 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
730 vec![("a", 0), ("b c", 1), ("d e", 3)],
731 vec![("a b", 0), ("c", 2), ("d e", 3)],
732 vec![("a b", 0), ("c d", 2), ("e", 4)],
733 ],
734 );
735 Ok(())
736 }
737
738 #[test]
739 fn filter_map_chunks_2_prune_e() -> anyhow::Result<()> {
740 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
741 "e" => Ok(None),
742 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
743 })?;
744 check_paths(
745 paths,
746 vec![
747 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
748 vec![("a", 0), ("b c", 1), ("d e", 3)],
749 vec![("a b", 0), ("c", 2), ("d e", 3)],
750 ],
751 );
752 Ok(())
753 }
754
755 #[test]
756 fn filter_map_chunks_2_prune_ab() -> anyhow::Result<()> {
757 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
758 "a b" => Ok(None),
759 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
760 })?;
761 check_paths(
762 paths,
763 vec![
764 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
765 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
766 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
767 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
768 vec![("a", 0), ("b c", 1), ("d e", 3)],
769 ],
770 );
771 Ok(())
772 }
773
774 #[test]
775 fn filter_map_chunks_2_prune_bc() -> anyhow::Result<()> {
776 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
777 "b c" => Ok(None),
778 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
779 })?;
780 check_paths(
781 paths,
782 vec![
783 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
784 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
785 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
786 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
787 vec![("a b", 0), ("c", 2), ("d e", 3)],
788 vec![("a b", 0), ("c d", 2), ("e", 4)],
789 ],
790 );
791 Ok(())
792 }
793
794 #[test]
795 fn filter_map_chunks_2_prune_cd() -> anyhow::Result<()> {
796 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
797 "c d" => Ok(None),
798 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
799 })?;
800 check_paths(
801 paths,
802 vec![
803 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
804 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
805 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
806 vec![("a", 0), ("b c", 1), ("d e", 3)],
807 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
808 vec![("a b", 0), ("c", 2), ("d e", 3)],
809 ],
810 );
811 Ok(())
812 }
813
814 #[test]
815 fn filter_map_chunks_2_prune_de() -> anyhow::Result<()> {
816 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
817 "d e" => Ok(None),
818 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
819 })?;
820 check_paths(
821 paths,
822 vec![
823 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
824 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
825 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
826 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
827 vec![("a b", 0), ("c d", 2), ("e", 4)],
828 ],
829 );
830 Ok(())
831 }
832
833 #[test]
834 fn filter_map_chunks_2_prune_a_bc() -> anyhow::Result<()> {
835 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
836 "a" | "b c" => Ok(None),
837 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
838 })?;
839 check_paths(
840 paths,
841 vec![
842 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
843 vec![("a b", 0), ("c", 2), ("d e", 3)],
844 vec![("a b", 0), ("c d", 2), ("e", 4)],
845 ],
846 );
847 Ok(())
848 }
849
850 #[test]
851 fn filter_map_chunks_2_prune_a_cd() -> anyhow::Result<()> {
852 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
853 "a" | "c d" => Ok(None),
854 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
855 })?;
856 check_paths(
857 paths,
858 vec![
859 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
860 vec![("a b", 0), ("c", 2), ("d e", 3)],
861 ],
862 );
863 Ok(())
864 }
865
866 #[test]
867 fn filter_map_chunks_2_prune_bc_cd() -> anyhow::Result<()> {
868 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
869 "b c" | "c d" => Ok(None),
870 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
871 })?;
872 check_paths(
873 paths,
874 vec![
875 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
876 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
877 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
878 vec![("a b", 0), ("c", 2), ("d e", 3)],
879 ],
880 );
881 Ok(())
882 }
883
884 #[test]
885 fn filter_map_chunks_2_prune_bc_de() -> anyhow::Result<()> {
886 let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
887 "b c" | "d e" => Ok(None),
888 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
889 })?;
890 check_paths(
891 paths,
892 vec![
893 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
894 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
895 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
896 vec![("a b", 0), ("c d", 2), ("e", 4)],
897 ],
898 );
899 Ok(())
900 }
901
902 #[test]
903 fn filter_map_chunks_3_prune_a() -> anyhow::Result<()> {
904 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
905 "a" => Ok(None),
906 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
907 })?;
908 check_paths(
909 paths,
910 vec![
911 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
912 vec![("a b", 0), ("c", 2), ("d e", 3)],
913 vec![("a b", 0), ("c d", 2), ("e", 4)],
914 vec![("a b", 0), ("c d e", 2)],
915 vec![("a b c", 0), ("d", 3), ("e", 4)],
916 vec![("a b c", 0), ("d e", 3)],
917 ],
918 );
919 Ok(())
920 }
921
922 #[test]
923 fn filter_map_chunks_3_prune_b() -> anyhow::Result<()> {
924 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
925 "b" => Ok(None),
926 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
927 })?;
928 check_paths(
929 paths,
930 vec![
931 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
932 vec![("a", 0), ("b c", 1), ("d e", 3)],
933 vec![("a", 0), ("b c d", 1), ("e", 4)],
934 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
935 vec![("a b", 0), ("c", 2), ("d e", 3)],
936 vec![("a b", 0), ("c d", 2), ("e", 4)],
937 vec![("a b", 0), ("c d e", 2)],
938 vec![("a b c", 0), ("d", 3), ("e", 4)],
939 vec![("a b c", 0), ("d e", 3)],
940 ],
941 );
942 Ok(())
943 }
944
945 #[test]
946 fn filter_map_chunks_3_prune_c() -> anyhow::Result<()> {
947 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
948 "c" => Ok(None),
949 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
950 })?;
951 check_paths(
952 paths,
953 vec![
954 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
955 vec![("a", 0), ("b", 1), ("c d e", 2)],
956 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
957 vec![("a", 0), ("b c", 1), ("d e", 3)],
958 vec![("a", 0), ("b c d", 1), ("e", 4)],
959 vec![("a b", 0), ("c d", 2), ("e", 4)],
960 vec![("a b", 0), ("c d e", 2)],
961 vec![("a b c", 0), ("d", 3), ("e", 4)],
962 vec![("a b c", 0), ("d e", 3)],
963 ],
964 );
965 Ok(())
966 }
967
968 #[test]
969 fn filter_map_chunks_3_prune_d() -> anyhow::Result<()> {
970 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
971 "d" => Ok(None),
972 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
973 })?;
974 check_paths(
975 paths,
976 vec![
977 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
978 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
979 vec![("a", 0), ("b", 1), ("c d e", 2)],
980 vec![("a", 0), ("b c", 1), ("d e", 3)],
981 vec![("a", 0), ("b c d", 1), ("e", 4)],
982 vec![("a b", 0), ("c", 2), ("d e", 3)],
983 vec![("a b", 0), ("c d", 2), ("e", 4)],
984 vec![("a b", 0), ("c d e", 2)],
985 vec![("a b c", 0), ("d e", 3)],
986 ],
987 );
988 Ok(())
989 }
990
991 #[test]
992 fn filter_map_chunks_3_prune_e() -> anyhow::Result<()> {
993 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
994 "e" => Ok(None),
995 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
996 })?;
997 check_paths(
998 paths,
999 vec![
1000 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1001 vec![("a", 0), ("b", 1), ("c d e", 2)],
1002 vec![("a", 0), ("b c", 1), ("d e", 3)],
1003 vec![("a b", 0), ("c", 2), ("d e", 3)],
1004 vec![("a b", 0), ("c d e", 2)],
1005 vec![("a b c", 0), ("d e", 3)],
1006 ],
1007 );
1008 Ok(())
1009 }
1010
1011 #[test]
1012 fn filter_map_chunks_3_prune_ab() -> anyhow::Result<()> {
1013 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1014 "a b" => Ok(None),
1015 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1016 })?;
1017 check_paths(
1018 paths,
1019 vec![
1020 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1021 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1022 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1023 vec![("a", 0), ("b", 1), ("c d e", 2)],
1024 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1025 vec![("a", 0), ("b c", 1), ("d e", 3)],
1026 vec![("a", 0), ("b c d", 1), ("e", 4)],
1027 vec![("a b c", 0), ("d", 3), ("e", 4)],
1028 vec![("a b c", 0), ("d e", 3)],
1029 ],
1030 );
1031 Ok(())
1032 }
1033
1034 #[test]
1035 fn filter_map_chunks_3_prune_bc() -> anyhow::Result<()> {
1036 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1037 "b c" => Ok(None),
1038 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1039 })?;
1040 check_paths(
1041 paths,
1042 vec![
1043 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1044 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1045 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1046 vec![("a", 0), ("b", 1), ("c d e", 2)],
1047 vec![("a", 0), ("b c d", 1), ("e", 4)],
1048 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1049 vec![("a b", 0), ("c", 2), ("d e", 3)],
1050 vec![("a b", 0), ("c d", 2), ("e", 4)],
1051 vec![("a b", 0), ("c d e", 2)],
1052 vec![("a b c", 0), ("d", 3), ("e", 4)],
1053 vec![("a b c", 0), ("d e", 3)],
1054 ],
1055 );
1056 Ok(())
1057 }
1058
1059 #[test]
1060 fn filter_map_chunks_3_prune_cd() -> anyhow::Result<()> {
1061 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1062 "c d" => Ok(None),
1063 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1064 })?;
1065 check_paths(
1066 paths,
1067 vec![
1068 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1069 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1070 vec![("a", 0), ("b", 1), ("c d e", 2)],
1071 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1072 vec![("a", 0), ("b c", 1), ("d e", 3)],
1073 vec![("a", 0), ("b c d", 1), ("e", 4)],
1074 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1075 vec![("a b", 0), ("c", 2), ("d e", 3)],
1076 vec![("a b", 0), ("c d e", 2)],
1077 vec![("a b c", 0), ("d", 3), ("e", 4)],
1078 vec![("a b c", 0), ("d e", 3)],
1079 ],
1080 );
1081 Ok(())
1082 }
1083
1084 #[test]
1085 fn filter_map_chunks_3_prune_de() -> anyhow::Result<()> {
1086 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1087 "d e" => Ok(None),
1088 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1089 })?;
1090 check_paths(
1091 paths,
1092 vec![
1093 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1094 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1095 vec![("a", 0), ("b", 1), ("c d e", 2)],
1096 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1097 vec![("a", 0), ("b c d", 1), ("e", 4)],
1098 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1099 vec![("a b", 0), ("c d", 2), ("e", 4)],
1100 vec![("a b", 0), ("c d e", 2)],
1101 vec![("a b c", 0), ("d", 3), ("e", 4)],
1102 ],
1103 );
1104 Ok(())
1105 }
1106
1107 #[test]
1108 fn filter_map_chunks_3_prune_abc() -> anyhow::Result<()> {
1109 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1110 "a b c" => Ok(None),
1111 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1112 })?;
1113 check_paths(
1114 paths,
1115 vec![
1116 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1117 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1118 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1119 vec![("a", 0), ("b", 1), ("c d e", 2)],
1120 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1121 vec![("a", 0), ("b c", 1), ("d e", 3)],
1122 vec![("a", 0), ("b c d", 1), ("e", 4)],
1123 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1124 vec![("a b", 0), ("c", 2), ("d e", 3)],
1125 vec![("a b", 0), ("c d", 2), ("e", 4)],
1126 vec![("a b", 0), ("c d e", 2)],
1127 ],
1128 );
1129 Ok(())
1130 }
1131
1132 #[test]
1133 fn filter_map_chunks_3_prune_bcd() -> anyhow::Result<()> {
1134 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1135 "b c d" => Ok(None),
1136 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1137 })?;
1138 check_paths(
1139 paths,
1140 vec![
1141 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1142 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1143 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1144 vec![("a", 0), ("b", 1), ("c d e", 2)],
1145 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1146 vec![("a", 0), ("b c", 1), ("d e", 3)],
1147 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1148 vec![("a b", 0), ("c", 2), ("d e", 3)],
1149 vec![("a b", 0), ("c d", 2), ("e", 4)],
1150 vec![("a b", 0), ("c d e", 2)],
1151 vec![("a b c", 0), ("d", 3), ("e", 4)],
1152 vec![("a b c", 0), ("d e", 3)],
1153 ],
1154 );
1155 Ok(())
1156 }
1157
1158 #[test]
1159 fn filter_map_chunks_3_prune_cde() -> anyhow::Result<()> {
1160 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1161 "c d e" => Ok(None),
1162 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1163 })?;
1164 check_paths(
1165 paths,
1166 vec![
1167 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1168 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1169 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1170 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1171 vec![("a", 0), ("b c", 1), ("d e", 3)],
1172 vec![("a", 0), ("b c d", 1), ("e", 4)],
1173 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1174 vec![("a b", 0), ("c", 2), ("d e", 3)],
1175 vec![("a b", 0), ("c d", 2), ("e", 4)],
1176 vec![("a b c", 0), ("d", 3), ("e", 4)],
1177 vec![("a b c", 0), ("d e", 3)],
1178 ],
1179 );
1180 Ok(())
1181 }
1182
1183 #[test]
1184 fn filter_map_chunks_3_prune_a_bc_cde() -> anyhow::Result<()> {
1185 let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1186 "a" | "b c" | "c d e" => Ok(None),
1187 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1188 })?;
1189 check_paths(
1190 paths,
1191 vec![
1192 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1193 vec![("a b", 0), ("c", 2), ("d e", 3)],
1194 vec![("a b", 0), ("c d", 2), ("e", 4)],
1195 vec![("a b c", 0), ("d", 3), ("e", 4)],
1196 vec![("a b c", 0), ("d e", 3)],
1197 ],
1198 );
1199 Ok(())
1200 }
1201
1202 #[test]
1203 fn filter_map_chunks_4_prune_a() -> anyhow::Result<()> {
1204 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1205 "a" => Ok(None),
1206 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1207 })?;
1208 check_paths(
1209 paths,
1210 vec![
1211 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1212 vec![("a b", 0), ("c", 2), ("d e", 3)],
1213 vec![("a b", 0), ("c d", 2), ("e", 4)],
1214 vec![("a b", 0), ("c d e", 2)],
1215 vec![("a b c", 0), ("d", 3), ("e", 4)],
1216 vec![("a b c", 0), ("d e", 3)],
1217 vec![("a b c d", 0), ("e", 4)],
1218 ],
1219 );
1220 Ok(())
1221 }
1222
1223 #[test]
1224 fn filter_map_chunks_4_prune_b() -> anyhow::Result<()> {
1225 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1226 "b" => Ok(None),
1227 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1228 })?;
1229 check_paths(
1230 paths,
1231 vec![
1232 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1233 vec![("a", 0), ("b c", 1), ("d e", 3)],
1234 vec![("a", 0), ("b c d", 1), ("e", 4)],
1235 vec![("a", 0), ("b c d e", 1)],
1236 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1237 vec![("a b", 0), ("c", 2), ("d e", 3)],
1238 vec![("a b", 0), ("c d", 2), ("e", 4)],
1239 vec![("a b", 0), ("c d e", 2)],
1240 vec![("a b c", 0), ("d", 3), ("e", 4)],
1241 vec![("a b c", 0), ("d e", 3)],
1242 vec![("a b c d", 0), ("e", 4)],
1243 ],
1244 );
1245 Ok(())
1246 }
1247
1248 #[test]
1249 fn filter_map_chunks_4_prune_c() -> anyhow::Result<()> {
1250 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1251 "c" => Ok(None),
1252 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1253 })?;
1254 check_paths(
1255 paths,
1256 vec![
1257 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1258 vec![("a", 0), ("b", 1), ("c d e", 2)],
1259 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1260 vec![("a", 0), ("b c", 1), ("d e", 3)],
1261 vec![("a", 0), ("b c d", 1), ("e", 4)],
1262 vec![("a", 0), ("b c d e", 1)],
1263 vec![("a b", 0), ("c d", 2), ("e", 4)],
1264 vec![("a b", 0), ("c d e", 2)],
1265 vec![("a b c", 0), ("d", 3), ("e", 4)],
1266 vec![("a b c", 0), ("d e", 3)],
1267 vec![("a b c d", 0), ("e", 4)],
1268 ],
1269 );
1270 Ok(())
1271 }
1272
1273 #[test]
1274 fn filter_map_chunks_4_prune_d() -> anyhow::Result<()> {
1275 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1276 "d" => Ok(None),
1277 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1278 })?;
1279 check_paths(
1280 paths,
1281 vec![
1282 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1283 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1284 vec![("a", 0), ("b", 1), ("c d e", 2)],
1285 vec![("a", 0), ("b c", 1), ("d e", 3)],
1286 vec![("a", 0), ("b c d", 1), ("e", 4)],
1287 vec![("a", 0), ("b c d e", 1)],
1288 vec![("a b", 0), ("c", 2), ("d e", 3)],
1289 vec![("a b", 0), ("c d", 2), ("e", 4)],
1290 vec![("a b", 0), ("c d e", 2)],
1291 vec![("a b c", 0), ("d e", 3)],
1292 vec![("a b c d", 0), ("e", 4)],
1293 ],
1294 );
1295 Ok(())
1296 }
1297
1298 #[test]
1299 fn filter_map_chunks_4_prune_e() -> anyhow::Result<()> {
1300 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1301 "e" => Ok(None),
1302 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1303 })?;
1304 check_paths(
1305 paths,
1306 vec![
1307 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1308 vec![("a", 0), ("b", 1), ("c d e", 2)],
1309 vec![("a", 0), ("b c", 1), ("d e", 3)],
1310 vec![("a", 0), ("b c d e", 1)],
1311 vec![("a b", 0), ("c", 2), ("d e", 3)],
1312 vec![("a b", 0), ("c d e", 2)],
1313 vec![("a b c", 0), ("d e", 3)],
1314 ],
1315 );
1316 Ok(())
1317 }
1318
1319 #[test]
1320 fn filter_map_chunks_4_prune_ab() -> anyhow::Result<()> {
1321 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1322 "a b" => Ok(None),
1323 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1324 })?;
1325 check_paths(
1326 paths,
1327 vec![
1328 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1329 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1330 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1331 vec![("a", 0), ("b", 1), ("c d e", 2)],
1332 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1333 vec![("a", 0), ("b c", 1), ("d e", 3)],
1334 vec![("a", 0), ("b c d", 1), ("e", 4)],
1335 vec![("a", 0), ("b c d e", 1)],
1336 vec![("a b c", 0), ("d", 3), ("e", 4)],
1337 vec![("a b c", 0), ("d e", 3)],
1338 vec![("a b c d", 0), ("e", 4)],
1339 ],
1340 );
1341 Ok(())
1342 }
1343
1344 #[test]
1345 fn filter_map_chunks_4_prune_bc() -> anyhow::Result<()> {
1346 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1347 "b c" => Ok(None),
1348 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1349 })?;
1350 check_paths(
1351 paths,
1352 vec![
1353 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1354 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1355 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1356 vec![("a", 0), ("b", 1), ("c d e", 2)],
1357 vec![("a", 0), ("b c d", 1), ("e", 4)],
1358 vec![("a", 0), ("b c d e", 1)],
1359 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1360 vec![("a b", 0), ("c", 2), ("d e", 3)],
1361 vec![("a b", 0), ("c d", 2), ("e", 4)],
1362 vec![("a b", 0), ("c d e", 2)],
1363 vec![("a b c", 0), ("d", 3), ("e", 4)],
1364 vec![("a b c", 0), ("d e", 3)],
1365 vec![("a b c d", 0), ("e", 4)],
1366 ],
1367 );
1368 Ok(())
1369 }
1370
1371 #[test]
1372 fn filter_map_chunks_4_prune_cd() -> anyhow::Result<()> {
1373 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1374 "c d" => Ok(None),
1375 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1376 })?;
1377 check_paths(
1378 paths,
1379 vec![
1380 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1381 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1382 vec![("a", 0), ("b", 1), ("c d e", 2)],
1383 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1384 vec![("a", 0), ("b c", 1), ("d e", 3)],
1385 vec![("a", 0), ("b c d", 1), ("e", 4)],
1386 vec![("a", 0), ("b c d e", 1)],
1387 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1388 vec![("a b", 0), ("c", 2), ("d e", 3)],
1389 vec![("a b", 0), ("c d e", 2)],
1390 vec![("a b c", 0), ("d", 3), ("e", 4)],
1391 vec![("a b c", 0), ("d e", 3)],
1392 vec![("a b c d", 0), ("e", 4)],
1393 ],
1394 );
1395 Ok(())
1396 }
1397
1398 #[test]
1399 fn filter_map_chunks_4_prune_de() -> anyhow::Result<()> {
1400 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1401 "d e" => Ok(None),
1402 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1403 })?;
1404 check_paths(
1405 paths,
1406 vec![
1407 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1408 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1409 vec![("a", 0), ("b", 1), ("c d e", 2)],
1410 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1411 vec![("a", 0), ("b c d", 1), ("e", 4)],
1412 vec![("a", 0), ("b c d e", 1)],
1413 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1414 vec![("a b", 0), ("c d", 2), ("e", 4)],
1415 vec![("a b", 0), ("c d e", 2)],
1416 vec![("a b c", 0), ("d", 3), ("e", 4)],
1417 vec![("a b c d", 0), ("e", 4)],
1418 ],
1419 );
1420 Ok(())
1421 }
1422
1423 #[test]
1424 fn filter_map_chunks_4_prune_abc() -> anyhow::Result<()> {
1425 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1426 "a b c" => Ok(None),
1427 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1428 })?;
1429 check_paths(
1430 paths,
1431 vec![
1432 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1433 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1434 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1435 vec![("a", 0), ("b", 1), ("c d e", 2)],
1436 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1437 vec![("a", 0), ("b c", 1), ("d e", 3)],
1438 vec![("a", 0), ("b c d", 1), ("e", 4)],
1439 vec![("a", 0), ("b c d e", 1)],
1440 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1441 vec![("a b", 0), ("c", 2), ("d e", 3)],
1442 vec![("a b", 0), ("c d", 2), ("e", 4)],
1443 vec![("a b", 0), ("c d e", 2)],
1444 vec![("a b c d", 0), ("e", 4)],
1445 ],
1446 );
1447 Ok(())
1448 }
1449
1450 #[test]
1451 fn filter_map_chunks_4_prune_bcd() -> anyhow::Result<()> {
1452 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1453 "b c d" => Ok(None),
1454 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1455 })?;
1456 check_paths(
1457 paths,
1458 vec![
1459 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1460 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1461 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1462 vec![("a", 0), ("b", 1), ("c d e", 2)],
1463 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1464 vec![("a", 0), ("b c", 1), ("d e", 3)],
1465 vec![("a", 0), ("b c d e", 1)],
1466 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1467 vec![("a b", 0), ("c", 2), ("d e", 3)],
1468 vec![("a b", 0), ("c d", 2), ("e", 4)],
1469 vec![("a b", 0), ("c d e", 2)],
1470 vec![("a b c", 0), ("d", 3), ("e", 4)],
1471 vec![("a b c", 0), ("d e", 3)],
1472 vec![("a b c d", 0), ("e", 4)],
1473 ],
1474 );
1475 Ok(())
1476 }
1477
1478 #[test]
1479 fn filter_map_chunks_4_prune_cde() -> anyhow::Result<()> {
1480 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1481 "c d e" => Ok(None),
1482 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1483 })?;
1484 check_paths(
1485 paths,
1486 vec![
1487 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1488 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1489 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1490 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1491 vec![("a", 0), ("b c", 1), ("d e", 3)],
1492 vec![("a", 0), ("b c d", 1), ("e", 4)],
1493 vec![("a", 0), ("b c d e", 1)],
1494 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1495 vec![("a b", 0), ("c", 2), ("d e", 3)],
1496 vec![("a b", 0), ("c d", 2), ("e", 4)],
1497 vec![("a b c", 0), ("d", 3), ("e", 4)],
1498 vec![("a b c", 0), ("d e", 3)],
1499 vec![("a b c d", 0), ("e", 4)],
1500 ],
1501 );
1502 Ok(())
1503 }
1504
1505 #[test]
1506 fn filter_map_chunks_4_prune_abcd() -> anyhow::Result<()> {
1507 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1508 "a b c d" => Ok(None),
1509 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1510 })?;
1511 check_paths(
1512 paths,
1513 vec![
1514 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1515 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1516 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1517 vec![("a", 0), ("b", 1), ("c d e", 2)],
1518 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1519 vec![("a", 0), ("b c", 1), ("d e", 3)],
1520 vec![("a", 0), ("b c d", 1), ("e", 4)],
1521 vec![("a", 0), ("b c d e", 1)],
1522 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1523 vec![("a b", 0), ("c", 2), ("d e", 3)],
1524 vec![("a b", 0), ("c d", 2), ("e", 4)],
1525 vec![("a b", 0), ("c d e", 2)],
1526 vec![("a b c", 0), ("d", 3), ("e", 4)],
1527 vec![("a b c", 0), ("d e", 3)],
1528 ],
1529 );
1530 Ok(())
1531 }
1532
1533 #[test]
1534 fn filter_map_chunks_4_prune_bcde() -> anyhow::Result<()> {
1535 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1536 "b c d e" => Ok(None),
1537 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1538 })?;
1539 check_paths(
1540 paths,
1541 vec![
1542 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1543 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1544 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1545 vec![("a", 0), ("b", 1), ("c d e", 2)],
1546 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1547 vec![("a", 0), ("b c", 1), ("d e", 3)],
1548 vec![("a", 0), ("b c d", 1), ("e", 4)],
1549 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1550 vec![("a b", 0), ("c", 2), ("d e", 3)],
1551 vec![("a b", 0), ("c d", 2), ("e", 4)],
1552 vec![("a b", 0), ("c d e", 2)],
1553 vec![("a b c", 0), ("d", 3), ("e", 4)],
1554 vec![("a b c", 0), ("d e", 3)],
1555 vec![("a b c d", 0), ("e", 4)],
1556 ],
1557 );
1558 Ok(())
1559 }
1560
1561 #[test]
1562 fn filter_map_chunks_4_prune_a_bc_de() -> anyhow::Result<()> {
1563 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1564 "a" | "b c" | "d e" => Ok(None),
1565 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1566 })?;
1567 check_paths(
1568 paths,
1569 vec![
1570 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1571 vec![("a b", 0), ("c d", 2), ("e", 4)],
1572 vec![("a b", 0), ("c d e", 2)],
1573 vec![("a b c", 0), ("d", 3), ("e", 4)],
1574 vec![("a b c d", 0), ("e", 4)],
1575 ],
1576 );
1577 Ok(())
1578 }
1579
1580 #[test]
1581 fn filter_map_chunks_4_prune_a_bc_cde() -> anyhow::Result<()> {
1582 let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1583 "a" | "b c" | "c d e" => Ok(None),
1584 _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1585 })?;
1586 check_paths(
1587 paths,
1588 vec![
1589 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1590 vec![("a b", 0), ("c", 2), ("d e", 3)],
1591 vec![("a b", 0), ("c d", 2), ("e", 4)],
1592 vec![("a b c", 0), ("d", 3), ("e", 4)],
1593 vec![("a b c", 0), ("d e", 3)],
1594 vec![("a b c d", 0), ("e", 4)],
1595 ],
1596 );
1597 Ok(())
1598 }
1599
1600 #[test]
1601 fn filter_map_chunks_spaces() -> anyhow::Result<()> {
1602 let paths = fmc(" a b c d e ", 2, |chunk, chunk_index, _, _| {
1603 Ok(Some(vec![(chunk.to_string(), chunk_index)]))
1604 })?;
1605 check_paths(
1606 paths,
1607 vec![
1608 vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1609 vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1610 vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1611 vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1612 vec![("a", 0), ("b c", 1), ("d e", 3)],
1613 vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1614 vec![("a b", 0), ("c", 2), ("d e", 3)],
1615 vec![("a b", 0), ("c d", 2), ("e", 4)],
1616 ],
1617 );
1618 Ok(())
1619 }
1620
1621 #[test]
1622 fn filter_map_chunks_is_last_1() -> anyhow::Result<()> {
1623 let paths = fmc("a b c d e", 1, |chunk, _, is_last, _| {
1624 Ok(Some(vec![(chunk.to_string(), is_last as usize)]))
1625 })?;
1626 check_paths(
1627 paths,
1628 vec![vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)]],
1629 );
1630 Ok(())
1631 }
1632
1633 #[test]
1634 fn filter_map_chunks_is_last_2() -> anyhow::Result<()> {
1635 let paths = fmc("a b c d e", 2, |chunk, _, is_last, _| {
1636 Ok(Some(vec![(chunk.to_string(), is_last as usize)]))
1637 })?;
1638 check_paths(
1639 paths,
1640 vec![
1641 vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
1642 vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
1643 vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
1644 vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
1645 vec![("a", 0), ("b c", 0), ("d e", 1)],
1646 vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
1647 vec![("a b", 0), ("c", 0), ("d e", 1)],
1648 vec![("a b", 0), ("c d", 0), ("e", 1)],
1649 ],
1650 );
1651 Ok(())
1652 }
1653
1654 #[test]
1655 fn filter_map_chunks_is_last_3() -> anyhow::Result<()> {
1656 let paths = fmc("a b c d e", 3, |chunk, _, is_last, _| {
1657 Ok(Some(vec![(chunk.to_string(), is_last as usize)]))
1658 })?;
1659 check_paths(
1660 paths,
1661 vec![
1662 vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
1663 vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
1664 vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
1665 vec![("a", 0), ("b", 0), ("c d e", 1)],
1666 vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
1667 vec![("a", 0), ("b c", 0), ("d e", 1)],
1668 vec![("a", 0), ("b c d", 0), ("e", 1)],
1669 vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
1670 vec![("a b", 0), ("c", 0), ("d e", 1)],
1671 vec![("a b", 0), ("c d", 0), ("e", 1)],
1672 vec![("a b", 0), ("c d e", 1)],
1673 vec![("a b c", 0), ("d", 0), ("e", 1)],
1674 vec![("a b c", 0), ("d e", 1)],
1675 ],
1676 );
1677 Ok(())
1678 }
1679
1680 #[test]
1681 fn test_split_keyword() {
1682 assert_eq!(split_keyword("foo"), ("foo", ""));
1683 assert_eq!(split_keyword("foo bar baz"), ("foo", "bar baz"));
1684 }
1685
1686 #[test]
1687 fn i18n_transform() -> anyhow::Result<()> {
1688 let tests = [
1690 ("AbC", "AbC"),
1691 ("AbC dEf", "AbC dEf"),
1692 ("Àęí", "Aei"),
1693 ("Qu\u{00e9}bec", "Quebec"),
1695 ("Que\u{0301}bec", "Quebec"),
1697 ("Gößnitz", "Goßnitz"),
1698 ("St. Louis", "St Louis"),
1699 ("Washington, D.C.", "Washington DC"),
1700 ("U.S.A.", "USA"),
1701 ("Carmel-by-the-Sea", "Carmel by the Sea"),
1702 ("Val-d'Or", "Val dOr"),
1703 ("Val-d’Or", "Val dOr"),
1704 (".,-'()[]?<>", " ()[]?<>"),
1705 ];
1706 for (test_str, expected_str) in tests {
1707 assert_eq!(
1708 super::i18n_transform(test_str),
1709 expected_str,
1710 "Transform test str: {:?}",
1711 test_str
1712 );
1713 }
1714 Ok(())
1715 }
1716
1717 #[test]
1718 fn i18n_cmp() -> anyhow::Result<()> {
1719 let tests = [
1720 ["AbC xYz", "ABC XYZ", "abc xyz"].as_slice(),
1721 &["Àęí", "Aei", "àęí", "aei"],
1722 &[
1723 "Qu\u{00e9}bec",
1725 "Que\u{0301}bec",
1727 "Quebec",
1728 "quebec",
1729 ],
1730 &[
1731 "Gößnitz",
1732 "Gössnitz",
1733 "Goßnitz",
1734 "Gossnitz",
1735 "gößnitz",
1736 "gössnitz",
1737 "goßnitz",
1738 "gossnitz",
1739 ],
1740 &["St. Louis", "St... Louis", "St Louis", "st louis"],
1741 &[
1742 "Washington, D.C.",
1743 "Washington, DC",
1744 "Washington D.C.",
1745 "Washington DC",
1746 "washington dc",
1747 ],
1748 &[
1749 "U.S.A.", "US.A.", "U.SA.", "U.S.A", "USA.", "U.SA", "USA", "usa",
1750 ],
1751 &[
1752 "Val-d'Or",
1753 "Val-d’Or",
1754 "Val-dOr",
1755 "Val d'Or",
1756 "Val d’Or",
1757 "Val dOr",
1758 "val dor",
1759 ],
1760 &[
1761 "Carmel-by-the-Sea",
1762 "Carmel by the Sea",
1763 "carmel by the sea",
1764 ],
1765 &[".,-'()[]?<>", " ()[]?<>"],
1766 ];
1767 for strs in tests {
1768 for a_and_b in strs.iter().permutations(2) {
1769 assert_eq!(
1770 super::i18n_cmp(a_and_b[0], a_and_b[1]),
1771 std::cmp::Ordering::Equal,
1772 "Comparing: {:?}",
1773 a_and_b
1774 );
1775 }
1776 }
1777 Ok(())
1778 }
1779}