suggest/
util.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 */
5
6use icu_casemap::CaseMapperBorrowed;
7use icu_normalizer::DecomposingNormalizerBorrowed;
8use icu_properties::props::GeneralCategory;
9use icu_properties::props::GeneralCategoryGroup;
10use icu_properties::CodePointMapDataBorrowed;
11use std::borrow::Cow;
12
13use crate::Result;
14
// Shared NFKD decomposing normalizer. `i18n_transform` runs its input through
// this before filtering characters.
static NFKD: DecomposingNormalizerBorrowed = DecomposingNormalizerBorrowed::new_nfkd();
// Code point -> general category lookup backing `is_combining_mark`.
static GENERAL_CATEGORY: CodePointMapDataBorrowed<'static, GeneralCategory> =
    icu_properties::CodePointMapData::<GeneralCategory>::new();
// Case mapper used by `i18n_cmp` to case-fold strings before comparing them.
static CASE_MAPPER: CaseMapperBorrowed = CaseMapperBorrowed::new();
19
20fn is_combining_mark(c: char) -> bool {
21    GeneralCategoryGroup::Mark.contains(GENERAL_CATEGORY.get(c))
22}
23
/// Picks, from a suggestion's `keywords`, the phrase that best completes the
/// user's `query`. Only keywords that start with `query` are considered, and
/// two heuristics are tried in order:
///
/// 1. Take the first keyword that has more words than the query (counting one
///    extra word when the query ends with whitespace), and cut it off at the
///    end of the word the user is currently on.
/// 2. Otherwise, take the longest keyword that starts with the query and is
///    strictly longer than it. If several are equally long, the last one in
///    the list wins. If there is none, the query itself is returned.
pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
    // Number of words the completed phrase should contain. A trailing space
    // means the user has moved on to the next word, so ask for one more word
    // to have something to show after the space.
    let mut wanted_words = query.split_whitespace().count();
    if query.ends_with(char::is_whitespace) {
        wanted_words += 1;
    }

    // Heuristic 1: first matching keyword with more words than wanted,
    // truncated to the wanted number of words.
    for keyword in keywords {
        let phrase: &str = keyword.as_ref();
        if !phrase.starts_with(query) {
            continue;
        }
        let words: Vec<&str> = phrase.split_whitespace().collect();
        if words.len() > wanted_words {
            return words[..wanted_words].join(" ");
        }
    }

    // Heuristic 2: longest (by trimmed byte length) matching keyword that is
    // strictly longer than the query.
    let mut longest: Option<(usize, &str)> = None;
    for keyword in keywords {
        let phrase: &str = keyword.as_ref();
        if phrase.len() <= query.len() || !phrase.starts_with(query) {
            continue;
        }
        let trimmed_len = phrase.trim().len();
        // `>=` so that, among equally long candidates, the last one wins.
        if longest.map_or(true, |(best_len, _)| trimmed_len >= best_len) {
            longest = Some((trimmed_len, phrase));
        }
    }

    longest.map_or(query, |(_, phrase)| phrase).to_owned()
}
58
59/// Performs a depth-first traversal over all possible chunk sequences in a
60/// slice, applies a filter-map function to each chunk in each sequence, and
61/// collects the filter-mapped sequences in a `Vec`.
62///
63/// "Chunks" are non-overlapping subslices of the parent slice as described in
64/// [`slice::chunks()`].
65///
66/// WARNING: This function potentially does an exponential amount of work! You
67/// should always be careful to prune the traversal space by returning `None`
/// from your mapper function, as described further below, when a chunk does
69/// not match what you are searching for.
70///
71/// `max_chunk_size` controls the maximum chunk size (in number of words), which
72/// influences the branching factor at each step in the traversal.
73///
74/// At each traversal step, the filter-map function is called like:
75/// `f(chunk, chunk_index, is_last_chunk, path)`.
76///
77/// `chunk` is the chunk at that step, `chunk_index` is its index in the parent
78/// `words` slice, and `is_last_chunk` is true if the chunk is at the end of
79/// `words`. The function can map the chunk to one or more values. Each value
80/// expands the branching factor at the current step by `max_chunk_size`. In
81/// other words, the branching factor at a given traversal step is
82/// `max_chunk_size` multiplied by the number of values returned by the
83/// filter-map function at that step. `path` is the path of mapped values that
/// has been traversed at that step: a sequence of mapped values corresponding
85/// to chunks in the parent `words` slice.
86///
87/// The filter-map function can return `None` to halt traversal at the current
88/// step. Returning `None` sets the branching factor at that step to zero,
89/// pruning the subtree rooted at that step from the traversal space and
90/// discarding the path from the output. This is important for keeping traversal
91/// reasonably bounded.
92///
93/// Traversal ends and the function returns when all paths have been visited.
94/// The returned `Vec` will contain all traversal paths that weren't pruned.
95///
96/// # Examples
97///
98/// Mapping chunks in `["a", "b", "c"]` to uppercase, up to a max chunk size of
99/// `3`:
100///
101/// ```
102/// # use suggest::util::filter_map_chunks;
103/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, _| {
104///     Ok(Some(vec![chunk.to_uppercase()]))
105/// });
106/// assert_eq!(paths.unwrap(), vec![
107///     vec!["A", "B", "C"],
108///     vec!["A", "B C"],
109///     vec!["A B", "C"],
110///     vec!["A B C"]
111/// ]);
112/// ```
113///
114/// Same as previous but using `chunk_index` in the filter-map function to prune
115/// paths that don't start with `"a"`:
116///
117/// ```
118/// # use suggest::util::filter_map_chunks;
119/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, chunk_index, _, _| {
120///     if chunk_index > 0 || chunk == "a" {
121///         Ok(Some(vec![chunk.to_uppercase()]))
122///     } else {
123///         Ok(None)
124///     }
125/// });
126/// assert_eq!(paths.unwrap(), vec![
127///     vec!["A", "B", "C"],
128///     vec!["A", "B C"],
129/// ]);
130/// ```
131///
132/// Same as the first example but using `path` in the filter-map function to
133/// prune paths that include "A B":
134///
135/// ```
136/// # use suggest::util::filter_map_chunks;
137/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, path| {
138///     if path.iter().any(|value| value == "A B") {
139///         Ok(None)
140///     } else {
141///         Ok(Some(vec![chunk.to_uppercase()]))
142///     }
143/// });
144/// assert_eq!(paths.unwrap(), vec![
145///     vec!["A", "B", "C"],
146///     vec!["A", "B C"],
147///     vec!["A B C"],
148/// ]);
149/// ```
150///
151/// Mapping each chunk to multiple values:
152///
153/// ```
154/// # use suggest::util::filter_map_chunks;
155/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, _| {
156///     Ok(Some(vec![format!("{chunk}0"), format!("{chunk}1")]))
157/// });
158/// assert_eq!(paths.unwrap(), vec![
159///     vec!["a0", "b0", "c0"],
160///     vec!["a0", "b0", "c1"],
161///     vec!["a0", "b1", "c0"],
162///     vec!["a0", "b1", "c1"],
163///     vec!["a0", "b c0"],
164///     vec!["a0", "b c1"],
165///     vec!["a1", "b0", "c0"],
166///     vec!["a1", "b0", "c1"],
167///     vec!["a1", "b1", "c0"],
168///     vec!["a1", "b1", "c1"],
169///     vec!["a1", "b c0"],
170///     vec!["a1", "b c1"],
171///     vec!["a b0", "c0"],
172///     vec!["a b0", "c1"],
173///     vec!["a b1", "c0"],
174///     vec!["a b1", "c1"],
175///     vec!["a b c0"],
176///     vec!["a b c1"]
177/// ]);
178/// ```
179pub fn filter_map_chunks<T: Clone>(
180    words: &[&str],
181    max_chunk_size: usize,
182    f: impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
183) -> Result<Vec<Vec<T>>> {
184    let normalized_query = words.join(" ");
185    filter_map_chunks_recurse(words, &normalized_query, &mut vec![], 0, max_chunk_size, &f)
186}
187
188/// `remaining_words` is the slice of remaining words in the query string at
189/// this step. `remaining_query` is the remaining slice of the normalized query
190/// string at this step.
191///
192/// `path` is the sequence of values returned by the filter-map function so far
193/// at this step.
194///
195/// `chunk_index` is the word-based index in the query string at this step.
196fn filter_map_chunks_recurse<T: Clone>(
197    remaining_words: &[&str],
198    remaining_query: &str,
199    path: &mut Vec<T>,
200    chunk_index: usize,
201    max_chunk_size: usize,
202    f: &impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
203) -> Result<Vec<Vec<T>>> {
204    // Filtered-in (non-pruned) paths that will be returned from this step of
205    // the traversal.
206    let mut this_step_paths: Vec<Vec<T>> = vec![];
207
208    for chunk_size in 1..=max_chunk_size {
209        if remaining_words.len() < chunk_size {
210            // `chunk_size` and the later chunk sizes in this for-loop are too
211            // big to visit the remaining words. We already visited them earlier
212            // in the loop when the chunk size was small enough.
213            break;
214        }
215
216        // Get the current chunk within the remaining query. Its byte length is
217        // the sum of the lengths of the words in the chunk + `chunk_size - 1`
218        // spaces between the words. There will only be one space between each
219        // word in `remaining_query` because `remaining_query` is normalized.
220        let chunk_byte_len = remaining_words[..chunk_size]
221            .iter()
222            .fold(chunk_size - 1, |memo, w| memo + w.len());
223        let chunk = &remaining_query[..chunk_byte_len];
224        let is_last_chunk = chunk_size == remaining_words.len();
225
226        // Call the mapper function.
227        if let Some(mapped_values) = f(chunk, chunk_index, is_last_chunk, &path[..])? {
228            for value in mapped_values {
229                if is_last_chunk {
230                    // This is the last chunk in the path. Stop recursing.
231                    this_step_paths.push(vec![value.clone()]);
232                } else {
233                    // Recurse. Note that the new `remaining_words` slice won't
234                    // be empty because if it were, `chunk_size` would equal the
235                    // remaining word count, which is if-branch condition.
236                    path.push(value.clone());
237                    let subtree_paths = filter_map_chunks_recurse(
238                        &remaining_words[chunk_size..],
239                        &remaining_query[(chunk_byte_len + 1)..],
240                        path,
241                        chunk_index + chunk_size,
242                        max_chunk_size,
243                        f,
244                    )?;
245                    path.pop();
246                    for mut p in subtree_paths {
247                        p.insert(0, value.clone());
248                        this_step_paths.push(p);
249                    }
250                }
251            }
252        }
253    }
254
255    Ok(this_step_paths)
256}
257
/// Splits a suggestion keyword at its first space character (`' '`) into a
/// `(prefix, suffix)` pair. The space itself is dropped. If the keyword
/// contains no space, the whole keyword is the prefix and the suffix is empty.
pub fn split_keyword(keyword: &str) -> (&str, &str) {
    match keyword.split_once(' ') {
        Some(prefix_and_suffix) => prefix_and_suffix,
        None => (keyword, ""),
    }
}
264
265/// Compares two strings ignoring case, Unicode combining marks, and some
266/// punctuation. Intended to be used as a Sqlite collating sequence for
267/// comparing natural language strings like keywords and geoname names.
268///
269/// XXX: Should really be using `icu_collator` for collation!
270pub fn i18n_cmp(a: &str, b: &str) -> std::cmp::Ordering {
271    CASE_MAPPER
272        .fold_string(&i18n_transform(a))
273        .cmp(&CASE_MAPPER.fold_string(&i18n_transform(b)))
274}
275
276/// Performs the following transforms on the given string:
277///
278/// * Removes Unicode combining marks
279/// * Removes some punctuation
280/// * Replaces other punctuation with spaces
281pub fn i18n_transform(s: &str) -> Cow<'_, str> {
282    // Punctuation to remove. Examples:
283    //
284    // "Washington, D.C." => "Washington DC"
285    // "L'Assomption" => "LAssomption"
286    macro_rules! pattern_remove {
287        () => {
288            '.' | ',' | '\'' | '’'
289        };
290    }
291
292    // Punctuation to replace with spaces. Examples:
293    //
294    // "Carmel-by-the-Sea" => "Carmel by the Sea"
295    macro_rules! pattern_replace_with_space {
296        () => {
297            '-'
298        };
299    }
300
301    macro_rules! pattern_all {
302        () => {
303            pattern_remove!() | pattern_replace_with_space!()
304        };
305    }
306
307    let borrowable = !NFKD
308        .normalize_iter(s.chars())
309        .any(|c| is_combining_mark(c) || matches!(c, pattern_all!()));
310
311    if borrowable {
312        // XXX: This borrows the non-NFKD form, which sure looks like a bug,
313        // since NFKD can introduce changes that aren't caught by the check
314        // inside `any()` above.
315        Cow::from(s)
316    } else {
317        NFKD.normalize_iter(s.chars())
318            .filter_map(|c| {
319                if is_combining_mark(c) {
320                    // Remove Unicode combining marks:
321                    // "Que\u{0301}bec" => "Quebec"
322                    None
323                } else {
324                    match c {
325                        pattern_remove!() => None,
326                        pattern_replace_with_space!() => Some(' '),
327                        _ => Some(c),
328                    }
329                }
330            })
331            .collect::<_>()
332    }
333}
334
335#[cfg(test)]
336mod tests {
337    use super::*;
338    use itertools::Itertools;
339
340    #[test]
341    fn keywords_with_more_words() {
342        assert_eq!(
343            full_keyword(
344                "moz",
345                &[
346                    "moz",
347                    "mozi",
348                    "mozil",
349                    "mozill",
350                    "mozilla",
351                    "mozilla firefox"
352                ]
353            ),
354            "mozilla".to_owned(),
355        );
356        assert_eq!(
357            full_keyword(
358                "mozilla",
359                &[
360                    "moz",
361                    "mozi",
362                    "mozil",
363                    "mozill",
364                    "mozilla",
365                    "mozilla firefox"
366                ]
367            ),
368            "mozilla".to_owned(),
369        );
370    }
371
372    #[test]
373    fn keywords_with_longer_phrase() {
374        assert_eq!(
375            full_keyword("moz", &["moz", "mozi", "mozil", "mozill", "mozilla"]),
376            "mozilla".to_owned()
377        );
378        assert_eq!(
379            full_keyword(
380                "mozilla f",
381                &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
382            ),
383            "mozilla firefox".to_owned()
384        );
385    }
386
387    #[test]
388    fn query_ends_with_space() {
389        assert_eq!(
390            full_keyword(
391                "mozilla ",
392                &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
393            ),
394            "mozilla firefox".to_owned()
395        );
396    }
397
398    fn fmc<T: Clone>(
399        query: &str,
400        max_chunk_size: usize,
401        f: impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
402    ) -> Result<Vec<Vec<T>>> {
403        let words: Vec<_> = query.split_whitespace().collect();
404        filter_map_chunks(&words, max_chunk_size, f)
405    }
406
407    fn check_paths(actual: Vec<Vec<(String, usize)>>, expected: Vec<Vec<(&str, usize)>>) {
408        assert_eq!(
409            actual,
410            expected
411                .into_iter()
412                .map(|p| p
413                    .into_iter()
414                    .map(|(w, i)| (w.to_string(), i))
415                    .collect::<Vec<_>>())
416                .collect::<Vec<Vec<_>>>()
417        );
418    }
419
420    #[test]
421    fn filter_map_chunks_1() -> anyhow::Result<()> {
422        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| {
423            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
424        })?;
425        check_paths(
426            paths,
427            vec![vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)]],
428        );
429        Ok(())
430    }
431
432    #[test]
433    fn filter_map_chunks_2() -> anyhow::Result<()> {
434        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| {
435            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
436        })?;
437        check_paths(
438            paths,
439            vec![
440                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
441                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
442                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
443                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
444                vec![("a", 0), ("b c", 1), ("d e", 3)],
445                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
446                vec![("a b", 0), ("c", 2), ("d e", 3)],
447                vec![("a b", 0), ("c d", 2), ("e", 4)],
448            ],
449        );
450        Ok(())
451    }
452
453    #[test]
454    fn filter_map_chunks_3() -> anyhow::Result<()> {
455        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
456            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
457        })?;
458        check_paths(
459            paths,
460            vec![
461                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
462                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
463                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
464                vec![("a", 0), ("b", 1), ("c d e", 2)],
465                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
466                vec![("a", 0), ("b c", 1), ("d e", 3)],
467                vec![("a", 0), ("b c d", 1), ("e", 4)],
468                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
469                vec![("a b", 0), ("c", 2), ("d e", 3)],
470                vec![("a b", 0), ("c d", 2), ("e", 4)],
471                vec![("a b", 0), ("c d e", 2)],
472                vec![("a b c", 0), ("d", 3), ("e", 4)],
473                vec![("a b c", 0), ("d e", 3)],
474            ],
475        );
476        Ok(())
477    }
478
479    #[test]
480    fn filter_map_chunks_4() -> anyhow::Result<()> {
481        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
482            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
483        })?;
484        check_paths(
485            paths,
486            vec![
487                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
488                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
489                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
490                vec![("a", 0), ("b", 1), ("c d e", 2)],
491                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
492                vec![("a", 0), ("b c", 1), ("d e", 3)],
493                vec![("a", 0), ("b c d", 1), ("e", 4)],
494                vec![("a", 0), ("b c d e", 1)],
495                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
496                vec![("a b", 0), ("c", 2), ("d e", 3)],
497                vec![("a b", 0), ("c d", 2), ("e", 4)],
498                vec![("a b", 0), ("c d e", 2)],
499                vec![("a b c", 0), ("d", 3), ("e", 4)],
500                vec![("a b c", 0), ("d e", 3)],
501                vec![("a b c d", 0), ("e", 4)],
502            ],
503        );
504        Ok(())
505    }
506
507    #[test]
508    fn filter_map_chunks_5() -> anyhow::Result<()> {
509        let paths = fmc("a b c d e", 5, |chunk, chunk_index, _, _| {
510            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
511        })?;
512        check_paths(
513            paths,
514            vec![
515                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
516                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
517                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
518                vec![("a", 0), ("b", 1), ("c d e", 2)],
519                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
520                vec![("a", 0), ("b c", 1), ("d e", 3)],
521                vec![("a", 0), ("b c d", 1), ("e", 4)],
522                vec![("a", 0), ("b c d e", 1)],
523                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
524                vec![("a b", 0), ("c", 2), ("d e", 3)],
525                vec![("a b", 0), ("c d", 2), ("e", 4)],
526                vec![("a b", 0), ("c d e", 2)],
527                vec![("a b c", 0), ("d", 3), ("e", 4)],
528                vec![("a b c", 0), ("d e", 3)],
529                vec![("a b c d", 0), ("e", 4)],
530                vec![("a b c d e", 0)],
531            ],
532        );
533        Ok(())
534    }
535
536    #[test]
537    fn filter_map_chunks_1_map_many() -> anyhow::Result<()> {
538        let paths = fmc("a b c", 1, |chunk, _, _, _| {
539            Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
540        })?;
541        assert_eq!(
542            paths,
543            vec![
544                vec!["a0", "b0", "c0"],
545                vec!["a0", "b0", "c1"],
546                vec!["a0", "b0", "c2"],
547                vec!["a0", "b1", "c0"],
548                vec!["a0", "b1", "c1"],
549                vec!["a0", "b1", "c2"],
550                vec!["a0", "b2", "c0"],
551                vec!["a0", "b2", "c1"],
552                vec!["a0", "b2", "c2"],
553                vec!["a1", "b0", "c0"],
554                vec!["a1", "b0", "c1"],
555                vec!["a1", "b0", "c2"],
556                vec!["a1", "b1", "c0"],
557                vec!["a1", "b1", "c1"],
558                vec!["a1", "b1", "c2"],
559                vec!["a1", "b2", "c0"],
560                vec!["a1", "b2", "c1"],
561                vec!["a1", "b2", "c2"],
562                vec!["a2", "b0", "c0"],
563                vec!["a2", "b0", "c1"],
564                vec!["a2", "b0", "c2"],
565                vec!["a2", "b1", "c0"],
566                vec!["a2", "b1", "c1"],
567                vec!["a2", "b1", "c2"],
568                vec!["a2", "b2", "c0"],
569                vec!["a2", "b2", "c1"],
570                vec!["a2", "b2", "c2"]
571            ]
572        );
573        Ok(())
574    }
575
576    #[test]
577    fn filter_map_chunks_2_map_many() -> anyhow::Result<()> {
578        let paths = fmc("a b c", 2, |chunk, _, _, _| {
579            Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
580        })?;
581        assert_eq!(
582            paths,
583            vec![
584                vec!["a0", "b0", "c0"],
585                vec!["a0", "b0", "c1"],
586                vec!["a0", "b0", "c2"],
587                vec!["a0", "b1", "c0"],
588                vec!["a0", "b1", "c1"],
589                vec!["a0", "b1", "c2"],
590                vec!["a0", "b2", "c0"],
591                vec!["a0", "b2", "c1"],
592                vec!["a0", "b2", "c2"],
593                vec!["a0", "b c0"],
594                vec!["a0", "b c1"],
595                vec!["a0", "b c2"],
596                vec!["a1", "b0", "c0"],
597                vec!["a1", "b0", "c1"],
598                vec!["a1", "b0", "c2"],
599                vec!["a1", "b1", "c0"],
600                vec!["a1", "b1", "c1"],
601                vec!["a1", "b1", "c2"],
602                vec!["a1", "b2", "c0"],
603                vec!["a1", "b2", "c1"],
604                vec!["a1", "b2", "c2"],
605                vec!["a1", "b c0"],
606                vec!["a1", "b c1"],
607                vec!["a1", "b c2"],
608                vec!["a2", "b0", "c0"],
609                vec!["a2", "b0", "c1"],
610                vec!["a2", "b0", "c2"],
611                vec!["a2", "b1", "c0"],
612                vec!["a2", "b1", "c1"],
613                vec!["a2", "b1", "c2"],
614                vec!["a2", "b2", "c0"],
615                vec!["a2", "b2", "c1"],
616                vec!["a2", "b2", "c2"],
617                vec!["a2", "b c0"],
618                vec!["a2", "b c1"],
619                vec!["a2", "b c2"],
620                vec!["a b0", "c0"],
621                vec!["a b0", "c1"],
622                vec!["a b0", "c2"],
623                vec!["a b1", "c0"],
624                vec!["a b1", "c1"],
625                vec!["a b1", "c2"],
626                vec!["a b2", "c0"],
627                vec!["a b2", "c1"],
628                vec!["a b2", "c2"]
629            ]
630        );
631        Ok(())
632    }
633
634    #[test]
635    fn filter_map_chunks_1_prune_a() -> anyhow::Result<()> {
636        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
637            "a" => Ok(None),
638            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
639        })?;
640        check_paths(paths, vec![]);
641        Ok(())
642    }
643
644    #[test]
645    fn filter_map_chunks_1_prune_b() -> anyhow::Result<()> {
646        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
647            "b" => Ok(None),
648            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
649        })?;
650        check_paths(paths, vec![]);
651        Ok(())
652    }
653
654    #[test]
655    fn filter_map_chunks_1_prune_c() -> anyhow::Result<()> {
656        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
657            "c" => Ok(None),
658            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
659        })?;
660        check_paths(paths, vec![]);
661        Ok(())
662    }
663
664    #[test]
665    fn filter_map_chunks_1_prune_d() -> anyhow::Result<()> {
666        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
667            "d" => Ok(None),
668            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
669        })?;
670        check_paths(paths, vec![]);
671        Ok(())
672    }
673
674    #[test]
675    fn filter_map_chunks_1_prune_e() -> anyhow::Result<()> {
676        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
677            "e" => Ok(None),
678            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
679        })?;
680        check_paths(paths, vec![]);
681        Ok(())
682    }
683
684    #[test]
685    fn filter_map_chunks_2_prune_a() -> anyhow::Result<()> {
686        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
687            "a" => Ok(None),
688            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
689        })?;
690        check_paths(
691            paths,
692            vec![
693                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
694                vec![("a b", 0), ("c", 2), ("d e", 3)],
695                vec![("a b", 0), ("c d", 2), ("e", 4)],
696            ],
697        );
698        Ok(())
699    }
700
701    #[test]
702    fn filter_map_chunks_2_prune_b() -> anyhow::Result<()> {
703        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
704            "b" => Ok(None),
705            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
706        })?;
707        check_paths(
708            paths,
709            vec![
710                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
711                vec![("a", 0), ("b c", 1), ("d e", 3)],
712                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
713                vec![("a b", 0), ("c", 2), ("d e", 3)],
714                vec![("a b", 0), ("c d", 2), ("e", 4)],
715            ],
716        );
717        Ok(())
718    }
719
720    #[test]
721    fn filter_map_chunks_2_prune_c() -> anyhow::Result<()> {
722        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
723            "c" => Ok(None),
724            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
725        })?;
726        check_paths(
727            paths,
728            vec![
729                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
730                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
731                vec![("a", 0), ("b c", 1), ("d e", 3)],
732                vec![("a b", 0), ("c d", 2), ("e", 4)],
733            ],
734        );
735        Ok(())
736    }
737
738    #[test]
739    fn filter_map_chunks_2_prune_d() -> anyhow::Result<()> {
740        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
741            "d" => Ok(None),
742            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
743        })?;
744        check_paths(
745            paths,
746            vec![
747                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
748                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
749                vec![("a", 0), ("b c", 1), ("d e", 3)],
750                vec![("a b", 0), ("c", 2), ("d e", 3)],
751                vec![("a b", 0), ("c d", 2), ("e", 4)],
752            ],
753        );
754        Ok(())
755    }
756
757    #[test]
758    fn filter_map_chunks_2_prune_e() -> anyhow::Result<()> {
759        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
760            "e" => Ok(None),
761            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
762        })?;
763        check_paths(
764            paths,
765            vec![
766                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
767                vec![("a", 0), ("b c", 1), ("d e", 3)],
768                vec![("a b", 0), ("c", 2), ("d e", 3)],
769            ],
770        );
771        Ok(())
772    }
773
774    #[test]
775    fn filter_map_chunks_2_prune_ab() -> anyhow::Result<()> {
776        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
777            "a b" => Ok(None),
778            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
779        })?;
780        check_paths(
781            paths,
782            vec![
783                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
784                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
785                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
786                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
787                vec![("a", 0), ("b c", 1), ("d e", 3)],
788            ],
789        );
790        Ok(())
791    }
792
793    #[test]
794    fn filter_map_chunks_2_prune_bc() -> anyhow::Result<()> {
795        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
796            "b c" => Ok(None),
797            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
798        })?;
799        check_paths(
800            paths,
801            vec![
802                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
803                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
804                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
805                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
806                vec![("a b", 0), ("c", 2), ("d e", 3)],
807                vec![("a b", 0), ("c d", 2), ("e", 4)],
808            ],
809        );
810        Ok(())
811    }
812
813    #[test]
814    fn filter_map_chunks_2_prune_cd() -> anyhow::Result<()> {
815        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
816            "c d" => Ok(None),
817            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
818        })?;
819        check_paths(
820            paths,
821            vec![
822                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
823                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
824                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
825                vec![("a", 0), ("b c", 1), ("d e", 3)],
826                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
827                vec![("a b", 0), ("c", 2), ("d e", 3)],
828            ],
829        );
830        Ok(())
831    }
832
833    #[test]
834    fn filter_map_chunks_2_prune_de() -> anyhow::Result<()> {
835        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
836            "d e" => Ok(None),
837            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
838        })?;
839        check_paths(
840            paths,
841            vec![
842                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
843                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
844                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
845                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
846                vec![("a b", 0), ("c d", 2), ("e", 4)],
847            ],
848        );
849        Ok(())
850    }
851
852    #[test]
853    fn filter_map_chunks_2_prune_a_bc() -> anyhow::Result<()> {
854        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
855            "a" | "b c" => Ok(None),
856            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
857        })?;
858        check_paths(
859            paths,
860            vec![
861                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
862                vec![("a b", 0), ("c", 2), ("d e", 3)],
863                vec![("a b", 0), ("c d", 2), ("e", 4)],
864            ],
865        );
866        Ok(())
867    }
868
869    #[test]
870    fn filter_map_chunks_2_prune_a_cd() -> anyhow::Result<()> {
871        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
872            "a" | "c d" => Ok(None),
873            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
874        })?;
875        check_paths(
876            paths,
877            vec![
878                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
879                vec![("a b", 0), ("c", 2), ("d e", 3)],
880            ],
881        );
882        Ok(())
883    }
884
885    #[test]
886    fn filter_map_chunks_2_prune_bc_cd() -> anyhow::Result<()> {
887        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
888            "b c" | "c d" => Ok(None),
889            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
890        })?;
891        check_paths(
892            paths,
893            vec![
894                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
895                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
896                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
897                vec![("a b", 0), ("c", 2), ("d e", 3)],
898            ],
899        );
900        Ok(())
901    }
902
903    #[test]
904    fn filter_map_chunks_2_prune_bc_de() -> anyhow::Result<()> {
905        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
906            "b c" | "d e" => Ok(None),
907            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
908        })?;
909        check_paths(
910            paths,
911            vec![
912                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
913                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
914                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
915                vec![("a b", 0), ("c d", 2), ("e", 4)],
916            ],
917        );
918        Ok(())
919    }
920
921    #[test]
922    fn filter_map_chunks_3_prune_a() -> anyhow::Result<()> {
923        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
924            "a" => Ok(None),
925            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
926        })?;
927        check_paths(
928            paths,
929            vec![
930                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
931                vec![("a b", 0), ("c", 2), ("d e", 3)],
932                vec![("a b", 0), ("c d", 2), ("e", 4)],
933                vec![("a b", 0), ("c d e", 2)],
934                vec![("a b c", 0), ("d", 3), ("e", 4)],
935                vec![("a b c", 0), ("d e", 3)],
936            ],
937        );
938        Ok(())
939    }
940
941    #[test]
942    fn filter_map_chunks_3_prune_b() -> anyhow::Result<()> {
943        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
944            "b" => Ok(None),
945            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
946        })?;
947        check_paths(
948            paths,
949            vec![
950                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
951                vec![("a", 0), ("b c", 1), ("d e", 3)],
952                vec![("a", 0), ("b c d", 1), ("e", 4)],
953                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
954                vec![("a b", 0), ("c", 2), ("d e", 3)],
955                vec![("a b", 0), ("c d", 2), ("e", 4)],
956                vec![("a b", 0), ("c d e", 2)],
957                vec![("a b c", 0), ("d", 3), ("e", 4)],
958                vec![("a b c", 0), ("d e", 3)],
959            ],
960        );
961        Ok(())
962    }
963
964    #[test]
965    fn filter_map_chunks_3_prune_c() -> anyhow::Result<()> {
966        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
967            "c" => Ok(None),
968            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
969        })?;
970        check_paths(
971            paths,
972            vec![
973                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
974                vec![("a", 0), ("b", 1), ("c d e", 2)],
975                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
976                vec![("a", 0), ("b c", 1), ("d e", 3)],
977                vec![("a", 0), ("b c d", 1), ("e", 4)],
978                vec![("a b", 0), ("c d", 2), ("e", 4)],
979                vec![("a b", 0), ("c d e", 2)],
980                vec![("a b c", 0), ("d", 3), ("e", 4)],
981                vec![("a b c", 0), ("d e", 3)],
982            ],
983        );
984        Ok(())
985    }
986
987    #[test]
988    fn filter_map_chunks_3_prune_d() -> anyhow::Result<()> {
989        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
990            "d" => Ok(None),
991            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
992        })?;
993        check_paths(
994            paths,
995            vec![
996                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
997                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
998                vec![("a", 0), ("b", 1), ("c d e", 2)],
999                vec![("a", 0), ("b c", 1), ("d e", 3)],
1000                vec![("a", 0), ("b c d", 1), ("e", 4)],
1001                vec![("a b", 0), ("c", 2), ("d e", 3)],
1002                vec![("a b", 0), ("c d", 2), ("e", 4)],
1003                vec![("a b", 0), ("c d e", 2)],
1004                vec![("a b c", 0), ("d e", 3)],
1005            ],
1006        );
1007        Ok(())
1008    }
1009
1010    #[test]
1011    fn filter_map_chunks_3_prune_e() -> anyhow::Result<()> {
1012        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1013            "e" => Ok(None),
1014            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1015        })?;
1016        check_paths(
1017            paths,
1018            vec![
1019                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1020                vec![("a", 0), ("b", 1), ("c d e", 2)],
1021                vec![("a", 0), ("b c", 1), ("d e", 3)],
1022                vec![("a b", 0), ("c", 2), ("d e", 3)],
1023                vec![("a b", 0), ("c d e", 2)],
1024                vec![("a b c", 0), ("d e", 3)],
1025            ],
1026        );
1027        Ok(())
1028    }
1029
1030    #[test]
1031    fn filter_map_chunks_3_prune_ab() -> anyhow::Result<()> {
1032        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1033            "a b" => Ok(None),
1034            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1035        })?;
1036        check_paths(
1037            paths,
1038            vec![
1039                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1040                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1041                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1042                vec![("a", 0), ("b", 1), ("c d e", 2)],
1043                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1044                vec![("a", 0), ("b c", 1), ("d e", 3)],
1045                vec![("a", 0), ("b c d", 1), ("e", 4)],
1046                vec![("a b c", 0), ("d", 3), ("e", 4)],
1047                vec![("a b c", 0), ("d e", 3)],
1048            ],
1049        );
1050        Ok(())
1051    }
1052
1053    #[test]
1054    fn filter_map_chunks_3_prune_bc() -> anyhow::Result<()> {
1055        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1056            "b c" => Ok(None),
1057            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1058        })?;
1059        check_paths(
1060            paths,
1061            vec![
1062                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1063                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1064                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1065                vec![("a", 0), ("b", 1), ("c d e", 2)],
1066                vec![("a", 0), ("b c d", 1), ("e", 4)],
1067                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1068                vec![("a b", 0), ("c", 2), ("d e", 3)],
1069                vec![("a b", 0), ("c d", 2), ("e", 4)],
1070                vec![("a b", 0), ("c d e", 2)],
1071                vec![("a b c", 0), ("d", 3), ("e", 4)],
1072                vec![("a b c", 0), ("d e", 3)],
1073            ],
1074        );
1075        Ok(())
1076    }
1077
1078    #[test]
1079    fn filter_map_chunks_3_prune_cd() -> anyhow::Result<()> {
1080        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1081            "c d" => Ok(None),
1082            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1083        })?;
1084        check_paths(
1085            paths,
1086            vec![
1087                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1088                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1089                vec![("a", 0), ("b", 1), ("c d e", 2)],
1090                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1091                vec![("a", 0), ("b c", 1), ("d e", 3)],
1092                vec![("a", 0), ("b c d", 1), ("e", 4)],
1093                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1094                vec![("a b", 0), ("c", 2), ("d e", 3)],
1095                vec![("a b", 0), ("c d e", 2)],
1096                vec![("a b c", 0), ("d", 3), ("e", 4)],
1097                vec![("a b c", 0), ("d e", 3)],
1098            ],
1099        );
1100        Ok(())
1101    }
1102
1103    #[test]
1104    fn filter_map_chunks_3_prune_de() -> anyhow::Result<()> {
1105        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1106            "d e" => Ok(None),
1107            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1108        })?;
1109        check_paths(
1110            paths,
1111            vec![
1112                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1113                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1114                vec![("a", 0), ("b", 1), ("c d e", 2)],
1115                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1116                vec![("a", 0), ("b c d", 1), ("e", 4)],
1117                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1118                vec![("a b", 0), ("c d", 2), ("e", 4)],
1119                vec![("a b", 0), ("c d e", 2)],
1120                vec![("a b c", 0), ("d", 3), ("e", 4)],
1121            ],
1122        );
1123        Ok(())
1124    }
1125
1126    #[test]
1127    fn filter_map_chunks_3_prune_abc() -> anyhow::Result<()> {
1128        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1129            "a b c" => Ok(None),
1130            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1131        })?;
1132        check_paths(
1133            paths,
1134            vec![
1135                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1136                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1137                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1138                vec![("a", 0), ("b", 1), ("c d e", 2)],
1139                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1140                vec![("a", 0), ("b c", 1), ("d e", 3)],
1141                vec![("a", 0), ("b c d", 1), ("e", 4)],
1142                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1143                vec![("a b", 0), ("c", 2), ("d e", 3)],
1144                vec![("a b", 0), ("c d", 2), ("e", 4)],
1145                vec![("a b", 0), ("c d e", 2)],
1146            ],
1147        );
1148        Ok(())
1149    }
1150
1151    #[test]
1152    fn filter_map_chunks_3_prune_bcd() -> anyhow::Result<()> {
1153        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1154            "b c d" => Ok(None),
1155            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1156        })?;
1157        check_paths(
1158            paths,
1159            vec![
1160                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1161                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1162                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1163                vec![("a", 0), ("b", 1), ("c d e", 2)],
1164                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1165                vec![("a", 0), ("b c", 1), ("d e", 3)],
1166                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1167                vec![("a b", 0), ("c", 2), ("d e", 3)],
1168                vec![("a b", 0), ("c d", 2), ("e", 4)],
1169                vec![("a b", 0), ("c d e", 2)],
1170                vec![("a b c", 0), ("d", 3), ("e", 4)],
1171                vec![("a b c", 0), ("d e", 3)],
1172            ],
1173        );
1174        Ok(())
1175    }
1176
1177    #[test]
1178    fn filter_map_chunks_3_prune_cde() -> anyhow::Result<()> {
1179        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1180            "c d e" => Ok(None),
1181            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1182        })?;
1183        check_paths(
1184            paths,
1185            vec![
1186                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1187                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1188                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1189                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1190                vec![("a", 0), ("b c", 1), ("d e", 3)],
1191                vec![("a", 0), ("b c d", 1), ("e", 4)],
1192                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1193                vec![("a b", 0), ("c", 2), ("d e", 3)],
1194                vec![("a b", 0), ("c d", 2), ("e", 4)],
1195                vec![("a b c", 0), ("d", 3), ("e", 4)],
1196                vec![("a b c", 0), ("d e", 3)],
1197            ],
1198        );
1199        Ok(())
1200    }
1201
1202    #[test]
1203    fn filter_map_chunks_3_prune_a_bc_cde() -> anyhow::Result<()> {
1204        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| match chunk {
1205            "a" | "b c" | "c d e" => Ok(None),
1206            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1207        })?;
1208        check_paths(
1209            paths,
1210            vec![
1211                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1212                vec![("a b", 0), ("c", 2), ("d e", 3)],
1213                vec![("a b", 0), ("c d", 2), ("e", 4)],
1214                vec![("a b c", 0), ("d", 3), ("e", 4)],
1215                vec![("a b c", 0), ("d e", 3)],
1216            ],
1217        );
1218        Ok(())
1219    }
1220
1221    #[test]
1222    fn filter_map_chunks_4_prune_a() -> anyhow::Result<()> {
1223        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1224            "a" => Ok(None),
1225            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1226        })?;
1227        check_paths(
1228            paths,
1229            vec![
1230                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1231                vec![("a b", 0), ("c", 2), ("d e", 3)],
1232                vec![("a b", 0), ("c d", 2), ("e", 4)],
1233                vec![("a b", 0), ("c d e", 2)],
1234                vec![("a b c", 0), ("d", 3), ("e", 4)],
1235                vec![("a b c", 0), ("d e", 3)],
1236                vec![("a b c d", 0), ("e", 4)],
1237            ],
1238        );
1239        Ok(())
1240    }
1241
1242    #[test]
1243    fn filter_map_chunks_4_prune_b() -> anyhow::Result<()> {
1244        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1245            "b" => Ok(None),
1246            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1247        })?;
1248        check_paths(
1249            paths,
1250            vec![
1251                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1252                vec![("a", 0), ("b c", 1), ("d e", 3)],
1253                vec![("a", 0), ("b c d", 1), ("e", 4)],
1254                vec![("a", 0), ("b c d e", 1)],
1255                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1256                vec![("a b", 0), ("c", 2), ("d e", 3)],
1257                vec![("a b", 0), ("c d", 2), ("e", 4)],
1258                vec![("a b", 0), ("c d e", 2)],
1259                vec![("a b c", 0), ("d", 3), ("e", 4)],
1260                vec![("a b c", 0), ("d e", 3)],
1261                vec![("a b c d", 0), ("e", 4)],
1262            ],
1263        );
1264        Ok(())
1265    }
1266
1267    #[test]
1268    fn filter_map_chunks_4_prune_c() -> anyhow::Result<()> {
1269        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1270            "c" => Ok(None),
1271            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1272        })?;
1273        check_paths(
1274            paths,
1275            vec![
1276                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1277                vec![("a", 0), ("b", 1), ("c d e", 2)],
1278                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1279                vec![("a", 0), ("b c", 1), ("d e", 3)],
1280                vec![("a", 0), ("b c d", 1), ("e", 4)],
1281                vec![("a", 0), ("b c d e", 1)],
1282                vec![("a b", 0), ("c d", 2), ("e", 4)],
1283                vec![("a b", 0), ("c d e", 2)],
1284                vec![("a b c", 0), ("d", 3), ("e", 4)],
1285                vec![("a b c", 0), ("d e", 3)],
1286                vec![("a b c d", 0), ("e", 4)],
1287            ],
1288        );
1289        Ok(())
1290    }
1291
1292    #[test]
1293    fn filter_map_chunks_4_prune_d() -> anyhow::Result<()> {
1294        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1295            "d" => Ok(None),
1296            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1297        })?;
1298        check_paths(
1299            paths,
1300            vec![
1301                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1302                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1303                vec![("a", 0), ("b", 1), ("c d e", 2)],
1304                vec![("a", 0), ("b c", 1), ("d e", 3)],
1305                vec![("a", 0), ("b c d", 1), ("e", 4)],
1306                vec![("a", 0), ("b c d e", 1)],
1307                vec![("a b", 0), ("c", 2), ("d e", 3)],
1308                vec![("a b", 0), ("c d", 2), ("e", 4)],
1309                vec![("a b", 0), ("c d e", 2)],
1310                vec![("a b c", 0), ("d e", 3)],
1311                vec![("a b c d", 0), ("e", 4)],
1312            ],
1313        );
1314        Ok(())
1315    }
1316
1317    #[test]
1318    fn filter_map_chunks_4_prune_e() -> anyhow::Result<()> {
1319        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1320            "e" => Ok(None),
1321            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1322        })?;
1323        check_paths(
1324            paths,
1325            vec![
1326                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1327                vec![("a", 0), ("b", 1), ("c d e", 2)],
1328                vec![("a", 0), ("b c", 1), ("d e", 3)],
1329                vec![("a", 0), ("b c d e", 1)],
1330                vec![("a b", 0), ("c", 2), ("d e", 3)],
1331                vec![("a b", 0), ("c d e", 2)],
1332                vec![("a b c", 0), ("d e", 3)],
1333            ],
1334        );
1335        Ok(())
1336    }
1337
1338    #[test]
1339    fn filter_map_chunks_4_prune_ab() -> anyhow::Result<()> {
1340        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1341            "a b" => Ok(None),
1342            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1343        })?;
1344        check_paths(
1345            paths,
1346            vec![
1347                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1348                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1349                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1350                vec![("a", 0), ("b", 1), ("c d e", 2)],
1351                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1352                vec![("a", 0), ("b c", 1), ("d e", 3)],
1353                vec![("a", 0), ("b c d", 1), ("e", 4)],
1354                vec![("a", 0), ("b c d e", 1)],
1355                vec![("a b c", 0), ("d", 3), ("e", 4)],
1356                vec![("a b c", 0), ("d e", 3)],
1357                vec![("a b c d", 0), ("e", 4)],
1358            ],
1359        );
1360        Ok(())
1361    }
1362
1363    #[test]
1364    fn filter_map_chunks_4_prune_bc() -> anyhow::Result<()> {
1365        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1366            "b c" => Ok(None),
1367            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1368        })?;
1369        check_paths(
1370            paths,
1371            vec![
1372                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1373                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1374                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1375                vec![("a", 0), ("b", 1), ("c d e", 2)],
1376                vec![("a", 0), ("b c d", 1), ("e", 4)],
1377                vec![("a", 0), ("b c d e", 1)],
1378                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1379                vec![("a b", 0), ("c", 2), ("d e", 3)],
1380                vec![("a b", 0), ("c d", 2), ("e", 4)],
1381                vec![("a b", 0), ("c d e", 2)],
1382                vec![("a b c", 0), ("d", 3), ("e", 4)],
1383                vec![("a b c", 0), ("d e", 3)],
1384                vec![("a b c d", 0), ("e", 4)],
1385            ],
1386        );
1387        Ok(())
1388    }
1389
1390    #[test]
1391    fn filter_map_chunks_4_prune_cd() -> anyhow::Result<()> {
1392        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1393            "c d" => Ok(None),
1394            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1395        })?;
1396        check_paths(
1397            paths,
1398            vec![
1399                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1400                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1401                vec![("a", 0), ("b", 1), ("c d e", 2)],
1402                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1403                vec![("a", 0), ("b c", 1), ("d e", 3)],
1404                vec![("a", 0), ("b c d", 1), ("e", 4)],
1405                vec![("a", 0), ("b c d e", 1)],
1406                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1407                vec![("a b", 0), ("c", 2), ("d e", 3)],
1408                vec![("a b", 0), ("c d e", 2)],
1409                vec![("a b c", 0), ("d", 3), ("e", 4)],
1410                vec![("a b c", 0), ("d e", 3)],
1411                vec![("a b c d", 0), ("e", 4)],
1412            ],
1413        );
1414        Ok(())
1415    }
1416
1417    #[test]
1418    fn filter_map_chunks_4_prune_de() -> anyhow::Result<()> {
1419        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1420            "d e" => Ok(None),
1421            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1422        })?;
1423        check_paths(
1424            paths,
1425            vec![
1426                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1427                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1428                vec![("a", 0), ("b", 1), ("c d e", 2)],
1429                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1430                vec![("a", 0), ("b c d", 1), ("e", 4)],
1431                vec![("a", 0), ("b c d e", 1)],
1432                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1433                vec![("a b", 0), ("c d", 2), ("e", 4)],
1434                vec![("a b", 0), ("c d e", 2)],
1435                vec![("a b c", 0), ("d", 3), ("e", 4)],
1436                vec![("a b c d", 0), ("e", 4)],
1437            ],
1438        );
1439        Ok(())
1440    }
1441
1442    #[test]
1443    fn filter_map_chunks_4_prune_abc() -> anyhow::Result<()> {
1444        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1445            "a b c" => Ok(None),
1446            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1447        })?;
1448        check_paths(
1449            paths,
1450            vec![
1451                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1452                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1453                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1454                vec![("a", 0), ("b", 1), ("c d e", 2)],
1455                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1456                vec![("a", 0), ("b c", 1), ("d e", 3)],
1457                vec![("a", 0), ("b c d", 1), ("e", 4)],
1458                vec![("a", 0), ("b c d e", 1)],
1459                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1460                vec![("a b", 0), ("c", 2), ("d e", 3)],
1461                vec![("a b", 0), ("c d", 2), ("e", 4)],
1462                vec![("a b", 0), ("c d e", 2)],
1463                vec![("a b c d", 0), ("e", 4)],
1464            ],
1465        );
1466        Ok(())
1467    }
1468
1469    #[test]
1470    fn filter_map_chunks_4_prune_bcd() -> anyhow::Result<()> {
1471        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1472            "b c d" => Ok(None),
1473            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1474        })?;
1475        check_paths(
1476            paths,
1477            vec![
1478                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1479                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1480                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1481                vec![("a", 0), ("b", 1), ("c d e", 2)],
1482                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1483                vec![("a", 0), ("b c", 1), ("d e", 3)],
1484                vec![("a", 0), ("b c d e", 1)],
1485                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1486                vec![("a b", 0), ("c", 2), ("d e", 3)],
1487                vec![("a b", 0), ("c d", 2), ("e", 4)],
1488                vec![("a b", 0), ("c d e", 2)],
1489                vec![("a b c", 0), ("d", 3), ("e", 4)],
1490                vec![("a b c", 0), ("d e", 3)],
1491                vec![("a b c d", 0), ("e", 4)],
1492            ],
1493        );
1494        Ok(())
1495    }
1496
1497    #[test]
1498    fn filter_map_chunks_4_prune_cde() -> anyhow::Result<()> {
1499        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1500            "c d e" => Ok(None),
1501            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1502        })?;
1503        check_paths(
1504            paths,
1505            vec![
1506                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1507                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1508                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1509                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1510                vec![("a", 0), ("b c", 1), ("d e", 3)],
1511                vec![("a", 0), ("b c d", 1), ("e", 4)],
1512                vec![("a", 0), ("b c d e", 1)],
1513                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1514                vec![("a b", 0), ("c", 2), ("d e", 3)],
1515                vec![("a b", 0), ("c d", 2), ("e", 4)],
1516                vec![("a b c", 0), ("d", 3), ("e", 4)],
1517                vec![("a b c", 0), ("d e", 3)],
1518                vec![("a b c d", 0), ("e", 4)],
1519            ],
1520        );
1521        Ok(())
1522    }
1523
1524    #[test]
1525    fn filter_map_chunks_4_prune_abcd() -> anyhow::Result<()> {
1526        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1527            "a b c d" => Ok(None),
1528            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1529        })?;
1530        check_paths(
1531            paths,
1532            vec![
1533                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1534                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1535                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1536                vec![("a", 0), ("b", 1), ("c d e", 2)],
1537                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1538                vec![("a", 0), ("b c", 1), ("d e", 3)],
1539                vec![("a", 0), ("b c d", 1), ("e", 4)],
1540                vec![("a", 0), ("b c d e", 1)],
1541                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1542                vec![("a b", 0), ("c", 2), ("d e", 3)],
1543                vec![("a b", 0), ("c d", 2), ("e", 4)],
1544                vec![("a b", 0), ("c d e", 2)],
1545                vec![("a b c", 0), ("d", 3), ("e", 4)],
1546                vec![("a b c", 0), ("d e", 3)],
1547            ],
1548        );
1549        Ok(())
1550    }
1551
1552    #[test]
1553    fn filter_map_chunks_4_prune_bcde() -> anyhow::Result<()> {
1554        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1555            "b c d e" => Ok(None),
1556            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1557        })?;
1558        check_paths(
1559            paths,
1560            vec![
1561                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1562                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1563                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1564                vec![("a", 0), ("b", 1), ("c d e", 2)],
1565                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1566                vec![("a", 0), ("b c", 1), ("d e", 3)],
1567                vec![("a", 0), ("b c d", 1), ("e", 4)],
1568                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1569                vec![("a b", 0), ("c", 2), ("d e", 3)],
1570                vec![("a b", 0), ("c d", 2), ("e", 4)],
1571                vec![("a b", 0), ("c d e", 2)],
1572                vec![("a b c", 0), ("d", 3), ("e", 4)],
1573                vec![("a b c", 0), ("d e", 3)],
1574                vec![("a b c d", 0), ("e", 4)],
1575            ],
1576        );
1577        Ok(())
1578    }
1579
1580    #[test]
1581    fn filter_map_chunks_4_prune_a_bc_de() -> anyhow::Result<()> {
1582        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1583            "a" | "b c" | "d e" => Ok(None),
1584            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1585        })?;
1586        check_paths(
1587            paths,
1588            vec![
1589                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1590                vec![("a b", 0), ("c d", 2), ("e", 4)],
1591                vec![("a b", 0), ("c d e", 2)],
1592                vec![("a b c", 0), ("d", 3), ("e", 4)],
1593                vec![("a b c d", 0), ("e", 4)],
1594            ],
1595        );
1596        Ok(())
1597    }
1598
1599    #[test]
1600    fn filter_map_chunks_4_prune_a_bc_cde() -> anyhow::Result<()> {
1601        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| match chunk {
1602            "a" | "b c" | "c d e" => Ok(None),
1603            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
1604        })?;
1605        check_paths(
1606            paths,
1607            vec![
1608                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1609                vec![("a b", 0), ("c", 2), ("d e", 3)],
1610                vec![("a b", 0), ("c d", 2), ("e", 4)],
1611                vec![("a b c", 0), ("d", 3), ("e", 4)],
1612                vec![("a b c", 0), ("d e", 3)],
1613                vec![("a b c d", 0), ("e", 4)],
1614            ],
1615        );
1616        Ok(())
1617    }
1618
1619    #[test]
1620    fn filter_map_chunks_spaces() -> anyhow::Result<()> {
1621        let paths = fmc("   a   b  c        d  e ", 2, |chunk, chunk_index, _, _| {
1622            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
1623        })?;
1624        check_paths(
1625            paths,
1626            vec![
1627                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1628                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1629                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1630                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1631                vec![("a", 0), ("b c", 1), ("d e", 3)],
1632                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1633                vec![("a b", 0), ("c", 2), ("d e", 3)],
1634                vec![("a b", 0), ("c d", 2), ("e", 4)],
1635            ],
1636        );
1637        Ok(())
1638    }
1639
1640    #[test]
1641    fn filter_map_chunks_is_last_1() -> anyhow::Result<()> {
1642        let paths = fmc("a b c d e", 1, |chunk, _, is_last, _| {
1643            Ok(Some(vec![(chunk.to_string(), is_last as usize)]))
1644        })?;
1645        check_paths(
1646            paths,
1647            vec![vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)]],
1648        );
1649        Ok(())
1650    }
1651
1652    #[test]
1653    fn filter_map_chunks_is_last_2() -> anyhow::Result<()> {
1654        let paths = fmc("a b c d e", 2, |chunk, _, is_last, _| {
1655            Ok(Some(vec![(chunk.to_string(), is_last as usize)]))
1656        })?;
1657        check_paths(
1658            paths,
1659            vec![
1660                vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
1661                vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
1662                vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
1663                vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
1664                vec![("a", 0), ("b c", 0), ("d e", 1)],
1665                vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
1666                vec![("a b", 0), ("c", 0), ("d e", 1)],
1667                vec![("a b", 0), ("c d", 0), ("e", 1)],
1668            ],
1669        );
1670        Ok(())
1671    }
1672
1673    #[test]
1674    fn filter_map_chunks_is_last_3() -> anyhow::Result<()> {
1675        let paths = fmc("a b c d e", 3, |chunk, _, is_last, _| {
1676            Ok(Some(vec![(chunk.to_string(), is_last as usize)]))
1677        })?;
1678        check_paths(
1679            paths,
1680            vec![
1681                vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
1682                vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
1683                vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
1684                vec![("a", 0), ("b", 0), ("c d e", 1)],
1685                vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
1686                vec![("a", 0), ("b c", 0), ("d e", 1)],
1687                vec![("a", 0), ("b c d", 0), ("e", 1)],
1688                vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
1689                vec![("a b", 0), ("c", 0), ("d e", 1)],
1690                vec![("a b", 0), ("c d", 0), ("e", 1)],
1691                vec![("a b", 0), ("c d e", 1)],
1692                vec![("a b c", 0), ("d", 0), ("e", 1)],
1693                vec![("a b c", 0), ("d e", 1)],
1694            ],
1695        );
1696        Ok(())
1697    }
1698
1699    #[test]
1700    fn test_split_keyword() {
1701        assert_eq!(split_keyword("foo"), ("foo", ""));
1702        assert_eq!(split_keyword("foo bar baz"), ("foo", "bar baz"));
1703    }
1704
1705    #[test]
1706    fn i18n_transform() -> anyhow::Result<()> {
1707        // (test str, expected str)
1708        let tests = [
1709            ("AbC", "AbC"),
1710            ("AbC dEf", "AbC dEf"),
1711            ("Àęí", "Aei"),
1712            // "Québec" with single 'é' char
1713            ("Qu\u{00e9}bec", "Quebec"),
1714            // "Québec" with ASCII 'e' followed by combining acute accent
1715            ("Que\u{0301}bec", "Quebec"),
1716            ("Gößnitz", "Goßnitz"),
1717            ("St. Louis", "St Louis"),
1718            ("Washington, D.C.", "Washington DC"),
1719            ("U.S.A.", "USA"),
1720            ("Carmel-by-the-Sea", "Carmel by the Sea"),
1721            ("Val-d'Or", "Val dOr"),
1722            ("Val-d’Or", "Val dOr"),
1723            (".,-'()[]?<>", " ()[]?<>"),
1724        ];
1725        for (test_str, expected_str) in tests {
1726            assert_eq!(
1727                super::i18n_transform(test_str),
1728                expected_str,
1729                "Transform test str: {:?}",
1730                test_str
1731            );
1732        }
1733        Ok(())
1734    }
1735
1736    #[test]
1737    fn i18n_cmp() -> anyhow::Result<()> {
1738        let tests = [
1739            ["AbC xYz", "ABC XYZ", "abc xyz"].as_slice(),
1740            &["Àęí", "Aei", "àęí", "aei"],
1741            &[
1742                // "Québec" with single 'é' char
1743                "Qu\u{00e9}bec",
1744                // "Québec" with ASCII 'e' followed by combining acute accent
1745                "Que\u{0301}bec",
1746                "Quebec",
1747                "quebec",
1748            ],
1749            &[
1750                "Gößnitz",
1751                "Gössnitz",
1752                "Goßnitz",
1753                "Gossnitz",
1754                "gößnitz",
1755                "gössnitz",
1756                "goßnitz",
1757                "gossnitz",
1758            ],
1759            &["St. Louis", "St... Louis", "St Louis", "st louis"],
1760            &[
1761                "Washington, D.C.",
1762                "Washington, DC",
1763                "Washington D.C.",
1764                "Washington DC",
1765                "washington dc",
1766            ],
1767            &[
1768                "U.S.A.", "US.A.", "U.SA.", "U.S.A", "USA.", "U.SA", "USA", "usa",
1769            ],
1770            &[
1771                "Val-d'Or",
1772                "Val-d’Or",
1773                "Val-dOr",
1774                "Val d'Or",
1775                "Val d’Or",
1776                "Val dOr",
1777                "val dor",
1778            ],
1779            &[
1780                "Carmel-by-the-Sea",
1781                "Carmel by the Sea",
1782                "carmel by the sea",
1783            ],
1784            &[".,-'()[]?<>", " ()[]?<>"],
1785        ];
1786        for strs in tests {
1787            for a_and_b in strs.iter().permutations(2) {
1788                assert_eq!(
1789                    super::i18n_cmp(a_and_b[0], a_and_b[1]),
1790                    std::cmp::Ordering::Equal,
1791                    "Comparing: {:?}",
1792                    a_and_b
1793                );
1794            }
1795        }
1796        Ok(())
1797    }
1798}