suggest/
util.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 */
5
6use std::borrow::Cow;
7use unicase::UniCase;
8use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};
9
10use crate::Result;
11
/// Picks, from a suggestion's keyword list, the phrase that best completes the
/// user's `query`.
///
/// Only keywords that start with `query` (an exact, case-sensitive prefix
/// match) are considered. Two heuristics are tried in order:
///
/// 1. Take the first such keyword that has more words than the target word
///    count and cut it down to exactly that many words, joined by single
///    spaces. The target is the number of words in the query, plus one when
///    the query ends with whitespace, so that the completed phrase can show a
///    word after the trailing space.
/// 2. Otherwise, take the keyword that is strictly longer than the query and
///    whose trimmed length is greatest (the last one wins a tie). It is
///    returned as-is, untrimmed. If there is no such keyword, the query itself
///    is returned.
pub fn full_keyword(query: &str, keywords: &[impl AsRef<str>]) -> String {
    let mut target_word_count = query.split_whitespace().count();
    if query.ends_with(char::is_whitespace) {
        target_word_count += 1;
    }

    // Heuristic 1: the first matching keyword with words to spare, truncated
    // to the target word count.
    for keyword in keywords {
        let phrase = keyword.as_ref();
        if !phrase.starts_with(query) {
            continue;
        }
        let phrase_words: Vec<&str> = phrase.split_whitespace().collect();
        if phrase_words.len() > target_word_count {
            return phrase_words[..target_word_count].join(" ");
        }
    }

    // Heuristic 2: the longest matching keyword that extends the query. `>=`
    // makes a later keyword replace an earlier one of the same trimmed length.
    let mut best: Option<(usize, &str)> = None;
    for keyword in keywords {
        let phrase = keyword.as_ref();
        if phrase.len() <= query.len() || !phrase.starts_with(query) {
            continue;
        }
        let trimmed_len = phrase.trim().len();
        let replaces_best = match best {
            Some((best_len, _)) => trimmed_len >= best_len,
            None => true,
        };
        if replaces_best {
            best = Some((trimmed_len, phrase));
        }
    }

    match best {
        Some((_, phrase)) => phrase.to_owned(),
        None => query.to_owned(),
    }
}
46
47/// Performs a depth-first traversal over all possible chunk sequences in a
48/// slice, applies a filter-map function to each chunk in each sequence, and
49/// collects the filter-mapped sequences in a `Vec`.
50///
51/// "Chunks" are non-overlapping subslices of the parent slice as described in
52/// [`slice::chunks()`].
53///
54/// WARNING: This function potentially does an exponential amount of work! You
55/// should always be careful to prune the traversal space by returning `None`
56/// from your mappper function, as described further below, when a chunk does
57/// not match what you are searching for.
58///
59/// `max_chunk_size` controls the maximum chunk size (in number of words), which
60/// influences the branching factor at each step in the traversal.
61///
62/// At each traversal step, the filter-map function is called like:
63/// `f(chunk, chunk_index, is_last_chunk, path)`.
64///
65/// `chunk` is the chunk at that step, `chunk_index` is its index in the parent
66/// `words` slice, and `is_last_chunk` is true if the chunk is at the end of
67/// `words`. The function can map the chunk to one or more values. Each value
68/// expands the branching factor at the current step by `max_chunk_size`. In
69/// other words, the branching factor at a given traversal step is
70/// `max_chunk_size` multiplied by the number of values returned by the
71/// filter-map function at that step. `path` is the path of mapped values that
72/// has been travsersed at that step: a sequence of mapped values corresponding
73/// to chunks in the parent `words` slice.
74///
75/// The filter-map function can return `None` to halt traversal at the current
76/// step. Returning `None` sets the branching factor at that step to zero,
77/// pruning the subtree rooted at that step from the traversal space and
78/// discarding the path from the output. This is important for keeping traversal
79/// reasonably bounded.
80///
81/// Traversal ends and the function returns when all paths have been visited.
82/// The returned `Vec` will contain all traversal paths that weren't pruned.
83///
84/// # Examples
85///
86/// Mapping chunks in `["a", "b", "c"]` to uppercase, up to a max chunk size of
87/// `3`:
88///
89/// ```
90/// # use suggest::util::filter_map_chunks;
91/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, _| {
92///     Ok(Some(vec![chunk.to_uppercase()]))
93/// });
94/// assert_eq!(paths.unwrap(), vec![
95///     vec!["A", "B", "C"],
96///     vec!["A", "B C"],
97///     vec!["A B", "C"],
98///     vec!["A B C"]
99/// ]);
100/// ```
101///
102/// Same as previous but using `chunk_index` in the filter-map function to prune
103/// paths that don't start with `"a"`:
104///
105/// ```
106/// # use suggest::util::filter_map_chunks;
107/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, chunk_index, _, _| {
108///     if chunk_index > 0 || chunk == "a" {
109///         Ok(Some(vec![chunk.to_uppercase()]))
110///     } else {
111///         Ok(None)
112///     }
113/// });
114/// assert_eq!(paths.unwrap(), vec![
115///     vec!["A", "B", "C"],
116///     vec!["A", "B C"],
117/// ]);
118/// ```
119///
120/// Same as the first example but using `path` in the filter-map function to
121/// prune paths that include "A B":
122///
123/// ```
124/// # use suggest::util::filter_map_chunks;
125/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, path| {
126///     if path.iter().any(|value| value == "A B") {
127///         Ok(None)
128///     } else {
129///         Ok(Some(vec![chunk.to_uppercase()]))
130///     }
131/// });
132/// assert_eq!(paths.unwrap(), vec![
133///     vec!["A", "B", "C"],
134///     vec!["A", "B C"],
135///     vec!["A B C"],
136/// ]);
137/// ```
138///
139/// Mapping each chunk to multiple values:
140///
141/// ```
142/// # use suggest::util::filter_map_chunks;
143/// let paths = filter_map_chunks(&["a", "b", "c"], 3, |chunk, _, _, _| {
144///     Ok(Some(vec![format!("{chunk}0"), format!("{chunk}1")]))
145/// });
146/// assert_eq!(paths.unwrap(), vec![
147///     vec!["a0", "b0", "c0"],
148///     vec!["a0", "b0", "c1"],
149///     vec!["a0", "b1", "c0"],
150///     vec!["a0", "b1", "c1"],
151///     vec!["a0", "b c0"],
152///     vec!["a0", "b c1"],
153///     vec!["a1", "b0", "c0"],
154///     vec!["a1", "b0", "c1"],
155///     vec!["a1", "b1", "c0"],
156///     vec!["a1", "b1", "c1"],
157///     vec!["a1", "b c0"],
158///     vec!["a1", "b c1"],
159///     vec!["a b0", "c0"],
160///     vec!["a b0", "c1"],
161///     vec!["a b1", "c0"],
162///     vec!["a b1", "c1"],
163///     vec!["a b c0"],
164///     vec!["a b c1"]
165/// ]);
166/// ```
167pub fn filter_map_chunks<T: Clone>(
168    words: &[&str],
169    max_chunk_size: usize,
170    f: impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
171) -> Result<Vec<Vec<T>>> {
172    let normalized_query = words.join(" ");
173    filter_map_chunks_recurse(words, &normalized_query, &mut vec![], 0, max_chunk_size, &f)
174}
175
176/// `remaining_words` is the slice of remaining words in the query string at
177/// this step. `remaining_query` is the remaining slice of the normalized query
178/// string at this step.
179///
180/// `path` is the sequence of values returned by the filter-map function so far
181/// at this step.
182///
183/// `chunk_index` is the word-based index in the query string at this step.
184fn filter_map_chunks_recurse<T: Clone>(
185    remaining_words: &[&str],
186    remaining_query: &str,
187    path: &mut Vec<T>,
188    chunk_index: usize,
189    max_chunk_size: usize,
190    f: &impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
191) -> Result<Vec<Vec<T>>> {
192    // Filtered-in (non-pruned) paths that will be returned from this step of
193    // the traversal.
194    let mut this_step_paths: Vec<Vec<T>> = vec![];
195
196    for chunk_size in 1..=max_chunk_size {
197        if remaining_words.len() < chunk_size {
198            // `chunk_size` and the later chunk sizes in this for-loop are too
199            // big to visit the remaining words. We already visited them earlier
200            // in the loop when the chunk size was small enough.
201            break;
202        }
203
204        // Get the current chunk within the remaining query. Its byte length is
205        // the sum of the lengths of the words in the chunk + `chunk_size - 1`
206        // spaces between the words. There will only be one space between each
207        // word in `remaining_query` because `remaining_query` is normalized.
208        let chunk_byte_len = remaining_words[..chunk_size]
209            .iter()
210            .fold(chunk_size - 1, |memo, w| memo + w.len());
211        let chunk = &remaining_query[..chunk_byte_len];
212        let is_last_chunk = chunk_size == remaining_words.len();
213
214        // Call the mapper function.
215        if let Some(mapped_values) = f(chunk, chunk_index, is_last_chunk, &path[..])? {
216            for value in mapped_values {
217                if is_last_chunk {
218                    // This is the last chunk in the path. Stop recursing.
219                    this_step_paths.push(vec![value.clone()]);
220                } else {
221                    // Recurse. Note that the new `remaining_words` slice won't
222                    // be empty because if it were, `chunk_size` would equal the
223                    // remaining word count, which is if-branch condition.
224                    path.push(value.clone());
225                    let subtree_paths = filter_map_chunks_recurse(
226                        &remaining_words[chunk_size..],
227                        &remaining_query[(chunk_byte_len + 1)..],
228                        path,
229                        chunk_index + chunk_size,
230                        max_chunk_size,
231                        f,
232                    )?;
233                    path.pop();
234                    for mut p in subtree_paths {
235                        p.insert(0, value.clone());
236                        this_step_paths.push(p);
237                    }
238                }
239            }
240        }
241    }
242
243    Ok(this_step_paths)
244}
245
/// Splits a suggestion keyword at its first space character (`' '`, U+0020)
/// into a prefix and a suffix. The space itself belongs to neither part, and
/// other kinds of whitespace do not split the keyword. When the keyword
/// contains no space, the prefix is the whole keyword and the suffix is an
/// empty string.
pub fn split_keyword(keyword: &str) -> (&str, &str) {
    match keyword.split_once(' ') {
        Some((prefix, suffix)) => (prefix, suffix),
        None => (keyword, ""),
    }
}
252
253/// Compares two strings ignoring case, Unicode combining marks, and some
254/// punctuation. Intended to be used as a Sqlite collating sequence for
255/// comparing natural language strings like keywords and geoname names.
256pub fn i18n_cmp(a: &str, b: &str) -> std::cmp::Ordering {
257    UniCase::new(i18n_transform(a)).cmp(&UniCase::new(i18n_transform(b)))
258}
259
260/// Performs the following transforms on the given string:
261///
262/// * Removes Unicode combining marks
263/// * Removes some punctuation
264/// * Replaces other punctuation with spaces
265pub fn i18n_transform(s: &str) -> Cow<'_, str> {
266    // Punctuation to remove. Examples:
267    //
268    // "Washington, D.C." => "Washington DC"
269    // "L'Assomption" => "LAssomption"
270    macro_rules! pattern_remove {
271        () => {
272            '.' | ',' | '\'' | '’'
273        };
274    }
275
276    // Punctuation to replace with spaces. Examples:
277    //
278    // "Carmel-by-the-Sea" => "Carmel by the Sea"
279    macro_rules! pattern_replace_with_space {
280        () => {
281            '-'
282        };
283    }
284
285    macro_rules! pattern_all {
286        () => {
287            pattern_remove!() | pattern_replace_with_space!()
288        };
289    }
290
291    let borrowable = !s
292        .nfkd()
293        .any(|c| is_combining_mark(c) || matches!(c, pattern_all!()));
294
295    if borrowable {
296        Cow::from(s)
297    } else {
298        s.nfkd()
299            .filter_map(|c| {
300                if is_combining_mark(c) {
301                    // Remove Unicode combining marks:
302                    // "Que\u{0301}bec" => "Quebec"
303                    None
304                } else {
305                    match c {
306                        pattern_remove!() => None,
307                        pattern_replace_with_space!() => Some(' '),
308                        _ => Some(c),
309                    }
310                }
311            })
312            .collect::<_>()
313    }
314}
315
316#[cfg(test)]
317mod tests {
318    use super::*;
319    use itertools::Itertools;
320
321    #[test]
322    fn keywords_with_more_words() {
323        assert_eq!(
324            full_keyword(
325                "moz",
326                &[
327                    "moz",
328                    "mozi",
329                    "mozil",
330                    "mozill",
331                    "mozilla",
332                    "mozilla firefox"
333                ]
334            ),
335            "mozilla".to_owned(),
336        );
337        assert_eq!(
338            full_keyword(
339                "mozilla",
340                &[
341                    "moz",
342                    "mozi",
343                    "mozil",
344                    "mozill",
345                    "mozilla",
346                    "mozilla firefox"
347                ]
348            ),
349            "mozilla".to_owned(),
350        );
351    }
352
353    #[test]
354    fn keywords_with_longer_phrase() {
355        assert_eq!(
356            full_keyword("moz", &["moz", "mozi", "mozil", "mozill", "mozilla"]),
357            "mozilla".to_owned()
358        );
359        assert_eq!(
360            full_keyword(
361                "mozilla f",
362                &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
363            ),
364            "mozilla firefox".to_owned()
365        );
366    }
367
368    #[test]
369    fn query_ends_with_space() {
370        assert_eq!(
371            full_keyword(
372                "mozilla ",
373                &["moz", "mozi", "mozil", "mozill", "mozilla firefox"]
374            ),
375            "mozilla firefox".to_owned()
376        );
377    }
378
379    fn fmc<T: Clone>(
380        query: &str,
381        max_chunk_size: usize,
382        f: impl Fn(&str, usize, bool, &[T]) -> Result<Option<Vec<T>>>,
383    ) -> Result<Vec<Vec<T>>> {
384        let words: Vec<_> = query.split_whitespace().collect();
385        filter_map_chunks(&words, max_chunk_size, f)
386    }
387
388    fn check_paths(actual: Vec<Vec<(String, usize)>>, expected: Vec<Vec<(&str, usize)>>) {
389        assert_eq!(
390            actual,
391            expected
392                .into_iter()
393                .map(|p| p
394                    .into_iter()
395                    .map(|(w, i)| (w.to_string(), i))
396                    .collect::<Vec<_>>())
397                .collect::<Vec<Vec<_>>>()
398        );
399    }
400
401    #[test]
402    fn filter_map_chunks_1() -> anyhow::Result<()> {
403        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| {
404            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
405        })?;
406        check_paths(
407            paths,
408            vec![vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)]],
409        );
410        Ok(())
411    }
412
413    #[test]
414    fn filter_map_chunks_2() -> anyhow::Result<()> {
415        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| {
416            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
417        })?;
418        check_paths(
419            paths,
420            vec![
421                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
422                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
423                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
424                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
425                vec![("a", 0), ("b c", 1), ("d e", 3)],
426                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
427                vec![("a b", 0), ("c", 2), ("d e", 3)],
428                vec![("a b", 0), ("c d", 2), ("e", 4)],
429            ],
430        );
431        Ok(())
432    }
433
434    #[test]
435    fn filter_map_chunks_3() -> anyhow::Result<()> {
436        let paths = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
437            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
438        })?;
439        check_paths(
440            paths,
441            vec![
442                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
443                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
444                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
445                vec![("a", 0), ("b", 1), ("c d e", 2)],
446                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
447                vec![("a", 0), ("b c", 1), ("d e", 3)],
448                vec![("a", 0), ("b c d", 1), ("e", 4)],
449                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
450                vec![("a b", 0), ("c", 2), ("d e", 3)],
451                vec![("a b", 0), ("c d", 2), ("e", 4)],
452                vec![("a b", 0), ("c d e", 2)],
453                vec![("a b c", 0), ("d", 3), ("e", 4)],
454                vec![("a b c", 0), ("d e", 3)],
455            ],
456        );
457        Ok(())
458    }
459
460    #[test]
461    fn filter_map_chunks_4() -> anyhow::Result<()> {
462        let paths = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
463            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
464        })?;
465        check_paths(
466            paths,
467            vec![
468                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
469                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
470                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
471                vec![("a", 0), ("b", 1), ("c d e", 2)],
472                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
473                vec![("a", 0), ("b c", 1), ("d e", 3)],
474                vec![("a", 0), ("b c d", 1), ("e", 4)],
475                vec![("a", 0), ("b c d e", 1)],
476                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
477                vec![("a b", 0), ("c", 2), ("d e", 3)],
478                vec![("a b", 0), ("c d", 2), ("e", 4)],
479                vec![("a b", 0), ("c d e", 2)],
480                vec![("a b c", 0), ("d", 3), ("e", 4)],
481                vec![("a b c", 0), ("d e", 3)],
482                vec![("a b c d", 0), ("e", 4)],
483            ],
484        );
485        Ok(())
486    }
487
488    #[test]
489    fn filter_map_chunks_5() -> anyhow::Result<()> {
490        let paths = fmc("a b c d e", 5, |chunk, chunk_index, _, _| {
491            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
492        })?;
493        check_paths(
494            paths,
495            vec![
496                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
497                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
498                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
499                vec![("a", 0), ("b", 1), ("c d e", 2)],
500                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
501                vec![("a", 0), ("b c", 1), ("d e", 3)],
502                vec![("a", 0), ("b c d", 1), ("e", 4)],
503                vec![("a", 0), ("b c d e", 1)],
504                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
505                vec![("a b", 0), ("c", 2), ("d e", 3)],
506                vec![("a b", 0), ("c d", 2), ("e", 4)],
507                vec![("a b", 0), ("c d e", 2)],
508                vec![("a b c", 0), ("d", 3), ("e", 4)],
509                vec![("a b c", 0), ("d e", 3)],
510                vec![("a b c d", 0), ("e", 4)],
511                vec![("a b c d e", 0)],
512            ],
513        );
514        Ok(())
515    }
516
517    #[test]
518    fn filter_map_chunks_1_map_many() -> anyhow::Result<()> {
519        let paths = fmc("a b c", 1, |chunk, _, _, _| {
520            Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
521        })?;
522        assert_eq!(
523            paths,
524            vec![
525                vec!["a0", "b0", "c0"],
526                vec!["a0", "b0", "c1"],
527                vec!["a0", "b0", "c2"],
528                vec!["a0", "b1", "c0"],
529                vec!["a0", "b1", "c1"],
530                vec!["a0", "b1", "c2"],
531                vec!["a0", "b2", "c0"],
532                vec!["a0", "b2", "c1"],
533                vec!["a0", "b2", "c2"],
534                vec!["a1", "b0", "c0"],
535                vec!["a1", "b0", "c1"],
536                vec!["a1", "b0", "c2"],
537                vec!["a1", "b1", "c0"],
538                vec!["a1", "b1", "c1"],
539                vec!["a1", "b1", "c2"],
540                vec!["a1", "b2", "c0"],
541                vec!["a1", "b2", "c1"],
542                vec!["a1", "b2", "c2"],
543                vec!["a2", "b0", "c0"],
544                vec!["a2", "b0", "c1"],
545                vec!["a2", "b0", "c2"],
546                vec!["a2", "b1", "c0"],
547                vec!["a2", "b1", "c1"],
548                vec!["a2", "b1", "c2"],
549                vec!["a2", "b2", "c0"],
550                vec!["a2", "b2", "c1"],
551                vec!["a2", "b2", "c2"]
552            ]
553        );
554        Ok(())
555    }
556
557    #[test]
558    fn filter_map_chunks_2_map_many() -> anyhow::Result<()> {
559        let paths = fmc("a b c", 2, |chunk, _, _, _| {
560            Ok(Some((0..3).map(|i| format!("{chunk}{i}")).collect()))
561        })?;
562        assert_eq!(
563            paths,
564            vec![
565                vec!["a0", "b0", "c0"],
566                vec!["a0", "b0", "c1"],
567                vec!["a0", "b0", "c2"],
568                vec!["a0", "b1", "c0"],
569                vec!["a0", "b1", "c1"],
570                vec!["a0", "b1", "c2"],
571                vec!["a0", "b2", "c0"],
572                vec!["a0", "b2", "c1"],
573                vec!["a0", "b2", "c2"],
574                vec!["a0", "b c0"],
575                vec!["a0", "b c1"],
576                vec!["a0", "b c2"],
577                vec!["a1", "b0", "c0"],
578                vec!["a1", "b0", "c1"],
579                vec!["a1", "b0", "c2"],
580                vec!["a1", "b1", "c0"],
581                vec!["a1", "b1", "c1"],
582                vec!["a1", "b1", "c2"],
583                vec!["a1", "b2", "c0"],
584                vec!["a1", "b2", "c1"],
585                vec!["a1", "b2", "c2"],
586                vec!["a1", "b c0"],
587                vec!["a1", "b c1"],
588                vec!["a1", "b c2"],
589                vec!["a2", "b0", "c0"],
590                vec!["a2", "b0", "c1"],
591                vec!["a2", "b0", "c2"],
592                vec!["a2", "b1", "c0"],
593                vec!["a2", "b1", "c1"],
594                vec!["a2", "b1", "c2"],
595                vec!["a2", "b2", "c0"],
596                vec!["a2", "b2", "c1"],
597                vec!["a2", "b2", "c2"],
598                vec!["a2", "b c0"],
599                vec!["a2", "b c1"],
600                vec!["a2", "b c2"],
601                vec!["a b0", "c0"],
602                vec!["a b0", "c1"],
603                vec!["a b0", "c2"],
604                vec!["a b1", "c0"],
605                vec!["a b1", "c1"],
606                vec!["a b1", "c2"],
607                vec!["a b2", "c0"],
608                vec!["a b2", "c1"],
609                vec!["a b2", "c2"]
610            ]
611        );
612        Ok(())
613    }
614
615    #[test]
616    fn filter_map_chunks_1_prune_a() -> anyhow::Result<()> {
617        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
618            "a" => Ok(None),
619            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
620        })?;
621        check_paths(paths, vec![]);
622        Ok(())
623    }
624
625    #[test]
626    fn filter_map_chunks_1_prune_b() -> anyhow::Result<()> {
627        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
628            "b" => Ok(None),
629            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
630        })?;
631        check_paths(paths, vec![]);
632        Ok(())
633    }
634
635    #[test]
636    fn filter_map_chunks_1_prune_c() -> anyhow::Result<()> {
637        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
638            "c" => Ok(None),
639            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
640        })?;
641        check_paths(paths, vec![]);
642        Ok(())
643    }
644
645    #[test]
646    fn filter_map_chunks_1_prune_d() -> anyhow::Result<()> {
647        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
648            "d" => Ok(None),
649            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
650        })?;
651        check_paths(paths, vec![]);
652        Ok(())
653    }
654
655    #[test]
656    fn filter_map_chunks_1_prune_e() -> anyhow::Result<()> {
657        let paths = fmc("a b c d e", 1, |chunk, chunk_index, _, _| match chunk {
658            "e" => Ok(None),
659            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
660        })?;
661        check_paths(paths, vec![]);
662        Ok(())
663    }
664
665    #[test]
666    fn filter_map_chunks_2_prune_a() -> anyhow::Result<()> {
667        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
668            "a" => Ok(None),
669            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
670        })?;
671        check_paths(
672            paths,
673            vec![
674                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
675                vec![("a b", 0), ("c", 2), ("d e", 3)],
676                vec![("a b", 0), ("c d", 2), ("e", 4)],
677            ],
678        );
679        Ok(())
680    }
681
682    #[test]
683    fn filter_map_chunks_2_prune_b() -> anyhow::Result<()> {
684        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
685            "b" => Ok(None),
686            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
687        })?;
688        check_paths(
689            paths,
690            vec![
691                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
692                vec![("a", 0), ("b c", 1), ("d e", 3)],
693                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
694                vec![("a b", 0), ("c", 2), ("d e", 3)],
695                vec![("a b", 0), ("c d", 2), ("e", 4)],
696            ],
697        );
698        Ok(())
699    }
700
701    #[test]
702    fn filter_map_chunks_2_prune_c() -> anyhow::Result<()> {
703        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
704            "c" => Ok(None),
705            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
706        })?;
707        check_paths(
708            paths,
709            vec![
710                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
711                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
712                vec![("a", 0), ("b c", 1), ("d e", 3)],
713                vec![("a b", 0), ("c d", 2), ("e", 4)],
714            ],
715        );
716        Ok(())
717    }
718
719    #[test]
720    fn filter_map_chunks_2_prune_d() -> anyhow::Result<()> {
721        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
722            "d" => Ok(None),
723            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
724        })?;
725        check_paths(
726            paths,
727            vec![
728                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
729                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
730                vec![("a", 0), ("b c", 1), ("d e", 3)],
731                vec![("a b", 0), ("c", 2), ("d e", 3)],
732                vec![("a b", 0), ("c d", 2), ("e", 4)],
733            ],
734        );
735        Ok(())
736    }
737
738    #[test]
739    fn filter_map_chunks_2_prune_e() -> anyhow::Result<()> {
740        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
741            "e" => Ok(None),
742            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
743        })?;
744        check_paths(
745            paths,
746            vec![
747                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
748                vec![("a", 0), ("b c", 1), ("d e", 3)],
749                vec![("a b", 0), ("c", 2), ("d e", 3)],
750            ],
751        );
752        Ok(())
753    }
754
755    #[test]
756    fn filter_map_chunks_2_prune_ab() -> anyhow::Result<()> {
757        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
758            "a b" => Ok(None),
759            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
760        })?;
761        check_paths(
762            paths,
763            vec![
764                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
765                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
766                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
767                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
768                vec![("a", 0), ("b c", 1), ("d e", 3)],
769            ],
770        );
771        Ok(())
772    }
773
774    #[test]
775    fn filter_map_chunks_2_prune_bc() -> anyhow::Result<()> {
776        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
777            "b c" => Ok(None),
778            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
779        })?;
780        check_paths(
781            paths,
782            vec![
783                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
784                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
785                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
786                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
787                vec![("a b", 0), ("c", 2), ("d e", 3)],
788                vec![("a b", 0), ("c d", 2), ("e", 4)],
789            ],
790        );
791        Ok(())
792    }
793
794    #[test]
795    fn filter_map_chunks_2_prune_cd() -> anyhow::Result<()> {
796        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
797            "c d" => Ok(None),
798            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
799        })?;
800        check_paths(
801            paths,
802            vec![
803                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
804                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
805                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
806                vec![("a", 0), ("b c", 1), ("d e", 3)],
807                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
808                vec![("a b", 0), ("c", 2), ("d e", 3)],
809            ],
810        );
811        Ok(())
812    }
813
814    #[test]
815    fn filter_map_chunks_2_prune_de() -> anyhow::Result<()> {
816        let paths = fmc("a b c d e", 2, |chunk, chunk_index, _, _| match chunk {
817            "d e" => Ok(None),
818            _ => Ok(Some(vec![(chunk.to_string(), chunk_index)])),
819        })?;
820        check_paths(
821            paths,
822            vec![
823                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
824                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
825                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
826                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
827                vec![("a b", 0), ("c d", 2), ("e", 4)],
828            ],
829        );
830        Ok(())
831    }
832
#[test]
fn filter_map_chunks_2_prune_a_bc() -> anyhow::Result<()> {
    // The closure rejects the chunks "a" and "b c" by returning `Ok(None)`;
    // every other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 2, |chunk, chunk_index, _, _| {
        if matches!(chunk, "a" | "b c") {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains a rejected chunk.
    let expected = vec![
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
849
#[test]
fn filter_map_chunks_2_prune_a_cd() -> anyhow::Result<()> {
    // The closure rejects the chunks "a" and "c d" by returning `Ok(None)`;
    // every other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 2, |chunk, chunk_index, _, _| {
        if matches!(chunk, "a" | "c d") {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains a rejected chunk.
    let expected = vec![
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
865
#[test]
fn filter_map_chunks_2_prune_bc_cd() -> anyhow::Result<()> {
    // The closure rejects the chunks "b c" and "c d" by returning
    // `Ok(None)`; every other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 2, |chunk, chunk_index, _, _| {
        if matches!(chunk, "b c" | "c d") {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains a rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
883
#[test]
fn filter_map_chunks_2_prune_bc_de() -> anyhow::Result<()> {
    // The closure rejects the chunks "b c" and "d e" by returning
    // `Ok(None)`; every other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 2, |chunk, chunk_index, _, _| {
        if matches!(chunk, "b c" | "d e") {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains a rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
901
#[test]
fn filter_map_chunks_3_prune_a() -> anyhow::Result<()> {
    // The closure rejects the chunk "a" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "a" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
921
#[test]
fn filter_map_chunks_3_prune_b() -> anyhow::Result<()> {
    // The closure rejects the chunk "b" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "b" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
944
#[test]
fn filter_map_chunks_3_prune_c() -> anyhow::Result<()> {
    // The closure rejects the chunk "c" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "c" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
967
#[test]
fn filter_map_chunks_3_prune_d() -> anyhow::Result<()> {
    // The closure rejects the chunk "d" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "d" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
990
#[test]
fn filter_map_chunks_3_prune_e() -> anyhow::Result<()> {
    // The closure rejects the chunk "e" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "e" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1010
#[test]
fn filter_map_chunks_3_prune_ab() -> anyhow::Result<()> {
    // The closure rejects the chunk "a b" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "a b" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1033
#[test]
fn filter_map_chunks_3_prune_bc() -> anyhow::Result<()> {
    // The closure rejects the chunk "b c" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "b c" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1058
#[test]
fn filter_map_chunks_3_prune_cd() -> anyhow::Result<()> {
    // The closure rejects the chunk "c d" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "c d" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1083
#[test]
fn filter_map_chunks_3_prune_de() -> anyhow::Result<()> {
    // The closure rejects the chunk "d e" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "d e" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1106
#[test]
fn filter_map_chunks_3_prune_abc() -> anyhow::Result<()> {
    // The closure rejects the chunk "a b c" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "a b c" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1131
#[test]
fn filter_map_chunks_3_prune_bcd() -> anyhow::Result<()> {
    // The closure rejects the chunk "b c d" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "b c d" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1157
#[test]
fn filter_map_chunks_3_prune_cde() -> anyhow::Result<()> {
    // The closure rejects the chunk "c d e" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if chunk == "c d e" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1182
#[test]
fn filter_map_chunks_3_prune_a_bc_cde() -> anyhow::Result<()> {
    // The closure rejects the chunks "a", "b c" and "c d e" by returning
    // `Ok(None)`; every other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 3, |chunk, chunk_index, _, _| {
        if matches!(chunk, "a" | "b c" | "c d e") {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains a rejected chunk.
    let expected = vec![
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1201
#[test]
fn filter_map_chunks_4_prune_a() -> anyhow::Result<()> {
    // The closure rejects the chunk "a" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "a" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1222
#[test]
fn filter_map_chunks_4_prune_b() -> anyhow::Result<()> {
    // The closure rejects the chunk "b" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "b" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1247
#[test]
fn filter_map_chunks_4_prune_c() -> anyhow::Result<()> {
    // The closure rejects the chunk "c" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "c" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1272
#[test]
fn filter_map_chunks_4_prune_d() -> anyhow::Result<()> {
    // The closure rejects the chunk "d" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "d" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1297
#[test]
fn filter_map_chunks_4_prune_e() -> anyhow::Result<()> {
    // The closure rejects the chunk "e" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "e" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1318
#[test]
fn filter_map_chunks_4_prune_ab() -> anyhow::Result<()> {
    // The closure rejects the chunk "a b" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "a b" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1343
#[test]
fn filter_map_chunks_4_prune_bc() -> anyhow::Result<()> {
    // The closure rejects the chunk "b c" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "b c" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1370
#[test]
fn filter_map_chunks_4_prune_cd() -> anyhow::Result<()> {
    // The closure rejects the chunk "c d" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "c d" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1397
#[test]
fn filter_map_chunks_4_prune_de() -> anyhow::Result<()> {
    // The closure rejects the chunk "d e" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "d e" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1422
#[test]
fn filter_map_chunks_4_prune_abc() -> anyhow::Result<()> {
    // The closure rejects the chunk "a b c" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "a b c" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1449
#[test]
fn filter_map_chunks_4_prune_bcd() -> anyhow::Result<()> {
    // The closure rejects the chunk "b c d" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "b c d" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1477
#[test]
fn filter_map_chunks_4_prune_cde() -> anyhow::Result<()> {
    // The closure rejects the chunk "c d e" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "c d e" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1504
#[test]
fn filter_map_chunks_4_prune_abcd() -> anyhow::Result<()> {
    // The closure rejects the chunk "a b c d" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "a b c d" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a", 0), ("b c d e", 1)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1532
#[test]
fn filter_map_chunks_4_prune_bcde() -> anyhow::Result<()> {
    // The closure rejects the chunk "b c d e" by returning `Ok(None)`; every
    // other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if chunk == "b c d e" {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains the rejected chunk.
    let expected = vec![
        vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
        vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
        vec![("a", 0), ("b", 1), ("c d e", 2)],
        vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
        vec![("a", 0), ("b c", 1), ("d e", 3)],
        vec![("a", 0), ("b c d", 1), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1560
#[test]
fn filter_map_chunks_4_prune_a_bc_de() -> anyhow::Result<()> {
    // The closure rejects the chunks "a", "b c" and "d e" by returning
    // `Ok(None)`; every other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if matches!(chunk, "a" | "b c" | "d e") {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains a rejected chunk.
    let expected = vec![
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b", 0), ("c d e", 2)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1579
#[test]
fn filter_map_chunks_4_prune_a_bc_cde() -> anyhow::Result<()> {
    // The closure rejects the chunks "a", "b c" and "c d e" by returning
    // `Ok(None)`; every other chunk is kept as a `(chunk, chunk_index)` pair.
    let actual = fmc("a b c d e", 4, |chunk, chunk_index, _, _| {
        if matches!(chunk, "a" | "b c" | "c d e") {
            return Ok(None);
        }
        Ok(Some(vec![(chunk.to_string(), chunk_index)]))
    })?;
    // None of the expected paths contains a rejected chunk.
    let expected = vec![
        vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
        vec![("a b", 0), ("c", 2), ("d e", 3)],
        vec![("a b", 0), ("c d", 2), ("e", 4)],
        vec![("a b c", 0), ("d", 3), ("e", 4)],
        vec![("a b c", 0), ("d e", 3)],
        vec![("a b c d", 0), ("e", 4)],
    ];
    check_paths(actual, expected);
    Ok(())
}
1599
1600    #[test]
1601    fn filter_map_chunks_spaces() -> anyhow::Result<()> {
1602        let paths = fmc("   a   b  c        d  e ", 2, |chunk, chunk_index, _, _| {
1603            Ok(Some(vec![(chunk.to_string(), chunk_index)]))
1604        })?;
1605        check_paths(
1606            paths,
1607            vec![
1608                vec![("a", 0), ("b", 1), ("c", 2), ("d", 3), ("e", 4)],
1609                vec![("a", 0), ("b", 1), ("c", 2), ("d e", 3)],
1610                vec![("a", 0), ("b", 1), ("c d", 2), ("e", 4)],
1611                vec![("a", 0), ("b c", 1), ("d", 3), ("e", 4)],
1612                vec![("a", 0), ("b c", 1), ("d e", 3)],
1613                vec![("a b", 0), ("c", 2), ("d", 3), ("e", 4)],
1614                vec![("a b", 0), ("c", 2), ("d e", 3)],
1615                vec![("a b", 0), ("c d", 2), ("e", 4)],
1616            ],
1617        );
1618        Ok(())
1619    }
1620
1621    #[test]
1622    fn filter_map_chunks_is_last_1() -> anyhow::Result<()> {
1623        let paths = fmc("a b c d e", 1, |chunk, _, is_last, _| {
1624            Ok(Some(vec![(chunk.to_string(), is_last as usize)]))
1625        })?;
1626        check_paths(
1627            paths,
1628            vec![vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)]],
1629        );
1630        Ok(())
1631    }
1632
1633    #[test]
1634    fn filter_map_chunks_is_last_2() -> anyhow::Result<()> {
1635        let paths = fmc("a b c d e", 2, |chunk, _, is_last, _| {
1636            Ok(Some(vec![(chunk.to_string(), is_last as usize)]))
1637        })?;
1638        check_paths(
1639            paths,
1640            vec![
1641                vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
1642                vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
1643                vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
1644                vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
1645                vec![("a", 0), ("b c", 0), ("d e", 1)],
1646                vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
1647                vec![("a b", 0), ("c", 0), ("d e", 1)],
1648                vec![("a b", 0), ("c d", 0), ("e", 1)],
1649            ],
1650        );
1651        Ok(())
1652    }
1653
1654    #[test]
1655    fn filter_map_chunks_is_last_3() -> anyhow::Result<()> {
1656        let paths = fmc("a b c d e", 3, |chunk, _, is_last, _| {
1657            Ok(Some(vec![(chunk.to_string(), is_last as usize)]))
1658        })?;
1659        check_paths(
1660            paths,
1661            vec![
1662                vec![("a", 0), ("b", 0), ("c", 0), ("d", 0), ("e", 1)],
1663                vec![("a", 0), ("b", 0), ("c", 0), ("d e", 1)],
1664                vec![("a", 0), ("b", 0), ("c d", 0), ("e", 1)],
1665                vec![("a", 0), ("b", 0), ("c d e", 1)],
1666                vec![("a", 0), ("b c", 0), ("d", 0), ("e", 1)],
1667                vec![("a", 0), ("b c", 0), ("d e", 1)],
1668                vec![("a", 0), ("b c d", 0), ("e", 1)],
1669                vec![("a b", 0), ("c", 0), ("d", 0), ("e", 1)],
1670                vec![("a b", 0), ("c", 0), ("d e", 1)],
1671                vec![("a b", 0), ("c d", 0), ("e", 1)],
1672                vec![("a b", 0), ("c d e", 1)],
1673                vec![("a b c", 0), ("d", 0), ("e", 1)],
1674                vec![("a b c", 0), ("d e", 1)],
1675            ],
1676        );
1677        Ok(())
1678    }
1679
1680    #[test]
1681    fn test_split_keyword() {
1682        assert_eq!(split_keyword("foo"), ("foo", ""));
1683        assert_eq!(split_keyword("foo bar baz"), ("foo", "bar baz"));
1684    }
1685
1686    #[test]
1687    fn i18n_transform() -> anyhow::Result<()> {
1688        // (test str, expected str)
1689        let tests = [
1690            ("AbC", "AbC"),
1691            ("AbC dEf", "AbC dEf"),
1692            ("Àęí", "Aei"),
1693            // "Québec" with single 'é' char
1694            ("Qu\u{00e9}bec", "Quebec"),
1695            // "Québec" with ASCII 'e' followed by combining acute accent
1696            ("Que\u{0301}bec", "Quebec"),
1697            ("Gößnitz", "Goßnitz"),
1698            ("St. Louis", "St Louis"),
1699            ("Washington, D.C.", "Washington DC"),
1700            ("U.S.A.", "USA"),
1701            ("Carmel-by-the-Sea", "Carmel by the Sea"),
1702            ("Val-d'Or", "Val dOr"),
1703            ("Val-d’Or", "Val dOr"),
1704            (".,-'()[]?<>", " ()[]?<>"),
1705        ];
1706        for (test_str, expected_str) in tests {
1707            assert_eq!(
1708                super::i18n_transform(test_str),
1709                expected_str,
1710                "Transform test str: {:?}",
1711                test_str
1712            );
1713        }
1714        Ok(())
1715    }
1716
1717    #[test]
1718    fn i18n_cmp() -> anyhow::Result<()> {
1719        let tests = [
1720            ["AbC xYz", "ABC XYZ", "abc xyz"].as_slice(),
1721            &["Àęí", "Aei", "àęí", "aei"],
1722            &[
1723                // "Québec" with single 'é' char
1724                "Qu\u{00e9}bec",
1725                // "Québec" with ASCII 'e' followed by combining acute accent
1726                "Que\u{0301}bec",
1727                "Quebec",
1728                "quebec",
1729            ],
1730            &[
1731                "Gößnitz",
1732                "Gössnitz",
1733                "Goßnitz",
1734                "Gossnitz",
1735                "gößnitz",
1736                "gössnitz",
1737                "goßnitz",
1738                "gossnitz",
1739            ],
1740            &["St. Louis", "St... Louis", "St Louis", "st louis"],
1741            &[
1742                "Washington, D.C.",
1743                "Washington, DC",
1744                "Washington D.C.",
1745                "Washington DC",
1746                "washington dc",
1747            ],
1748            &[
1749                "U.S.A.", "US.A.", "U.SA.", "U.S.A", "USA.", "U.SA", "USA", "usa",
1750            ],
1751            &[
1752                "Val-d'Or",
1753                "Val-d’Or",
1754                "Val-dOr",
1755                "Val d'Or",
1756                "Val d’Or",
1757                "Val dOr",
1758                "val dor",
1759            ],
1760            &[
1761                "Carmel-by-the-Sea",
1762                "Carmel by the Sea",
1763                "carmel by the sea",
1764            ],
1765            &[".,-'()[]?<>", " ()[]?<>"],
1766        ];
1767        for strs in tests {
1768            for a_and_b in strs.iter().permutations(2) {
1769                assert_eq!(
1770                    super::i18n_cmp(a_and_b[0], a_and_b[1]),
1771                    std::cmp::Ordering::Equal,
1772                    "Comparing: {:?}",
1773                    a_and_b
1774                );
1775            }
1776        }
1777        Ok(())
1778    }
1779}