1use crate::util;
6use bitflags::bitflags;
7use icu_casemap::CaseMapperBorrowed;
8use rusqlite::{
9 self,
10 types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef},
11};
12use std::borrow::Cow;
13
14const MAX_CHARS_TO_SEARCH_THROUGH: usize = 255;
15
16static CASE_MAPPER: CaseMapperBorrowed<'_> = CaseMapperBorrowed::new();
17
18#[derive(Clone, Copy, PartialEq, Eq, Debug)]
19#[repr(u32)]
20pub enum MatchBehavior {
21 Anywhere = 0,
23 BoundaryAnywhere = 1,
26 Boundary = 2,
28 Beginning = 3,
30 AnywhereUnmodified = 4,
33 BeginningCaseSensitive = 5,
36}
37
38impl FromSql for MatchBehavior {
39 #[inline]
40 fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
41 Ok(match value.as_i64()? {
42 0 => MatchBehavior::Anywhere,
43 1 => MatchBehavior::BoundaryAnywhere,
44 2 => MatchBehavior::Boundary,
45 3 => MatchBehavior::Beginning,
46 4 => MatchBehavior::AnywhereUnmodified,
47 5 => MatchBehavior::BeginningCaseSensitive,
48 _ => return Err(FromSqlError::InvalidType),
49 })
50 }
51}
52
53impl ToSql for MatchBehavior {
54 #[inline]
55 fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
56 Ok(ToSqlOutput::from(*self as u32))
57 }
58}
59
60bitflags! {
61 pub struct SearchBehavior: u32 {
62 const HISTORY = 1;
64
65 const BOOKMARK = 1 << 1;
67
68 const TAG = 1 << 2;
70
71 const TITLE = 1 << 3;
73
74 const URL = 1 << 4;
76
77 const TYPED = 1 << 5;
79
80 const JAVASCRIPT = 1 << 6;
82
83 const OPENPAGE = 1 << 7;
85
86 const RESTRICT = 1 << 8;
89
90 const SEARCHES = 1 << 9;
93 }
94}
95
96impl Default for SearchBehavior {
97 fn default() -> SearchBehavior {
99 SearchBehavior::HISTORY
100 | SearchBehavior::BOOKMARK
101 | SearchBehavior::OPENPAGE
102 | SearchBehavior::SEARCHES
103 }
104}
105
106impl SearchBehavior {
107 #[inline]
108 pub fn any() -> Self {
109 SearchBehavior::all() & !SearchBehavior::RESTRICT
110 }
111}
112
113impl FromSql for SearchBehavior {
114 #[inline]
115 fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
116 SearchBehavior::from_bits(u32::column_result(value)?).ok_or(FromSqlError::InvalidType)
117 }
118}
119
120impl ToSql for SearchBehavior {
121 #[inline]
122 fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
123 Ok(ToSqlOutput::from(self.bits()))
124 }
125}
126
127#[inline(always)]
131fn dubious_to_ascii_lower(c: u8) -> u8 {
132 c | 0x20
133}
134
135#[inline(always)]
143fn next_search_candidate(to_search: &str, search_for: char) -> Option<usize> {
144 let search_bytes = to_search.as_bytes();
159 if (search_for as u32) < 128 {
160 let target = dubious_to_ascii_lower(search_for as u8);
165 let special = if target == b'i' {
166 0xc4u8
167 } else if target == b'k' {
168 0xe2u8
169 } else {
170 0xffu8
171 };
172 let mut ci = 0;
175 while ci < search_bytes.len() {
176 let cur = search_bytes[ci];
177 if dubious_to_ascii_lower(cur) == target || cur == special {
178 return Some(ci);
179 }
180 ci += 1;
181 }
182 } else {
183 let mut ci = 0;
184 while ci < search_bytes.len() {
185 let cur = search_bytes[ci];
186 if (cur & 0x80) != 0 {
187 return Some(ci);
188 }
189 ci += 1;
190 }
191 }
192 None
193}
194
195#[inline(always)]
196fn is_ascii_lower_alpha(c: u8) -> bool {
197 c.wrapping_sub(b'a') <= (b'z' - b'a')
199}
200
201#[inline(always)]
208fn is_on_boundary(text: &str, index: usize) -> bool {
209 if index == 0 {
210 return true;
211 }
212 let bytes = text.as_bytes();
213 if is_ascii_lower_alpha(bytes[index]) {
214 let prev_lower = dubious_to_ascii_lower(bytes[index - 1]);
215 !is_ascii_lower_alpha(prev_lower)
216 } else {
217 true
218 }
219}
220
221#[inline]
226fn string_match(token: &str, source: &str) -> bool {
227 if source.len() < token.len() {
228 return false;
229 }
230 let t_folded = CASE_MAPPER.fold_string(token);
231 let mut ti = t_folded.chars();
232 let s_folded = CASE_MAPPER.fold_string(source);
233 let mut si = s_folded.chars();
234 loop {
235 match (ti.next(), si.next()) {
236 (None, _) => return true,
237 (Some(_), None) => return false,
238 (Some(x), Some(y)) => {
239 if x != y {
240 return false;
241 }
242 }
243 }
244 }
245}
246
247#[inline]
250fn char_to_lower_single(c: char) -> char {
251 c.to_lowercase().next().unwrap()
252}
253
254#[inline]
257fn next_codepoint_lower(s: &str) -> (char, usize) {
258 let mut indices = s.char_indices();
261 let (_, next_char) = indices.next().unwrap();
262 let next_index = indices
263 .next()
264 .map(|(index, _)| index)
265 .unwrap_or_else(|| s.len());
266 (char_to_lower_single(next_char), next_index)
267}
268
269pub fn find_in_string(token: &str, src: &str, only_boundary: bool) -> bool {
271 assert!(!token.is_empty(), "Don't search for an empty string");
273 if src.len() < token.len() {
274 return false;
275 }
276
277 let token_first_char = next_codepoint_lower(token).0;
278 let mut cur_offset = 0;
285 while let Some(src_idx) = next_search_candidate(&src[cur_offset..], token_first_char) {
287 if cur_offset + src_idx >= src.len() {
288 break;
289 }
290 cur_offset += src_idx;
291 let src_cur = &src[cur_offset..];
292
293 let (src_next_char, next_offset_in_cur) = next_codepoint_lower(src_cur);
297
298 if src_next_char == token_first_char
301 && (!only_boundary || is_on_boundary(src, cur_offset))
302 && string_match(token, src_cur)
303 {
304 return true;
305 }
306 cur_offset += next_offset_in_cur;
307 }
308 false
309}
310
311fn find_anywhere(token: &str, source: &str) -> bool {
314 assert!(!token.is_empty(), "Don't search for an empty token");
315 find_in_string(token, source, false)
316}
317
318fn find_on_boundary(token: &str, source: &str) -> bool {
319 assert!(!token.is_empty(), "Don't search for an empty token");
320 find_in_string(token, source, true)
321}
322
323fn find_beginning(token: &str, source: &str) -> bool {
324 assert!(!token.is_empty(), "Don't search for an empty token");
325 string_match(token, source)
326}
327
328fn find_beginning_case_sensitive(token: &str, source: &str) -> bool {
329 assert!(!token.is_empty(), "Don't search for an empty token");
330 source.starts_with(token)
331}
332
333pub struct AutocompleteMatch<'search, 'url, 'title, 'tags> {
335 pub search_str: &'search str,
336 pub url_str: &'url str,
337 pub title_str: &'title str,
338 pub tags: &'tags str,
339 pub visit_count: u32,
340 pub typed: bool,
341 pub bookmarked: bool,
342 pub open_page_count: u32,
343 pub match_behavior: MatchBehavior,
344 pub search_behavior: SearchBehavior,
345}
346
347impl AutocompleteMatch<'_, '_, '_, '_> {
348 fn get_search_fn(&self) -> fn(&str, &str) -> bool {
349 match self.match_behavior {
350 MatchBehavior::Anywhere | MatchBehavior::AnywhereUnmodified => find_anywhere,
351 MatchBehavior::Beginning => find_beginning,
352 MatchBehavior::BeginningCaseSensitive => find_beginning_case_sensitive,
353 _ => find_on_boundary,
354 }
355 }
356
357 fn fixup_url_str<'a>(&self, mut s: &'a str) -> Cow<'a, str> {
358 if self.match_behavior != MatchBehavior::AnywhereUnmodified {
359 if s.starts_with("http://") {
360 s = &s[7..];
361 } else if s.starts_with("https://") {
362 s = &s[8..];
363 } else if s.starts_with("ftp://") {
364 s = &s[6..];
365 }
366 }
367 if memchr::memchr(b'%', s.as_bytes()).is_none() {
371 return Cow::Borrowed(s);
372 }
373 match percent_encoding::percent_decode(s.as_bytes()).decode_utf8() {
376 Err(_) => Cow::Borrowed(s),
377 Ok(decoded) => decoded,
378 }
379 }
380
381 #[inline]
382 fn has_behavior(&self, behavior: SearchBehavior) -> bool {
383 self.search_behavior.intersects(behavior)
384 }
385
386 pub fn invoke(&self) -> bool {
387 if self.match_behavior == MatchBehavior::AnywhereUnmodified
390 && self.url_str.starts_with("javascript:")
391 && !self.has_behavior(SearchBehavior::JAVASCRIPT)
392 && !self.search_str.starts_with("javascript:")
393 {
394 return false;
395 }
396 let matches = if self.has_behavior(SearchBehavior::RESTRICT) {
397 (!self.has_behavior(SearchBehavior::HISTORY) || self.visit_count > 0)
398 && (!self.has_behavior(SearchBehavior::TYPED) || self.typed)
399 && (!self.has_behavior(SearchBehavior::BOOKMARK) || self.bookmarked)
400 && (!self.has_behavior(SearchBehavior::TAG) || !self.tags.is_empty())
401 && (!self.has_behavior(SearchBehavior::OPENPAGE) || self.open_page_count > 0)
402 } else {
403 (self.has_behavior(SearchBehavior::HISTORY) && self.visit_count > 0)
404 || (self.has_behavior(SearchBehavior::TYPED) && self.typed)
405 || (self.has_behavior(SearchBehavior::BOOKMARK) && self.bookmarked)
406 || (self.has_behavior(SearchBehavior::TAG) && !self.tags.is_empty())
407 || (self.has_behavior(SearchBehavior::OPENPAGE) && self.open_page_count > 0)
408 };
409 if !matches {
410 return false;
411 }
412 let fixed_url = self.fixup_url_str(self.url_str);
413 let search_fn = self.get_search_fn();
414
415 let trimmed_url = util::slice_up_to(fixed_url.as_ref(), MAX_CHARS_TO_SEARCH_THROUGH);
416 let trimmed_title = util::slice_up_to(self.title_str, MAX_CHARS_TO_SEARCH_THROUGH);
417 for token in self.search_str.split_ascii_whitespace() {
418 let matches = match (
419 self.has_behavior(SearchBehavior::TITLE),
420 self.has_behavior(SearchBehavior::URL),
421 ) {
422 (true, true) => {
423 (search_fn(token, trimmed_title) || search_fn(token, self.tags))
424 && search_fn(token, trimmed_url)
425 }
426 (true, false) => search_fn(token, trimmed_title) || search_fn(token, self.tags),
427 (false, true) => search_fn(token, trimmed_url),
428 (false, false) => {
429 search_fn(token, trimmed_url)
430 || search_fn(token, trimmed_title)
431 || search_fn(token, self.tags)
432 }
433 };
434 if !matches {
435 return false;
436 }
437 }
438 true
439 }
440}
441
442#[cfg(test)]
443mod test {
444 use super::*;
445
446 #[test]
447 fn test_is_ascii_lower_alpha() {
448 for c in 0u8..=255u8 {
450 assert_eq!(
451 is_ascii_lower_alpha(c),
452 c.is_ascii_lowercase(),
453 "is_lower_ascii_alpha is wrong for {}",
454 c
455 );
456 }
457 }
458
459 #[test]
462 fn test_casing_assumptions() {
463 use std::char;
464 for c in 128..0x11_0000 {
472 if let Some(ch) = char::from_u32(c) {
473 let mut li = ch.to_lowercase();
476 let lc = li.next().unwrap();
477 if c != 304 && c != 8490 {
478 assert!(
479 (lc as u32) >= 128,
480 "Lower case of non-ascii '{}' ({}) was unexpectedly ascii",
481 ch,
482 c
483 );
484 assert!(
487 li.next().is_none(),
488 "Lower case of '{}' ({}) produced multiple codepoints unexpectedly",
489 ch,
490 c
491 );
492 } else {
493 assert!(
494 (lc as u32) < 128,
495 "Lower case of non-ascii '{}' ({}) was unexpectedly not ascii",
496 ch,
497 c
498 );
499 }
500 }
501 }
502
503 for c in 0..128 {
505 let ch = char::from_u32(c).unwrap();
506 let mut li = ch.to_lowercase();
507 let lc = li.next().unwrap();
508 assert!(
509 li.next().is_none() && (lc as u32) < 128,
510 "Lower case of ascii '{}' ({}) wasn't ascii :(",
511 ch,
512 c
513 );
514 }
515
516 for c in (b'a'..=b'z').chain(b'A'..=b'Z') {
517 assert_eq!(
518 dubious_to_ascii_lower(c),
519 c.to_ascii_lowercase(),
520 "c: '{}'",
521 c as char
522 );
523 }
524 }
525}