1use crate::util;
6use bitflags::bitflags;
7use caseless::Caseless;
8use rusqlite::{
9 self,
10 types::{FromSql, FromSqlError, FromSqlResult, ToSql, ToSqlOutput, ValueRef},
11};
12use std::borrow::Cow;
13
/// Limit handed to `util::slice_up_to` when trimming the URL and the title before
/// tokens are matched against them.
///
/// NOTE(review): whether the limit counts chars or bytes is decided by
/// `util::slice_up_to`, which is defined elsewhere — confirm.
const MAX_CHARS_TO_SEARCH_THROUGH: usize = 255;
15
/// Strategy that decides where a search token is allowed to match inside a
/// candidate string.
///
/// The explicit discriminants are the integer values this type is stored as and
/// read back from through its `ToSql` / `FromSql` implementations.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u32)]
pub enum MatchBehavior {
    /// The token may match at any position.
    Anywhere = 0,

    /// Handled as a word-boundary match by the matcher in this file.
    ///
    /// NOTE(review): the name suggests a fallback to matching anywhere; no such
    /// fallback is implemented here — confirm whether callers provide it.
    BoundaryAnywhere = 1,

    /// The token must start on a word boundary.
    Boundary = 2,

    /// The token must match the beginning of the searched text, ignoring case.
    Beginning = 3,

    /// Like `Anywhere`, but the URL scheme prefix is not stripped before matching.
    AnywhereUnmodified = 4,

    /// The token must match the beginning of the searched text exactly
    /// (case-sensitive).
    BeginningCaseSensitive = 5,
}
35
36impl FromSql for MatchBehavior {
37 #[inline]
38 fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
39 Ok(match value.as_i64()? {
40 0 => MatchBehavior::Anywhere,
41 1 => MatchBehavior::BoundaryAnywhere,
42 2 => MatchBehavior::Boundary,
43 3 => MatchBehavior::Beginning,
44 4 => MatchBehavior::AnywhereUnmodified,
45 5 => MatchBehavior::BeginningCaseSensitive,
46 _ => return Err(FromSqlError::InvalidType),
47 })
48 }
49}
50
51impl ToSql for MatchBehavior {
52 #[inline]
53 fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
54 Ok(ToSqlOutput::from(*self as u32))
55 }
56}
57
58bitflags! {
59 pub struct SearchBehavior: u32 {
60 const HISTORY = 1;
62
63 const BOOKMARK = 1 << 1;
65
66 const TAG = 1 << 2;
68
69 const TITLE = 1 << 3;
71
72 const URL = 1 << 4;
74
75 const TYPED = 1 << 5;
77
78 const JAVASCRIPT = 1 << 6;
80
81 const OPENPAGE = 1 << 7;
83
84 const RESTRICT = 1 << 8;
87
88 const SEARCHES = 1 << 9;
91 }
92}
93
94impl Default for SearchBehavior {
95 fn default() -> SearchBehavior {
97 SearchBehavior::HISTORY
98 | SearchBehavior::BOOKMARK
99 | SearchBehavior::OPENPAGE
100 | SearchBehavior::SEARCHES
101 }
102}
103
104impl SearchBehavior {
105 #[inline]
106 pub fn any() -> Self {
107 SearchBehavior::all() & !SearchBehavior::RESTRICT
108 }
109}
110
111impl FromSql for SearchBehavior {
112 #[inline]
113 fn column_result(value: ValueRef<'_>) -> FromSqlResult<Self> {
114 SearchBehavior::from_bits(u32::column_result(value)?).ok_or(FromSqlError::InvalidType)
115 }
116}
117
118impl ToSql for SearchBehavior {
119 #[inline]
120 fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> {
121 Ok(ToSqlOutput::from(self.bits()))
122 }
123}
124
/// Sets the ASCII "case bit" (0x20) of `c`.
///
/// For `A`-`Z` this yields the lower-case letter and `a`-`z` are returned
/// unchanged. Every other byte is simply OR-ed with 0x20, which is *not* a real
/// lower-casing (hence "dubious"); callers must tolerate that.
#[inline(always)]
fn dubious_to_ascii_lower(c: u8) -> u8 {
    // The single bit in which an ASCII letter differs from its other case.
    const ASCII_CASE_BIT: u8 = b'a' ^ b'A';
    c | ASCII_CASE_BIT
}
132
133#[inline(always)]
141fn next_search_candidate(to_search: &str, search_for: char) -> Option<usize> {
142 let search_bytes = to_search.as_bytes();
157 if (search_for as u32) < 128 {
158 let target = dubious_to_ascii_lower(search_for as u8);
163 let special = if target == b'i' {
164 0xc4u8
165 } else if target == b'k' {
166 0xe2u8
167 } else {
168 0xffu8
169 };
170 let mut ci = 0;
173 while ci < search_bytes.len() {
174 let cur = search_bytes[ci];
175 if dubious_to_ascii_lower(cur) == target || cur == special {
176 return Some(ci);
177 }
178 ci += 1;
179 }
180 } else {
181 let mut ci = 0;
182 while ci < search_bytes.len() {
183 let cur = search_bytes[ci];
184 if (cur & 0x80) != 0 {
185 return Some(ci);
186 }
187 ci += 1;
188 }
189 }
190 None
191}
192
/// Returns `true` exactly when `c` is one of the bytes `b'a'..=b'z'`.
#[inline(always)]
fn is_ascii_lower_alpha(c: u8) -> bool {
    (b'a'..=b'z').contains(&c)
}
198
199#[inline(always)]
206fn is_on_boundary(text: &str, index: usize) -> bool {
207 if index == 0 {
208 return true;
209 }
210 let bytes = text.as_bytes();
211 if is_ascii_lower_alpha(bytes[index]) {
212 let prev_lower = dubious_to_ascii_lower(bytes[index - 1]);
213 !is_ascii_lower_alpha(prev_lower)
214 } else {
215 true
216 }
217}
218
219#[inline]
224fn string_match(token: &str, source: &str) -> bool {
225 if source.len() < token.len() {
226 return false;
227 }
228 let mut ti = token.chars().default_case_fold();
229 let mut si = source.chars().default_case_fold();
230 loop {
231 match (ti.next(), si.next()) {
232 (None, _) => return true,
233 (Some(_), None) => return false,
234 (Some(x), Some(y)) => {
235 if x != y {
236 return false;
237 }
238 }
239 }
240 }
241}
242
/// Lower-cases `c` and keeps only the first code point of the result; any
/// further code points produced by the lower-case mapping are discarded.
#[inline]
fn char_to_lower_single(c: char) -> char {
    let mut lowered = c.to_lowercase();
    let first = lowered.next();
    first.unwrap()
}
249
250#[inline]
253fn next_codepoint_lower(s: &str) -> (char, usize) {
254 let mut indices = s.char_indices();
257 let (_, next_char) = indices.next().unwrap();
258 let next_index = indices
259 .next()
260 .map(|(index, _)| index)
261 .unwrap_or_else(|| s.len());
262 (char_to_lower_single(next_char), next_index)
263}
264
265pub fn find_in_string(token: &str, src: &str, only_boundary: bool) -> bool {
267 assert!(!token.is_empty(), "Don't search for an empty string");
269 if src.len() < token.len() {
270 return false;
271 }
272
273 let token_first_char = next_codepoint_lower(token).0;
274 let mut cur_offset = 0;
281 while let Some(src_idx) = next_search_candidate(&src[cur_offset..], token_first_char) {
283 if cur_offset + src_idx >= src.len() {
284 break;
285 }
286 cur_offset += src_idx;
287 let src_cur = &src[cur_offset..];
288
289 let (src_next_char, next_offset_in_cur) = next_codepoint_lower(src_cur);
293
294 if src_next_char == token_first_char
297 && (!only_boundary || is_on_boundary(src, cur_offset))
298 && string_match(token, src_cur)
299 {
300 return true;
301 }
302 cur_offset += next_offset_in_cur;
303 }
304 false
305}
306
307fn find_anywhere(token: &str, source: &str) -> bool {
310 assert!(!token.is_empty(), "Don't search for an empty token");
311 find_in_string(token, source, false)
312}
313
314fn find_on_boundary(token: &str, source: &str) -> bool {
315 assert!(!token.is_empty(), "Don't search for an empty token");
316 find_in_string(token, source, true)
317}
318
319fn find_beginning(token: &str, source: &str) -> bool {
320 assert!(!token.is_empty(), "Don't search for an empty token");
321 string_match(token, source)
322}
323
/// Reports whether `source` begins with exactly `token` (no case conversion).
///
/// # Panics
///
/// Panics if `token` is empty.
fn find_beginning_case_sensitive(token: &str, source: &str) -> bool {
    assert!(!token.is_empty(), "Don't search for an empty token");
    // Both arguments are valid UTF-8, so a byte-wise prefix test is equivalent
    // to a string prefix test.
    source.as_bytes().starts_with(token.as_bytes())
}
328
329pub struct AutocompleteMatch<'search, 'url, 'title, 'tags> {
331 pub search_str: &'search str,
332 pub url_str: &'url str,
333 pub title_str: &'title str,
334 pub tags: &'tags str,
335 pub visit_count: u32,
336 pub typed: bool,
337 pub bookmarked: bool,
338 pub open_page_count: u32,
339 pub match_behavior: MatchBehavior,
340 pub search_behavior: SearchBehavior,
341}
342
343impl AutocompleteMatch<'_, '_, '_, '_> {
344 fn get_search_fn(&self) -> fn(&str, &str) -> bool {
345 match self.match_behavior {
346 MatchBehavior::Anywhere | MatchBehavior::AnywhereUnmodified => find_anywhere,
347 MatchBehavior::Beginning => find_beginning,
348 MatchBehavior::BeginningCaseSensitive => find_beginning_case_sensitive,
349 _ => find_on_boundary,
350 }
351 }
352
353 fn fixup_url_str<'a>(&self, mut s: &'a str) -> Cow<'a, str> {
354 if self.match_behavior != MatchBehavior::AnywhereUnmodified {
355 if s.starts_with("http://") {
356 s = &s[7..];
357 } else if s.starts_with("https://") {
358 s = &s[8..];
359 } else if s.starts_with("ftp://") {
360 s = &s[6..];
361 }
362 }
363 if memchr::memchr(b'%', s.as_bytes()).is_none() {
367 return Cow::Borrowed(s);
368 }
369 match percent_encoding::percent_decode(s.as_bytes()).decode_utf8() {
372 Err(_) => Cow::Borrowed(s),
373 Ok(decoded) => decoded,
374 }
375 }
376
377 #[inline]
378 fn has_behavior(&self, behavior: SearchBehavior) -> bool {
379 self.search_behavior.intersects(behavior)
380 }
381
382 pub fn invoke(&self) -> bool {
383 if self.match_behavior == MatchBehavior::AnywhereUnmodified
386 && self.url_str.starts_with("javascript:")
387 && !self.has_behavior(SearchBehavior::JAVASCRIPT)
388 && !self.search_str.starts_with("javascript:")
389 {
390 return false;
391 }
392 let matches = if self.has_behavior(SearchBehavior::RESTRICT) {
393 (!self.has_behavior(SearchBehavior::HISTORY) || self.visit_count > 0)
394 && (!self.has_behavior(SearchBehavior::TYPED) || self.typed)
395 && (!self.has_behavior(SearchBehavior::BOOKMARK) || self.bookmarked)
396 && (!self.has_behavior(SearchBehavior::TAG) || !self.tags.is_empty())
397 && (!self.has_behavior(SearchBehavior::OPENPAGE) || self.open_page_count > 0)
398 } else {
399 (self.has_behavior(SearchBehavior::HISTORY) && self.visit_count > 0)
400 || (self.has_behavior(SearchBehavior::TYPED) && self.typed)
401 || (self.has_behavior(SearchBehavior::BOOKMARK) && self.bookmarked)
402 || (self.has_behavior(SearchBehavior::TAG) && !self.tags.is_empty())
403 || (self.has_behavior(SearchBehavior::OPENPAGE) && self.open_page_count > 0)
404 };
405 if !matches {
406 return false;
407 }
408 let fixed_url = self.fixup_url_str(self.url_str);
409 let search_fn = self.get_search_fn();
410
411 let trimmed_url = util::slice_up_to(fixed_url.as_ref(), MAX_CHARS_TO_SEARCH_THROUGH);
412 let trimmed_title = util::slice_up_to(self.title_str, MAX_CHARS_TO_SEARCH_THROUGH);
413 for token in self.search_str.split_ascii_whitespace() {
414 let matches = match (
415 self.has_behavior(SearchBehavior::TITLE),
416 self.has_behavior(SearchBehavior::URL),
417 ) {
418 (true, true) => {
419 (search_fn(token, trimmed_title) || search_fn(token, self.tags))
420 && search_fn(token, trimmed_url)
421 }
422 (true, false) => search_fn(token, trimmed_title) || search_fn(token, self.tags),
423 (false, true) => search_fn(token, trimmed_url),
424 (false, false) => {
425 search_fn(token, trimmed_url)
426 || search_fn(token, trimmed_title)
427 || search_fn(token, self.tags)
428 }
429 };
430 if !matches {
431 return false;
432 }
433 }
434 true
435 }
436}
437
#[cfg(test)]
mod test {
    use super::*;

    /// `is_ascii_lower_alpha` must agree with the standard library for every
    /// possible byte.
    #[test]
    fn test_is_ascii_lower_alpha() {
        for byte in 0u8..=255u8 {
            let expected = byte.is_ascii_lowercase();
            assert_eq!(
                is_ascii_lower_alpha(byte),
                expected,
                "is_lower_ascii_alpha is wrong for {}",
                byte
            );
        }
    }

    /// Checks the Unicode lower-casing facts the byte-level scanning relies on.
    #[test]
    fn test_casing_assumptions() {
        use std::char;

        // The only two non-ASCII code points expected to lower-case to ASCII:
        // U+0130 (304) and U+212A (8490).
        const LOWERS_TO_ASCII: [u32; 2] = [304, 8490];

        for code in 128..0x11_0000 {
            // Skip values that are not valid `char`s.
            let ch = match char::from_u32(code) {
                Some(ch) => ch,
                None => continue,
            };
            let mut lowered = ch.to_lowercase();
            let first = lowered.next().unwrap();
            if LOWERS_TO_ASCII.contains(&code) {
                assert!(
                    (first as u32) < 128,
                    "Lower case of non-ascii '{}' ({}) was unexpectedly not ascii",
                    ch,
                    code
                );
            } else {
                assert!(
                    (first as u32) >= 128,
                    "Lower case of non-ascii '{}' ({}) was unexpectedly ascii",
                    ch,
                    code
                );
                assert!(
                    lowered.next().is_none(),
                    "Lower case of '{}' ({}) produced multiple codepoints unexpectedly",
                    ch,
                    code
                );
            }
        }

        // ASCII must lower-case to exactly one ASCII code point.
        for code in 0..128 {
            let ch = char::from_u32(code).unwrap();
            let mut lowered = ch.to_lowercase();
            let first = lowered.next().unwrap();
            assert!(
                lowered.next().is_none() && (first as u32) < 128,
                "Lower case of ascii '{}' ({}) wasn't ascii :(",
                ch,
                code
            );
        }

        // For letters, the cheap OR-with-0x20 trick is a correct lower-casing.
        let letters = (b'a'..=b'z').chain(b'A'..=b'Z');
        for letter in letters {
            assert_eq!(
                dubious_to_ascii_lower(letter),
                letter.to_ascii_lowercase(),
                "c: '{}'",
                letter as char
            );
        }
    }
}