use crate::db::PlacesDb;
use crate::error::Result;
use crate::ffi::SearchResult as FfiSearchResult;
pub use crate::match_impl::{MatchBehavior, SearchBehavior};
use rusqlite::Row;
use serde_derive::*;
use sql_support::ConnExt;
use url::Url;
fn query_flat_rows_and_then<T, F, P>(
conn: &PlacesDb,
sql: &str,
params: P,
mapper: F,
) -> Result<Vec<T>>
where
F: FnMut(&Row<'_>) -> Result<T>,
P: rusqlite::Params,
{
let mut stmt = conn.prepare_maybe_cached(sql, true)?;
let iter = stmt.query_and_then(params, mapper)?;
Ok(iter
.inspect(|r| {
if let Err(ref e) = *r {
log::warn!("Failed to perform a search: {}", e);
if cfg!(debug_assertions) {
panic!("Failed to perform a search: {}", e);
}
}
})
.flatten()
.collect::<Vec<_>>())
}
#[derive(Debug, Clone)]
pub struct SearchParams {
pub search_string: String,
pub limit: u32,
}
pub fn search_frecent(conn: &PlacesDb, params: SearchParams) -> Result<Vec<SearchResult>> {
let mut matches = match_with_limit(
conn,
&[
&OriginOrUrl::new(¶ms.search_string),
&Adaptive::with_behavior(
¶ms.search_string,
MatchBehavior::Anywhere,
SearchBehavior::default(),
),
&Suggestions::with_behavior(
¶ms.search_string,
MatchBehavior::Anywhere,
SearchBehavior::default(),
),
],
params.limit,
)?;
matches.sort_unstable_by(|a, b| a.url.cmp(&b.url));
matches.dedup_by(|a, b| a.url == b.url);
Ok(matches)
}
pub fn match_url(conn: &PlacesDb, query: impl AsRef<str>) -> Result<Option<Url>> {
let scope = conn.begin_interrupt_scope()?;
let matcher = OriginOrUrl::new(query.as_ref());
let results = matcher.search(conn, 1)?;
scope.err_if_interrupted()?;
if let Some(res) = results.into_iter().next() {
Ok(Some(res.url))
} else {
Ok(None)
}
}
fn match_with_limit(
conn: &PlacesDb,
matchers: &[&dyn Matcher],
max_results: u32,
) -> Result<Vec<SearchResult>> {
let mut results = Vec::new();
let mut rem_results = max_results;
let scope = conn.begin_interrupt_scope()?;
for m in matchers {
if rem_results == 0 {
break;
}
scope.err_if_interrupted()?;
let matches = m.search(conn, rem_results)?;
results.extend(matches);
rem_results = rem_results.saturating_sub(results.len() as u32);
}
Ok(results)
}
pub fn accept_result(conn: &PlacesDb, search_string: &str, url: &Url) -> Result<()> {
conn.execute(
"INSERT OR REPLACE INTO moz_inputhistory(place_id, input, use_count)
SELECT h.id, IFNULL(i.input, :input_text), IFNULL(i.use_count, 0) * .9 + 1
FROM moz_places h
LEFT JOIN moz_inputhistory i ON i.place_id = h.id AND i.input = :input_text
WHERE url_hash = hash(:page_url) AND url = :page_url",
&[
(":input_text", &search_string),
(":page_url", &url.as_str()),
],
)?;
Ok(())
}
pub fn split_after_prefix(href: &str) -> (&str, &str) {
let haystack = &href.as_bytes()[..href.len().min(64)];
match memchr::memchr(b':', haystack) {
None => ("", href),
Some(index) => {
let hb = href.as_bytes();
let mut end = index + 1;
if hb.len() >= end + 2 && hb[end] == b'/' && hb[end + 1] == b'/' {
end += 2;
}
href.split_at(end)
}
}
}
pub fn split_after_host_and_port(href: &str) -> (&str, &str) {
let (_, remainder) = split_after_prefix(href);
let hp_definite_end =
memchr::memchr3(b'/', b'?', b'#', remainder.as_bytes()).unwrap_or(remainder.len());
let (before_hp, after_hp) = remainder.split_at(hp_definite_end);
let auth_end = memchr::memchr(b'@', before_hp.as_bytes())
.map(|i| i + 1)
.unwrap_or(0);
(&before_hp[auth_end..], after_hp)
}
fn looks_like_origin(string: &str) -> bool {
!string.is_empty()
&& !string.bytes().any(|c| {
!c.is_ascii() || c.is_ascii_whitespace() || c == b'/' || c == b'?' || c == b'#'
})
}
#[derive(Debug, Clone, Serialize, Eq, PartialEq)]
pub struct SearchResult {
pub search_string: String,
pub url: Url,
pub title: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub icon_url: Option<Url>,
pub frecency: i64,
}
impl SearchResult {
pub fn from_adaptive_row(row: &rusqlite::Row<'_>) -> Result<Self> {
let search_string = row.get::<_, String>("searchString")?;
let _place_id = row.get::<_, i64>("id")?;
let url = row.get::<_, String>("url")?;
let history_title = row.get::<_, Option<String>>("title")?;
let bookmark_title = row.get::<_, Option<String>>("btitle")?;
let frecency = row.get::<_, i64>("frecency")?;
let title = bookmark_title.or(history_title).unwrap_or_default();
let url = Url::parse(&url)?;
Ok(Self {
search_string,
url,
title,
icon_url: None,
frecency,
})
}
pub fn from_suggestion_row(row: &rusqlite::Row<'_>) -> Result<Self> {
let search_string = row.get::<_, String>("searchString")?;
let url = row.get::<_, String>("url")?;
let history_title = row.get::<_, Option<String>>("title")?;
let bookmark_title = row.get::<_, Option<String>>("btitle")?;
let title = bookmark_title.or(history_title).unwrap_or_default();
let url = Url::parse(&url)?;
let frecency = row.get::<_, i64>("frecency")?;
Ok(Self {
search_string,
url,
title,
icon_url: None,
frecency,
})
}
pub fn from_origin_row(row: &rusqlite::Row<'_>) -> Result<Self> {
let search_string = row.get::<_, String>("searchString")?;
let url = row.get::<_, String>("url")?;
let display_url = row.get::<_, String>("displayURL")?;
let frecency = row.get::<_, i64>("frecency")?;
let url = Url::parse(&url)?;
Ok(Self {
search_string,
url,
title: display_url,
icon_url: None,
frecency,
})
}
pub fn from_url_row(row: &rusqlite::Row<'_>) -> Result<Self> {
let search_string = row.get::<_, String>("searchString")?;
let href = row.get::<_, String>("url")?;
let stripped_url = row.get::<_, String>("strippedURL")?;
let frecency = row.get::<_, i64>("frecency")?;
let (url, display_url) = match href.find(&stripped_url) {
Some(stripped_url_index) => {
let stripped_prefix = &href[..stripped_url_index];
let title = match &href[stripped_url_index + stripped_url.len()..].find('/') {
Some(next_slash_index) => {
&href[stripped_url_index
..=stripped_url_index + stripped_url.len() + next_slash_index]
}
None => &href[stripped_url_index..],
};
let url = Url::parse(&[stripped_prefix, title].concat())?;
(url, title.into())
}
None => {
let url = Url::parse(&href)?;
(url, stripped_url)
}
};
Ok(Self {
search_string,
url,
title: display_url,
icon_url: None,
frecency,
})
}
}
impl From<SearchResult> for FfiSearchResult {
fn from(res: SearchResult) -> Self {
Self {
url: res.url,
title: res.title,
frecency: res.frecency,
}
}
}
trait Matcher {
fn search(&self, conn: &PlacesDb, max_results: u32) -> Result<Vec<SearchResult>>;
}
struct OriginOrUrl<'query> {
query: &'query str,
}
impl<'query> OriginOrUrl<'query> {
pub fn new(query: &'query str) -> OriginOrUrl<'query> {
OriginOrUrl { query }
}
}
const URL_SQL: &str = "
SELECT h.url as url,
:host || :remainder AS strippedURL,
h.frecency as frecency,
h.foreign_count > 0 AS bookmarked,
h.id as id,
:searchString AS searchString
FROM moz_places h
JOIN moz_origins o ON o.id = h.origin_id
WHERE o.rev_host = reverse_host(:host)
AND MAX(h.frecency, 0) >= :frecencyThreshold
AND h.hidden = 0
AND strip_prefix_and_userinfo(h.url) BETWEEN strippedURL AND strippedURL || X'FFFF'
UNION ALL
SELECT h.url as url,
:host || :remainder AS strippedURL,
h.frecency as frecency,
h.foreign_count > 0 AS bookmarked,
h.id as id,
:searchString AS searchString
FROM moz_places h
JOIN moz_origins o ON o.id = h.origin_id
WHERE o.rev_host = reverse_host(:host) || 'www.'
AND MAX(h.frecency, 0) >= :frecencyThreshold
AND h.hidden = 0
AND strip_prefix_and_userinfo(h.url) BETWEEN 'www.' || strippedURL AND 'www.' || strippedURL || X'FFFF'
ORDER BY h.frecency DESC, h.id DESC
LIMIT 1
";
const ORIGIN_SQL: &str = "
SELECT IFNULL(:prefix, prefix) || moz_origins.host || '/' AS url,
moz_origins.host || '/' AS displayURL,
frecency,
bookmarked,
id,
:searchString AS searchString
FROM (
SELECT host,
TOTAL(frecency) AS host_frecency,
(SELECT TOTAL(foreign_count) > 0 FROM moz_places
WHERE moz_places.origin_id = moz_origins.id) AS bookmarked
FROM moz_origins
WHERE host BETWEEN :searchString AND :searchString || X'FFFF'
GROUP BY host
HAVING host_frecency >= :frecencyThreshold
UNION ALL
SELECT host,
TOTAL(frecency) AS host_frecency,
(SELECT TOTAL(foreign_count) > 0 FROM moz_places
WHERE moz_places.origin_id = moz_origins.id) AS bookmarked
FROM moz_origins
WHERE host BETWEEN 'www.' || :searchString AND 'www.' || :searchString || X'FFFF'
GROUP BY host
HAVING host_frecency >= :frecencyThreshold
) AS grouped_hosts
JOIN moz_origins ON moz_origins.host = grouped_hosts.host
ORDER BY frecency DESC, id DESC
LIMIT 1
";
impl Matcher for OriginOrUrl<'_> {
fn search(&self, conn: &PlacesDb, _: u32) -> Result<Vec<SearchResult>> {
Ok(if looks_like_origin(self.query) {
query_flat_rows_and_then(
conn,
ORIGIN_SQL,
&[
(":prefix", &rusqlite::types::Null as &dyn rusqlite::ToSql),
(":searchString", &self.query),
(":frecencyThreshold", &-1i64),
],
SearchResult::from_origin_row,
)?
} else if self.query.contains(['/', ':', '?']) {
let (host, remainder) = split_after_host_and_port(self.query);
let punycode_host = idna::domain_to_ascii(host);
let host_str = if let Ok(host) = &punycode_host {
host.as_str()
} else {
return Ok(vec![]);
};
query_flat_rows_and_then(
conn,
URL_SQL,
&[
(":searchString", &self.query as &dyn rusqlite::ToSql),
(":host", &host_str),
(":remainder", &remainder),
(":frecencyThreshold", &-1i64),
],
SearchResult::from_url_row,
)?
} else {
vec![]
})
}
}
struct Adaptive<'query> {
query: &'query str,
match_behavior: MatchBehavior,
search_behavior: SearchBehavior,
}
impl<'query> Adaptive<'query> {
pub fn with_behavior(
query: &'query str,
match_behavior: MatchBehavior,
search_behavior: SearchBehavior,
) -> Adaptive<'query> {
Adaptive {
query,
match_behavior,
search_behavior,
}
}
}
impl Matcher for Adaptive<'_> {
fn search(&self, conn: &PlacesDb, max_results: u32) -> Result<Vec<SearchResult>> {
query_flat_rows_and_then(
conn,
"
SELECT h.url as url,
h.title as title,
EXISTS(SELECT 1 FROM moz_bookmarks
WHERE fk = h.id) AS bookmarked,
(SELECT title FROM moz_bookmarks
WHERE fk = h.id AND
title NOT NULL
ORDER BY lastModified DESC
LIMIT 1) AS btitle,
NULL AS tags,
h.visit_count_local + h.visit_count_remote AS visit_count,
h.typed as typed,
h.id as id,
NULL AS open_count,
h.frecency as frecency,
:searchString AS searchString
FROM (
SELECT ROUND(MAX(use_count) * (1 + (input = :searchString)), 1) AS rank,
place_id
FROM moz_inputhistory
WHERE input BETWEEN :searchString AND :searchString || X'FFFF'
GROUP BY place_id
) AS i
JOIN moz_places h ON h.id = i.place_id
WHERE AUTOCOMPLETE_MATCH(:searchString, h.url,
IFNULL(btitle, h.title), tags,
visit_count, h.typed, bookmarked,
NULL, :matchBehavior, :searchBehavior)
ORDER BY rank DESC, h.frecency DESC
LIMIT :maxResults",
&[
(":searchString", &self.query as &dyn rusqlite::ToSql),
(":matchBehavior", &self.match_behavior),
(":searchBehavior", &self.search_behavior),
(":maxResults", &max_results),
],
SearchResult::from_adaptive_row,
)
}
}
struct Suggestions<'query> {
query: &'query str,
match_behavior: MatchBehavior,
search_behavior: SearchBehavior,
}
impl<'query> Suggestions<'query> {
pub fn with_behavior(
query: &'query str,
match_behavior: MatchBehavior,
search_behavior: SearchBehavior,
) -> Suggestions<'query> {
Suggestions {
query,
match_behavior,
search_behavior,
}
}
}
impl Matcher for Suggestions<'_> {
fn search(&self, conn: &PlacesDb, max_results: u32) -> Result<Vec<SearchResult>> {
query_flat_rows_and_then(
conn,
"
SELECT h.url, h.title,
EXISTS(SELECT 1 FROM moz_bookmarks
WHERE fk = h.id) AS bookmarked,
(SELECT title FROM moz_bookmarks
WHERE fk = h.id AND
title NOT NULL
ORDER BY lastModified DESC
LIMIT 1) AS btitle,
NULL AS tags,
h.visit_count_local + h.visit_count_remote AS visit_count,
h.typed as typed,
h.id as id,
NULL AS open_count, h.frecency, :searchString AS searchString
FROM moz_places h
WHERE h.frecency > 0
AND AUTOCOMPLETE_MATCH(:searchString, h.url,
IFNULL(btitle, h.title), tags,
visit_count, h.typed,
bookmarked, NULL,
:matchBehavior, :searchBehavior)
AND (+h.visit_count_local > 0 OR +h.visit_count_remote > 0)
ORDER BY h.frecency DESC, h.id DESC
LIMIT :maxResults",
&[
(":searchString", &self.query as &dyn rusqlite::ToSql),
(":matchBehavior", &self.match_behavior),
(":searchBehavior", &self.search_behavior),
(":maxResults", &max_results),
],
SearchResult::from_suggestion_row,
)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::api::places_api::test::new_mem_connection;
use crate::observation::VisitObservation;
use crate::storage::history::apply_observation;
use crate::types::VisitType;
use types::Timestamp;
#[test]
fn split() {
assert_eq!(
split_after_prefix("http://example.com"),
("http://", "example.com")
);
assert_eq!(split_after_prefix("foo:example"), ("foo:", "example"));
assert_eq!(split_after_prefix("foo:"), ("foo:", ""));
assert_eq!(split_after_prefix("notaspec"), ("", "notaspec"));
assert_eq!(split_after_prefix("http:/"), ("http:", "/"));
assert_eq!(split_after_prefix("http://"), ("http://", ""));
assert_eq!(
split_after_host_and_port("http://example.com/"),
("example.com", "/")
);
assert_eq!(
split_after_host_and_port("http://example.com:8888/"),
("example.com:8888", "/")
);
assert_eq!(
split_after_host_and_port("http://user:pass@example.com/"),
("example.com", "/")
);
assert_eq!(split_after_host_and_port("foo:example"), ("example", ""));
assert_eq!(
split_after_host_and_port("http://foo.com/stuff/@21.3132115"),
("foo.com", "/stuff/@21.3132115")
);
assert_eq!(
split_after_host_and_port("http://foo.com/go?email=foo@example.com"),
("foo.com", "/go?email=foo@example.com")
);
assert_eq!(
split_after_host_and_port("http://a:b@foo.com/stuff/@21.3132115"),
("foo.com", "/stuff/@21.3132115")
);
assert_eq!(
split_after_host_and_port("http://a:b@foo.com/123#abcdef@title"),
("foo.com", "/123#abcdef@title")
);
}
#[test]
fn search() {
let conn = new_mem_connection();
let url = Url::parse("http://example.com/123").unwrap();
let visit = VisitObservation::new(url.clone())
.with_title("Example page 123".to_string())
.with_visit_type(VisitType::Typed)
.with_at(Timestamp::now());
apply_observation(&conn, visit).expect("Should apply visit");
let by_origin = search_frecent(
&conn,
SearchParams {
search_string: "example.com".into(),
limit: 10,
},
)
.expect("Should search by origin");
assert!(by_origin
.iter()
.any(|result| result.search_string == "example.com"
&& result.title == "example.com/"
&& result.url.as_str() == "http://example.com/"));
let by_url_without_path = search_frecent(
&conn,
SearchParams {
search_string: "http://example.com".into(),
limit: 10,
},
)
.expect("Should search by URL without path");
assert!(by_url_without_path
.iter()
.any(|result| result.title == "example.com/"
&& result.url.as_str() == "http://example.com/"));
let by_url_with_path = search_frecent(
&conn,
SearchParams {
search_string: "http://example.com/1".into(),
limit: 10,
},
)
.expect("Should search by URL with path");
assert!(by_url_with_path
.iter()
.any(|result| result.title == "example.com/123"
&& result.url.as_str() == "http://example.com/123"));
accept_result(&conn, "ample", &url).expect("Should accept input history match");
let by_adaptive = search_frecent(
&conn,
SearchParams {
search_string: "ample".into(),
limit: 10,
},
)
.expect("Should search by adaptive input history");
assert!(by_adaptive
.iter()
.any(|result| result.search_string == "ample" && result.url == url));
let with_limit = search_frecent(
&conn,
SearchParams {
search_string: "example".into(),
limit: 1,
},
)
.expect("Should search until reaching limit");
assert_eq!(
with_limit,
vec![SearchResult {
search_string: "example".into(),
url: Url::parse("http://example.com/").unwrap(),
title: "example.com/".into(),
icon_url: None,
frecency: 2000,
}]
);
}
#[test]
fn search_unicode() {
let conn = new_mem_connection();
let url = Url::parse("http://exämple.com/123").unwrap();
let visit = VisitObservation::new(url)
.with_title("Example page 123".to_string())
.with_visit_type(VisitType::Typed)
.with_at(Timestamp::now());
apply_observation(&conn, visit).expect("Should apply visit");
let by_url_without_path = search_frecent(
&conn,
SearchParams {
search_string: "http://exämple.com".into(),
limit: 10,
},
)
.expect("Should search by URL without path");
assert!(by_url_without_path
.iter()
.any(|result| result.title == "xn--exmple-cua.com/"
&& result.url.as_str() == "http://xn--exmple-cua.com/"));
let by_url_with_path = search_frecent(
&conn,
SearchParams {
search_string: "http://exämple.com/1".into(),
limit: 10,
},
)
.expect("Should search by URL with path");
assert!(
by_url_with_path
.iter()
.any(|result| result.title == "xn--exmple-cua.com/123"
&& result.url.as_str() == "http://xn--exmple-cua.com/123"),
"{:?}",
by_url_with_path
);
let ball_of_yarn_about_blank = "about:blank🧶";
let empty = match_url(&conn, ball_of_yarn_about_blank).unwrap();
assert!(empty.is_none());
search_frecent(
&conn,
SearchParams {
search_string: ball_of_yarn_about_blank.into(),
limit: 10,
},
)
.unwrap();
}
#[test]
#[cfg_attr(
debug_assertions,
should_panic(expected = "Failed to perform a search:")
)]
fn search_invalid_url() {
let conn = new_mem_connection();
conn.execute(
"INSERT INTO moz_places (guid, url, url_hash, frecency)
VALUES ('fake_guid___', 'not-a-url', hash('not-a-url'), 10)",
[],
)
.expect("should insert");
crate::storage::delete_pending_temp_tables(&conn).expect("should work");
let _ = search_frecent(
&conn,
SearchParams {
search_string: "not-a-url".into(),
limit: 10,
},
);
}
}