use crate::error::*;
use crate::types::VisitType;
use error_support::trace_error;
use rusqlite::Connection;
use types::Timestamp;
/// What the caller of `calculate_frecency` knows about whether the most
/// recent visit should be scored as a redirect source.
#[derive(Clone, Copy, Debug, PartialEq)]
enum RedirectBonus {
    /// The caller passed no hint (`is_redirect` was `None`).
    Unknown,
    /// The caller said the most recent visit is a redirect source.
    Redirect,
    /// The caller said the most recent visit is not a redirect source.
    Normal,
}
/// Tunable parameters of the frecency computation.
///
/// A sampled visit contributes `weight * bonus / 100` points, where the
/// weight comes from the visit's age bucket and the bonus from its
/// transition type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FrecencySettings {
    /// Maximum number of most-recent visits sampled per page.
    pub num_visits: i32,

    // Age buckets: a visit falls into the first bucket whose cutoff (in days,
    // inclusive) is not smaller than the visit's age.
    /// Upper age bound, in days, of the first bucket.
    pub first_bucket_cutoff_days: i32,
    /// Upper age bound, in days, of the second bucket.
    pub second_bucket_cutoff_days: i32,
    /// Upper age bound, in days, of the third bucket.
    pub third_bucket_cutoff_days: i32,
    /// Upper age bound, in days, of the fourth bucket.
    pub fourth_bucket_cutoff_days: i32,

    /// Weight of visits in the first bucket.
    pub first_bucket_weight: i32,
    /// Weight of visits in the second bucket.
    pub second_bucket_weight: i32,
    /// Weight of visits in the third bucket.
    pub third_bucket_weight: i32,
    /// Weight of visits in the fourth bucket.
    pub fourth_bucket_weight: i32,
    /// Weight of visits older than the fourth bucket's cutoff.
    pub default_bucket_weight: i32,

    // Transition bonuses, expressed in hundredths of the bucket weight.
    /// Bonus for embed visits.
    pub embed_visit_bonus: i32,
    /// Bonus for framed-link visits.
    pub framed_link_visit_bonus: i32,
    /// Bonus for link visits.
    pub link_visit_bonus: i32,
    /// Bonus for typed visits.
    pub typed_visit_bonus: i32,
    /// Bonus for bookmark visits; also added to every sampled visit of a
    /// bookmarked page.
    pub bookmark_visit_bonus: i32,
    /// Bonus for download visits.
    pub download_visit_bonus: i32,
    /// Bonus for permanent-redirect visits.
    pub permanent_redirect_visit_bonus: i32,
    /// Bonus for temporary-redirect visits.
    pub temporary_redirect_visit_bonus: i32,
    /// Bonus used instead of the transition bonus when the visit is scored
    /// as a redirect source.
    pub redirect_source_visit_bonus: i32,
    /// Bonus for visits with no known transition type.
    pub default_visit_bonus: i32,
    /// Bonus for a bookmark transition when the page counts as unvisited.
    pub unvisited_bookmark_bonus: i32,
    /// Bonus for a typed transition when the page counts as unvisited.
    pub unvisited_typed_bonus: i32,
    /// Bonus for reload visits.
    pub reload_visit_bonus: i32,
}
/// The settings returned by `FrecencySettings::default()`.
pub const DEFAULT_FRECENCY_SETTINGS: FrecencySettings = FrecencySettings {
// Upper bound on the number of recent visits sampled per page.
num_visits: 10,
// Age cutoffs in days (inclusive) for the four age buckets.
first_bucket_cutoff_days: 4,
second_bucket_cutoff_days: 14,
third_bucket_cutoff_days: 31,
fourth_bucket_cutoff_days: 90,
// Weight of a visit in each bucket; `default_bucket_weight` covers visits
// older than the fourth cutoff.
first_bucket_weight: 100,
second_bucket_weight: 70,
third_bucket_weight: 50,
fourth_bucket_weight: 30,
default_bucket_weight: 10,
// Per-transition bonuses. Scoring divides a bonus by 100 before multiplying
// it by the bucket weight, so 100 means "the full weight".
embed_visit_bonus: 0,
framed_link_visit_bonus: 0,
link_visit_bonus: 100,
typed_visit_bonus: 2000,
bookmark_visit_bonus: 75,
download_visit_bonus: 0,
permanent_redirect_visit_bonus: 0,
temporary_redirect_visit_bonus: 0,
// Used in place of the transition bonus when a visit is scored as a
// redirect source.
redirect_source_visit_bonus: 25,
default_visit_bonus: 0,
// Bonuses used for bookmark/typed transitions when `visited` is false.
unvisited_bookmark_bonus: 140,
unvisited_typed_bonus: 200,
reload_visit_bonus: 0,
};
impl Default for FrecencySettings {
    /// Yields a copy of [`DEFAULT_FRECENCY_SETTINGS`].
    #[inline]
    fn default() -> FrecencySettings {
        DEFAULT_FRECENCY_SETTINGS
    }
}
impl FrecencySettings {
    /// Returns the bonus to apply to a visit, in hundredths of the age-bucket
    /// weight.
    ///
    /// * `visit_type` - the visit's transition type; `None` yields
    ///   `default_visit_bonus`.
    /// * `visited` - only consulted for `Typed` and `Bookmark` transitions,
    ///   where it selects between the regular and the "unvisited" bonus.
    /// * `redirect` - when `true` the visit is scored as a redirect source and
    ///   `redirect_source_visit_bonus` is returned regardless of `visit_type`.
    pub fn get_transition_bonus(
        &self,
        visit_type: Option<VisitType>,
        visited: bool,
        redirect: bool,
    ) -> i32 {
        // A redirect source overrides whatever transition led to the visit.
        if redirect {
            return self.redirect_source_visit_bonus;
        }
        match (visit_type, visited) {
            (Some(VisitType::Link), _) => self.link_visit_bonus,
            (Some(VisitType::Embed), _) => self.embed_visit_bonus,
            (Some(VisitType::FramedLink), _) => self.framed_link_visit_bonus,
            // Each redirect kind maps to its own setting. These two were
            // previously crossed, which went unnoticed because both default to 0.
            (Some(VisitType::RedirectPermanent), _) => self.permanent_redirect_visit_bonus,
            (Some(VisitType::RedirectTemporary), _) => self.temporary_redirect_visit_bonus,
            (Some(VisitType::Download), _) => self.download_visit_bonus,
            (Some(VisitType::Reload), _) => self.reload_visit_bonus,
            (Some(VisitType::Typed), true) => self.typed_visit_bonus,
            (Some(VisitType::Typed), false) => self.unvisited_typed_bonus,
            (Some(VisitType::Bookmark), true) => self.bookmark_visit_bonus,
            (Some(VisitType::Bookmark), false) => self.unvisited_bookmark_bonus,
            (Some(VisitType::UpdatePlace), _) => self.default_visit_bonus,
            (None, _) => self.default_visit_bonus,
        }
    }

    /// Returns the weight of a visit that is `age_in_days` old.
    ///
    /// Bucket cutoffs are inclusive and checked from youngest to oldest;
    /// anything older than the fourth cutoff gets `default_bucket_weight`.
    fn get_frecency_aged_weight(&self, age_in_days: i32) -> i32 {
        if age_in_days <= self.first_bucket_cutoff_days {
            self.first_bucket_weight
        } else if age_in_days <= self.second_bucket_cutoff_days {
            self.second_bucket_weight
        } else if age_in_days <= self.third_bucket_cutoff_days {
            self.third_bucket_weight
        } else if age_in_days <= self.fourth_bucket_cutoff_days {
            self.fourth_bucket_weight
        } else {
            self.default_bucket_weight
        }
    }
}
/// Inputs for computing the frecency of a single page.
///
/// Populated by `FrecencyComputation::new` from the page's `moz_places` row
/// plus the arguments supplied by the caller.
struct FrecencyComputation<'db, 's> {
/// Connection used to query the page's visits.
conn: &'db Connection,
/// Weights and bonuses used for scoring.
settings: &'s FrecencySettings,
/// `moz_places.id` of the page being scored.
page_id: i64,
/// Caller-supplied hint about the most recent visit; consulted only for the
/// first sampled visit.
most_recent_redirect_bonus: RedirectBonus,
/// Value of the `typed` column; non-zero adds the unvisited-typed bonus when
/// scoring an unvisited bookmark.
typed: i32,
/// `visit_count_local + visit_count_remote`.
visit_count: i32,
/// Value of the `foreign_count` column; a positive value is treated as
/// "the page has a bookmark".
foreign_count: i32,
/// Result of the `substr(url, 0, 7) = 'place:'` check on the page's URL.
is_query: bool,
}
impl<'db, 's> FrecencyComputation<'db, 's> {
fn new(
conn: &'db Connection,
settings: &'s FrecencySettings,
page_id: i64,
most_recent_redirect_bonus: RedirectBonus,
) -> Result<Self> {
let mut stmt = conn.prepare_cached(
"
SELECT
typed,
(visit_count_local + visit_count_remote) as visit_count,
foreign_count,
(substr(url, 0, 7) = 'place:') as is_query
FROM moz_places
WHERE id = :page_id
",
)?;
let mut rows = stmt.query(&[(":page_id", &page_id)])?;
let row = trace_error!(rows.next()?.ok_or(rusqlite::Error::QueryReturnedNoRows))?;
let typed: i32 = row.get("typed")?;
let visit_count: i32 = row.get("visit_count")?;
let foreign_count: i32 = row.get("foreign_count")?;
let is_query: bool = row.get("is_query")?;
Ok(Self {
conn,
settings,
page_id,
most_recent_redirect_bonus,
typed,
visit_count,
foreign_count,
is_query,
})
}
fn has_bookmark(&self) -> bool {
self.foreign_count > 0
}
fn score_recent_visits(&self) -> Result<(usize, f32)> {
let get_recent_visits = format!(
"SELECT
IFNULL(origin.visit_type, v.visit_type) AS visit_type,
target.visit_type AS target_visit_type,
v.visit_date
FROM moz_historyvisits v
LEFT JOIN moz_historyvisits origin ON origin.id = v.from_visit
AND v.visit_type IN ({redirect_permanent}, {redirect_temporary})
LEFT JOIN moz_historyvisits target ON v.id = target.from_visit
AND target.visit_type IN ({redirect_permanent}, {redirect_temporary})
WHERE v.place_id = :page_id
ORDER BY v.visit_date DESC
LIMIT {max_visits}",
redirect_permanent = VisitType::RedirectPermanent as u8,
redirect_temporary = VisitType::RedirectTemporary as u8,
max_visits = self.settings.num_visits,
);
let mut stmt = self.conn.prepare_cached(&get_recent_visits)?;
let now = Timestamp::now();
let row_iter = stmt.query_and_then(
&[(":page_id", &self.page_id)],
|row| -> rusqlite::Result<_> {
let visit_type = row.get::<_, Option<u8>>("visit_type")?.unwrap_or(0);
let target_visit_type = row.get::<_, Option<u8>>("target_visit_type")?.unwrap_or(0);
let visit_date: Timestamp = row.get("visit_date")?;
let age_in_days =
(now.as_millis() as f64 - visit_date.as_millis() as f64) / 86_400_000.0;
Ok((
VisitType::from_primitive(visit_type),
VisitType::from_primitive(target_visit_type),
age_in_days.round() as i32,
))
},
)?;
let mut num_sampled_visits = 0;
let mut points_for_sampled_visits = 0.0f32;
for row_result in row_iter {
let (visit_type, target_visit_type, age_in_days) = row_result?;
let use_redirect_bonus = if self.most_recent_redirect_bonus == RedirectBonus::Unknown
|| num_sampled_visits > 0
{
target_visit_type == Some(VisitType::RedirectPermanent)
|| (target_visit_type == Some(VisitType::RedirectTemporary)
&& visit_type != Some(VisitType::Typed))
} else {
self.most_recent_redirect_bonus == RedirectBonus::Redirect
};
let mut bonus =
self.settings
.get_transition_bonus(visit_type, true, use_redirect_bonus);
if self.has_bookmark() {
bonus += self
.settings
.get_transition_bonus(Some(VisitType::Bookmark), true, false);
}
if bonus != 0 {
let weight = self.settings.get_frecency_aged_weight(age_in_days) as f32;
points_for_sampled_visits += weight * (bonus as f32 / 100.0)
}
num_sampled_visits += 1;
}
Ok((num_sampled_visits, points_for_sampled_visits))
}
fn get_frecency_for_sample(&self, num_sampled: usize, score: f32) -> i32 {
if score == 0.0f32 {
-1
} else {
((self.visit_count as f32) * score.ceil() / (num_sampled as f32)).ceil() as i32
}
}
fn compute_unvisited_bookmark_frecency(&self) -> i32 {
let mut bonus = self
.settings
.get_transition_bonus(Some(VisitType::Bookmark), false, false);
if self.typed != 0 {
bonus += self
.settings
.get_transition_bonus(Some(VisitType::Typed), false, false);
}
let score = (self.settings.first_bucket_weight as f32) * (bonus as f32 / 100.0f32);
score.ceil() as i32
}
}
/// Computes the frecency of the page identified by `page_id`.
///
/// * `db` - connection holding the `moz_places` / `moz_historyvisits` tables.
/// * `settings` - weights and bonuses to score with.
/// * `page_id` - `moz_places.id` of the page; must be positive.
/// * `is_redirect` - whether the most recent visit is a redirect source, or
///   `None` when the caller does not know.
///
/// Pages with at least one sampled visit are scored from those visits.
/// Otherwise the result is `0`, unless the page is bookmarked and is not a
/// `place:` query, in which case the unvisited-bookmark score is used.
///
/// # Panics
///
/// Panics if `page_id` is not greater than zero.
///
/// # Errors
///
/// Propagates database errors, including the case where no row exists for
/// `page_id`.
pub fn calculate_frecency(
    db: &Connection,
    settings: &FrecencySettings,
    page_id: i64,
    is_redirect: Option<bool>,
) -> Result<i32> {
    assert!(page_id > 0, "calculate_frecency given invalid page_id");

    let most_recent_redirect_bonus =
        is_redirect.map_or(RedirectBonus::Unknown, |was_redirect| {
            if was_redirect {
                RedirectBonus::Redirect
            } else {
                RedirectBonus::Normal
            }
        });
    let fc = FrecencyComputation::new(db, settings, page_id, most_recent_redirect_bonus)?;

    // Visits are only queried when the stored counters say there are any.
    if fc.visit_count > 0 {
        let (num_sampled_visits, sample_score) = fc.score_recent_visits()?;
        if num_sampled_visits > 0 {
            return Ok(fc.get_frecency_for_sample(num_sampled_visits, sample_score));
        }
    }

    // Nothing was sampled: only bookmarked, non-query pages get a score.
    let frecency = if fc.has_bookmark() && !fc.is_query {
        fc.compute_unvisited_bookmark_frecency()
    } else {
        0
    };
    Ok(frecency)
}