places/
frecency.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5use crate::error::*;
6use crate::types::VisitType;
7use error_support::trace_error;
8use rusqlite::Connection;
9use types::Timestamp;
10
/// What the caller told us about the newest visit when asking for a frecency
/// recalculation (see `calculate_frecency`, which builds this from its
/// `is_redirect: Option<bool>` argument).
#[derive(Clone, Copy, Debug, PartialEq)]
enum RedirectBonus {
    /// The caller passed `None`: decide from the stored visits instead.
    Unknown,
    /// The caller passed `Some(true)`: the newest visit gets the redirect bonus.
    Redirect,
    /// The caller passed `Some(false)`: the newest visit gets its regular bonus.
    Normal,
}
17
/// Tunable inputs of the frecency calculation. Each field mirrors one
/// `places.frecency.*` preference, named in the field's documentation.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FrecencySettings {
    // TODO: These probably should not all be i32s...
    /// From `places.frecency.numVisits`.
    pub num_visits: i32,
    /// From `places.frecency.firstBucketCutoff`.
    pub first_bucket_cutoff_days: i32,
    /// From `places.frecency.secondBucketCutoff`.
    pub second_bucket_cutoff_days: i32,
    /// From `places.frecency.thirdBucketCutoff`.
    pub third_bucket_cutoff_days: i32,
    /// From `places.frecency.fourthBucketCutoff`.
    pub fourth_bucket_cutoff_days: i32,
    /// From `places.frecency.firstBucketWeight`.
    pub first_bucket_weight: i32,
    /// From `places.frecency.secondBucketWeight`.
    pub second_bucket_weight: i32,
    /// From `places.frecency.thirdBucketWeight`.
    pub third_bucket_weight: i32,
    /// From `places.frecency.fourthBucketWeight`.
    pub fourth_bucket_weight: i32,
    /// From `places.frecency.defaultBucketWeight`.
    pub default_bucket_weight: i32,
    /// From `places.frecency.embedVisitBonus`.
    pub embed_visit_bonus: i32,
    /// From `places.frecency.framedLinkVisitBonus`.
    pub framed_link_visit_bonus: i32,
    /// From `places.frecency.linkVisitBonus`.
    pub link_visit_bonus: i32,
    /// From `places.frecency.typedVisitBonus`.
    pub typed_visit_bonus: i32,
    /// From `places.frecency.bookmarkVisitBonus`.
    pub bookmark_visit_bonus: i32,
    /// From `places.frecency.downloadVisitBonus`.
    pub download_visit_bonus: i32,
    /// From `places.frecency.permRedirectVisitBonus`.
    pub permanent_redirect_visit_bonus: i32,
    /// From `places.frecency.tempRedirectVisitBonus`.
    pub temporary_redirect_visit_bonus: i32,
    /// From `places.frecency.redirectSourceVisitBonus`.
    pub redirect_source_visit_bonus: i32,
    /// From `places.frecency.defaultVisitBonus`.
    pub default_visit_bonus: i32,
    /// From `places.frecency.unvisitedBookmarkBonus`.
    pub unvisited_bookmark_bonus: i32,
    /// From `places.frecency.unvisitedTypedBonus`.
    pub unvisited_typed_bonus: i32,
    /// From `places.frecency.reloadVisitBonus`.
    pub reload_visit_bonus: i32,
}
45
46pub const DEFAULT_FRECENCY_SETTINGS: FrecencySettings = FrecencySettings {
47    // These are the default values of the preferences.
48    num_visits: 10,
49    first_bucket_cutoff_days: 4,
50    second_bucket_cutoff_days: 14,
51    third_bucket_cutoff_days: 31,
52    fourth_bucket_cutoff_days: 90,
53    first_bucket_weight: 100,
54    second_bucket_weight: 70,
55    third_bucket_weight: 50,
56    fourth_bucket_weight: 30,
57    default_bucket_weight: 10,
58    embed_visit_bonus: 0,
59    framed_link_visit_bonus: 0,
60    link_visit_bonus: 100,
61    typed_visit_bonus: 2000,
62    bookmark_visit_bonus: 75,
63    download_visit_bonus: 0,
64    permanent_redirect_visit_bonus: 0,
65    temporary_redirect_visit_bonus: 0,
66    redirect_source_visit_bonus: 25,
67    default_visit_bonus: 0,
68    unvisited_bookmark_bonus: 140,
69    unvisited_typed_bonus: 200,
70    reload_visit_bonus: 0,
71};
72
73impl Default for FrecencySettings {
74    #[inline]
75    fn default() -> Self {
76        DEFAULT_FRECENCY_SETTINGS
77    }
78}
79
80impl FrecencySettings {
81    // Note: in Places, `redirect` defaults to false.
82    pub fn get_transition_bonus(
83        &self,
84        visit_type: Option<VisitType>,
85        visited: bool,
86        redirect: bool,
87    ) -> i32 {
88        if redirect {
89            return self.redirect_source_visit_bonus;
90        }
91        match (visit_type, visited) {
92            (Some(VisitType::Link), _) => self.link_visit_bonus,
93            (Some(VisitType::Embed), _) => self.embed_visit_bonus,
94            (Some(VisitType::FramedLink), _) => self.framed_link_visit_bonus,
95            (Some(VisitType::RedirectPermanent), _) => self.temporary_redirect_visit_bonus,
96            (Some(VisitType::RedirectTemporary), _) => self.permanent_redirect_visit_bonus,
97            (Some(VisitType::Download), _) => self.download_visit_bonus,
98            (Some(VisitType::Reload), _) => self.reload_visit_bonus,
99            (Some(VisitType::Typed), true) => self.typed_visit_bonus,
100            (Some(VisitType::Typed), false) => self.unvisited_typed_bonus,
101            (Some(VisitType::Bookmark), true) => self.bookmark_visit_bonus,
102            (Some(VisitType::Bookmark), false) => self.unvisited_bookmark_bonus,
103            (Some(VisitType::UpdatePlace), _) => self.default_visit_bonus,
104            // 0 == undefined (see bug 375777 in bugzilla for details)
105            (None, _) => self.default_visit_bonus,
106        }
107    }
108
109    fn get_frecency_aged_weight(&self, age_in_days: i32) -> i32 {
110        if age_in_days <= self.first_bucket_cutoff_days {
111            self.first_bucket_weight
112        } else if age_in_days <= self.second_bucket_cutoff_days {
113            self.second_bucket_weight
114        } else if age_in_days <= self.third_bucket_cutoff_days {
115            self.third_bucket_weight
116        } else if age_in_days <= self.fourth_bucket_cutoff_days {
117            self.fourth_bucket_weight
118        } else {
119            self.default_bucket_weight
120        }
121    }
122}
123
124struct FrecencyComputation<'db, 's> {
125    conn: &'db Connection,
126    settings: &'s FrecencySettings,
127    page_id: i64,
128    most_recent_redirect_bonus: RedirectBonus,
129
130    typed: i32,
131    visit_count: i32,
132    foreign_count: i32,
133    is_query: bool,
134}
135
136impl<'db, 's> FrecencyComputation<'db, 's> {
137    fn new(
138        conn: &'db Connection,
139        settings: &'s FrecencySettings,
140        page_id: i64,
141        most_recent_redirect_bonus: RedirectBonus,
142    ) -> Result<Self> {
143        let mut stmt = conn.prepare_cached(
144            "
145            SELECT
146                typed,
147                (visit_count_local + visit_count_remote) as visit_count,
148                foreign_count,
149                (substr(url, 0, 7) = 'place:') as is_query
150            FROM moz_places
151            WHERE id = :page_id
152        ",
153        )?;
154        let mut rows = stmt.query(&[(":page_id", &page_id)])?;
155        // trace_error to track down #4856
156        let row = trace_error!(rows.next()?.ok_or(rusqlite::Error::QueryReturnedNoRows))?;
157        let typed: i32 = row.get("typed")?;
158        let visit_count: i32 = row.get("visit_count")?;
159        let foreign_count: i32 = row.get("foreign_count")?;
160        let is_query: bool = row.get("is_query")?;
161
162        Ok(Self {
163            conn,
164            settings,
165            page_id,
166            most_recent_redirect_bonus,
167            typed,
168            visit_count,
169            foreign_count,
170            is_query,
171        })
172    }
173
174    fn has_bookmark(&self) -> bool {
175        self.foreign_count > 0
176    }
177
178    fn score_recent_visits(&self) -> Result<(usize, f32)> {
179        // Get a sample of the last visits to the page, to calculate its weight.
180        // In case the visit is a redirect target, calculate the frecency
181        // as if the original page was visited.
182        // If it's a redirect source, we may want to use a lower bonus.
183        let get_recent_visits = format!(
184            "SELECT
185                 IFNULL(origin.visit_type, v.visit_type) AS visit_type,
186                 target.visit_type AS target_visit_type,
187                 v.visit_date
188             FROM moz_historyvisits v
189             LEFT JOIN moz_historyvisits origin ON origin.id = v.from_visit
190                 AND v.visit_type IN ({redirect_permanent}, {redirect_temporary})
191             LEFT JOIN moz_historyvisits target ON v.id = target.from_visit
192                 AND target.visit_type IN ({redirect_permanent}, {redirect_temporary})
193             WHERE v.place_id = :page_id
194             ORDER BY v.visit_date DESC
195             LIMIT {max_visits}",
196            redirect_permanent = VisitType::RedirectPermanent as u8,
197            redirect_temporary = VisitType::RedirectTemporary as u8,
198            // in practice this is constant, so caching the query is fine.
199            // (rusqlite has a max cache size too should things change)
200            max_visits = self.settings.num_visits,
201        );
202
203        let mut stmt = self.conn.prepare_cached(&get_recent_visits)?;
204
205        let now = Timestamp::now();
206
207        let row_iter = stmt.query_and_then(
208            &[(":page_id", &self.page_id)],
209            |row| -> rusqlite::Result<_> {
210                let visit_type = row.get::<_, Option<u8>>("visit_type")?.unwrap_or(0);
211                let target_visit_type = row.get::<_, Option<u8>>("target_visit_type")?.unwrap_or(0);
212                let visit_date: Timestamp = row.get("visit_date")?;
213                let age_in_days =
214                    (now.as_millis() as f64 - visit_date.as_millis() as f64) / 86_400_000.0;
215                Ok((
216                    VisitType::from_primitive(visit_type),
217                    VisitType::from_primitive(target_visit_type),
218                    age_in_days.round() as i32,
219                ))
220            },
221        )?;
222
223        let mut num_sampled_visits = 0;
224        let mut points_for_sampled_visits = 0.0f32;
225
226        for row_result in row_iter {
227            let (visit_type, target_visit_type, age_in_days) = row_result?;
228            // When adding a new visit, we should haved passed-in whether we should
229            // use the redirect bonus. We can't fetch this information from the
230            // database, because we only store redirect targets.
231            // For older visits we extract the value from the database.
232            let use_redirect_bonus = if self.most_recent_redirect_bonus == RedirectBonus::Unknown
233                || num_sampled_visits > 0
234            {
235                target_visit_type == Some(VisitType::RedirectPermanent)
236                    || (target_visit_type == Some(VisitType::RedirectTemporary)
237                        && visit_type != Some(VisitType::Typed))
238            } else {
239                self.most_recent_redirect_bonus == RedirectBonus::Redirect
240            };
241
242            let mut bonus =
243                self.settings
244                    .get_transition_bonus(visit_type, true, use_redirect_bonus);
245
246            if self.has_bookmark() {
247                bonus += self
248                    .settings
249                    .get_transition_bonus(Some(VisitType::Bookmark), true, false);
250            }
251            if bonus != 0 {
252                let weight = self.settings.get_frecency_aged_weight(age_in_days) as f32;
253                points_for_sampled_visits += weight * (bonus as f32 / 100.0)
254            }
255            num_sampled_visits += 1;
256        }
257
258        Ok((num_sampled_visits, points_for_sampled_visits))
259    }
260
261    fn get_frecency_for_sample(&self, num_sampled: usize, score: f32) -> i32 {
262        if score == 0.0f32 {
263            // We were unable to calculate points, maybe cause all the visits in the
264            // sample had a zero bonus. Though, we know the page has some past valid
265            // visit, or visit_count would be zero. Thus we set the frecency to
266            // -1, so they are still shown in autocomplete.
267            -1
268        } else {
269            // Estimate frecency using the sampled visits.
270            // Use ceil() so that we don't round down to 0, which
271            // would cause us to completely ignore the place during autocomplete.
272            ((self.visit_count as f32) * score.ceil() / (num_sampled as f32)).ceil() as i32
273        }
274    }
275
276    fn compute_unvisited_bookmark_frecency(&self) -> i32 {
277        // Make it so something bookmarked and typed will have a higher frecency
278        // than something just typed or just bookmarked.
279        let mut bonus = self
280            .settings
281            .get_transition_bonus(Some(VisitType::Bookmark), false, false);
282        if self.typed != 0 {
283            bonus += self
284                .settings
285                .get_transition_bonus(Some(VisitType::Typed), false, false);
286        }
287
288        // Assume "now" as our age_in_days, so use the first bucket.
289        let score = (self.settings.first_bucket_weight as f32) * (bonus as f32 / 100.0f32);
290
291        // use ceil() so that we don't round down to 0, which
292        // would cause us to completely ignore the place during autocomplete
293        score.ceil() as i32
294    }
295}
296
297pub fn calculate_frecency(
298    db: &Connection,
299    settings: &FrecencySettings,
300    page_id: i64,
301    is_redirect: Option<bool>,
302) -> Result<i32> {
303    assert!(page_id > 0, "calculate_frecency given invalid page_id");
304
305    let most_recent_redirect_bonus = match is_redirect {
306        None => RedirectBonus::Unknown,
307        Some(true) => RedirectBonus::Redirect,
308        Some(false) => RedirectBonus::Normal,
309    };
310
311    let fc = FrecencyComputation::new(db, settings, page_id, most_recent_redirect_bonus)?;
312
313    let (num_sampled_visits, sample_score) = if fc.visit_count > 0 {
314        fc.score_recent_visits()?
315    } else {
316        (0, 0.0f32)
317    };
318
319    Ok(if num_sampled_visits > 0 {
320        // If we sampled some visits for this page, use the calculated weight.
321        fc.get_frecency_for_sample(num_sampled_visits, sample_score)
322    } else if !fc.has_bookmark() || fc.is_query {
323        // Otherwise, this page has no visits, it may be bookmarked.
324        0
325    } else {
326        // For unvisited bookmarks, produce a non-zero frecency, so that they show
327        // up in URL bar autocomplete.
328        fc.compute_unvisited_bookmark_frecency()
329    })
330}