error_support/
error_tracing.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5use std::{
6    collections::HashMap,
7    time::{Duration, Instant, SystemTime},
8};
9
10use parking_lot::Mutex;
11
12static GLOBALS: Mutex<Globals> = Mutex::new(Globals::new());
13
14pub fn report_error_to_app(type_name: String, details: String) {
15    // First step: perform all actions that we need the lock for.
16    let breadcrumbs = {
17        let mut globals = GLOBALS.lock();
18        if !globals
19            .rate_limiter
20            .should_send_report(&type_name, Instant::now())
21        {
22            return;
23        }
24        globals.breadcrumbs.get_breadcrumbs()
25    };
26    // Report errors by sending a tracing event to the `app-services-error-reporter::error` target.
27    //
28    // Applications should register for these events and send a glean error ping when they occur.
29    //
30    // breadcrumbs will be sent in the `breadcrumbs` field as a single string, with each individual
31    // breadcrumb joined by newlines.
32    let breadcrumbs = breadcrumbs.join("\n");
33    let details = truncate_details(details);
34    tracing_support::error!(target: "app-services-error-reporter::error", details, type_name, breadcrumbs);
35}
36
37pub fn report_breadcrumb(message: String, module: String, line: u32, column: u32) {
38    // When we see a breadcrumb:
39    //   - Push it to the `RECENT_BREADCRUMBS` list
40    //   - Send out the `app-services-error-reporter::breadcrumb`.  Applications can register for
41    //     these events and log them.
42    GLOBALS
43        .lock()
44        .breadcrumbs
45        .push(format_breadcrumb_and_timestamp(&message, SystemTime::now()));
46    tracing_support::info!(target: "app-services-error-reporter::breadcrumb", message, module, line, column);
47}
48
49fn format_breadcrumb_and_timestamp(message: &str, time: SystemTime) -> String {
50    let timestamp = match time.duration_since(SystemTime::UNIX_EPOCH) {
51        Ok(n) => n.as_secs(),
52        Err(_) => 0,
53    };
54    format!("{message} ({timestamp})")
55}
56
57// Global structs used for error reporting
58struct Globals {
59    breadcrumbs: BreadcrumbRingBuffer,
60    rate_limiter: RateLimiter,
61}
62
63impl Globals {
64    const fn new() -> Self {
65        Self {
66            breadcrumbs: BreadcrumbRingBuffer::new(),
67            rate_limiter: RateLimiter::new(),
68        }
69    }
70}
71
72/// Ring buffer implementation that we use to store the most recent 20 breadcrumbs
73#[derive(Default)]
74struct BreadcrumbRingBuffer {
75    breadcrumbs: Vec<String>,
76    pos: usize,
77}
78
79impl BreadcrumbRingBuffer {
80    const MAX_ITEMS: usize = 20;
81
82    const fn new() -> Self {
83        Self {
84            breadcrumbs: Vec::new(),
85            pos: 0,
86        }
87    }
88
89    fn push(&mut self, breadcrumb: impl Into<String>) {
90        let breadcrumb = truncate_breadcrumb(breadcrumb.into());
91        if self.breadcrumbs.len() < Self::MAX_ITEMS {
92            self.breadcrumbs.push(breadcrumb);
93        } else {
94            self.breadcrumbs[self.pos] = breadcrumb;
95            self.pos = (self.pos + 1) % Self::MAX_ITEMS;
96        }
97    }
98
99    fn get_breadcrumbs(&self) -> Vec<String> {
100        let mut breadcrumbs = Vec::from(&self.breadcrumbs[self.pos..]);
101        breadcrumbs.extend(self.breadcrumbs[..self.pos].iter().map(|s| s.to_string()));
102        breadcrumbs
103    }
104}
105
106fn truncate_details(details: String) -> String {
107    // Limit details to 255 chars so that they fit in a Glean String list
108    // (https://mozilla.github.io/glean/book/reference/metrics/string.html)
109    truncate_string(details, 255)
110}
111
112fn truncate_breadcrumb(breadcrumb: String) -> String {
113    // Limit breadcrumbs to 100 chars so that they fit in a Glean String list
114    // (https://mozilla.github.io/glean/book/reference/metrics/string_list.html)
115    truncate_string(breadcrumb, 100)
116}
117
118fn truncate_string(value: String, max_len: usize) -> String {
119    if value.len() <= max_len {
120        return value;
121    }
122    let split_point = (0..=max_len)
123        .rev()
124        .find(|i| value.is_char_boundary(*i))
125        .unwrap_or(0);
126    value[0..split_point].to_string()
127}
128
129/// Rate-limits error reports per component by type to a max of 20/hr
130///
131/// This uses the simplest algorithm possible.  We could use something like a token bucket to allow
132/// for a small burst of errors, but that doesn't seem so useful.  In that scenario, the first
133/// error report is the one we want to fix.
134struct RateLimiter {
135    // Optional so we can make `new()` const.
136    last_report: Option<HashMap<String, Instant>>,
137}
138
139impl RateLimiter {
140    // Rate limit reports if they're within 3 minutes of each other.
141    const INTERVAL: Duration = Duration::from_secs(180);
142
143    const fn new() -> Self {
144        Self { last_report: None }
145    }
146
147    fn should_send_report(&mut self, error_type: &str, now: Instant) -> bool {
148        let component = error_type.split("-").next().unwrap();
149        let last_report = self.last_report.get_or_insert_with(HashMap::default);
150
151        if let Some(last_report) = last_report.get(component) {
152            match now.checked_duration_since(*last_report) {
153                // Not enough time has passed, rate-limit the report
154                Some(elapsed) if elapsed < Self::INTERVAL => {
155                    return false;
156                }
157                // For all other cases, fall through and allow the report to be sent.
158                //
159                // Note: this also covers the `None` case which happens when the clock is
160                // non-monotonic.  This shouldn't happen often, but it's possible after the OS syncs
161                // with NTP, if users manually adjust their clocks, etc.  Letting an extra event
162                // through seems okay in this case. We should get back into a good state soon
163                // after.
164                _ => (),
165            }
166        }
167        last_report.insert(component.to_string(), now);
168        true
169    }
170}
171
172#[cfg(test)]
173mod test {
174    use super::*;
175
176    #[test]
177    fn test_buffer() {
178        let mut buf = BreadcrumbRingBuffer::default();
179        buf.push("00");
180        buf.push("01");
181        buf.push("02");
182        buf.push("03");
183        buf.push("04");
184        buf.push("05");
185        buf.push("06");
186        buf.push("07");
187        buf.push("08");
188        buf.push("09");
189        assert_eq!(
190            buf.get_breadcrumbs(),
191            vec![
192                "00".to_string(),
193                "01".to_string(),
194                "02".to_string(),
195                "03".to_string(),
196                "04".to_string(),
197                "05".to_string(),
198                "06".to_string(),
199                "07".to_string(),
200                "08".to_string(),
201                "09".to_string(),
202            ]
203        );
204
205        buf.push("10");
206        buf.push("11");
207        buf.push("12");
208        buf.push("13");
209        buf.push("14");
210        buf.push("15");
211        buf.push("16");
212        buf.push("17");
213        buf.push("18");
214        buf.push("19");
215        assert_eq!(
216            buf.get_breadcrumbs(),
217            vec![
218                "00".to_string(),
219                "01".to_string(),
220                "02".to_string(),
221                "03".to_string(),
222                "04".to_string(),
223                "05".to_string(),
224                "06".to_string(),
225                "07".to_string(),
226                "08".to_string(),
227                "09".to_string(),
228                "10".to_string(),
229                "11".to_string(),
230                "12".to_string(),
231                "13".to_string(),
232                "14".to_string(),
233                "15".to_string(),
234                "16".to_string(),
235                "17".to_string(),
236                "18".to_string(),
237                "19".to_string(),
238            ]
239        );
240
241        buf.push("20");
242        assert_eq!(
243            buf.get_breadcrumbs(),
244            vec![
245                "01".to_string(),
246                "02".to_string(),
247                "03".to_string(),
248                "04".to_string(),
249                "05".to_string(),
250                "06".to_string(),
251                "07".to_string(),
252                "08".to_string(),
253                "09".to_string(),
254                "10".to_string(),
255                "11".to_string(),
256                "12".to_string(),
257                "13".to_string(),
258                "14".to_string(),
259                "15".to_string(),
260                "16".to_string(),
261                "17".to_string(),
262                "18".to_string(),
263                "19".to_string(),
264                "20".to_string(),
265            ]
266        );
267
268        buf.push("21");
269        buf.push("22");
270        buf.push("23");
271        buf.push("24");
272        buf.push("25");
273        assert_eq!(
274            buf.get_breadcrumbs(),
275            vec![
276                "06".to_string(),
277                "07".to_string(),
278                "08".to_string(),
279                "09".to_string(),
280                "10".to_string(),
281                "11".to_string(),
282                "12".to_string(),
283                "13".to_string(),
284                "14".to_string(),
285                "15".to_string(),
286                "16".to_string(),
287                "17".to_string(),
288                "18".to_string(),
289                "19".to_string(),
290                "20".to_string(),
291                "21".to_string(),
292                "22".to_string(),
293                "23".to_string(),
294                "24".to_string(),
295                "25".to_string(),
296            ]
297        );
298    }
299
300    #[test]
301    fn test_truncate_string() {
302        // These don't need to be truncated
303        assert_eq!(truncate_string("0".repeat(99), 100).len(), 99);
304        assert_eq!(truncate_string("0".repeat(100), 100).len(), 100);
305        // This one needs truncating
306        assert_eq!(truncate_string("0".repeat(101), 100).len(), 100);
307        // This one needs truncating and we need to make sure don't truncate in the middle of the
308        // fire emoji, which is multiple bytes long.
309        assert_eq!(truncate_string("0".repeat(99) + "🔥", 100).len(), 99);
310    }
311
312    #[test]
313    fn test_rate_limiter() {
314        let mut rate_limiter = RateLimiter::new();
315        let start = Instant::now();
316        let min = Duration::from_secs(60);
317        // The first error report is okay
318        assert!(rate_limiter.should_send_report("test-type", start));
319        // The report should be rate limited until 3 minutes pass, then we can send another one.
320        // Add time from the instant to simulate time going forward.
321        assert!(!rate_limiter.should_send_report("test-type", start));
322        assert!(!rate_limiter.should_send_report("test-type", start + min * 1));
323        assert!(!rate_limiter.should_send_report("test-type", start + min * 2));
324        assert!(rate_limiter.should_send_report("test-type", start + min * 3));
325        assert!(!rate_limiter.should_send_report("test-type", start + min * 4));
326        assert!(!rate_limiter.should_send_report("test-type", start + min * 5));
327        assert!(rate_limiter.should_send_report("test-type", start + min * 6));
328
329        assert!(rate_limiter.should_send_report("test-type", start + min * 60));
330        assert!(!rate_limiter.should_send_report("test-type", start + min * 61));
331        assert!(!rate_limiter.should_send_report("test-type", start + min * 62));
332        assert!(rate_limiter.should_send_report("test-type", start + min * 63));
333    }
334
335    #[test]
336    fn test_rate_limiter_type_matching() {
337        let mut rate_limiter = RateLimiter::new();
338        let start = Instant::now();
339        // Cause error error reports to be rate limited
340        assert!(rate_limiter.should_send_report("componenta-network-error", start));
341        assert!(!rate_limiter.should_send_report("componenta-network-error", start));
342        // Other reports from the same component should also be rate limited
343        assert!(!rate_limiter.should_send_report("componenta-database-error", start));
344        // But not ones from other components
345        assert!(rate_limiter.should_send_report("componentb-database-error", start));
346        assert!(rate_limiter.should_send_report("componentaa-network-error", start));
347    }
348
349    #[test]
350    fn test_add_breadcrumb_timestamp() {
351        assert_eq!(
352            format_breadcrumb_and_timestamp(
353                "MESSAGE",
354                SystemTime::UNIX_EPOCH + Duration::from_secs(10000000)
355            ),
356            "MESSAGE (10000000)"
357        );
358    }
359}