suggest/benchmarks/
ingest.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use std::sync::OnceLock;

use crate::{
    benchmarks::{client::RemoteSettingsBenchmarkClient, unique_db_filename, BenchmarkWithInput},
    rs::SuggestRecordType,
    store::SuggestStoreInner,
    SuggestIngestionConstraints,
};

pub struct IngestBenchmark {
    temp_dir: tempfile::TempDir,
    client: RemoteSettingsBenchmarkClient,
    record_type: SuggestRecordType,
    reingest: bool,
}

/// Get a benchmark client to use for the tests
///
/// Uses OnceLock to ensure we only construct it once.
fn get_benchmark_client() -> RemoteSettingsBenchmarkClient {
    static CELL: OnceLock<RemoteSettingsBenchmarkClient> = OnceLock::new();
    CELL.get_or_init(|| {
        RemoteSettingsBenchmarkClient::new()
            .unwrap_or_else(|e| panic!("Error creating benchmark client {e}"))
    })
    .clone()
}

impl IngestBenchmark {
    pub fn new(record_type: SuggestRecordType, reingest: bool) -> Self {
        let temp_dir = tempfile::tempdir().unwrap();
        Self {
            client: get_benchmark_client(),
            temp_dir,
            record_type,
            reingest,
        }
    }
}

// The input for each benchmark is `SuggestStoreInner` with a fresh database.
//
// This is wrapped in a newtype so that it can be exposed in the public trait
pub struct InputType(SuggestStoreInner<RemoteSettingsBenchmarkClient>);

impl BenchmarkWithInput for IngestBenchmark {
    type GlobalInput = ();
    type IterationInput = InputType;

    fn global_input(&self) -> Self::GlobalInput {}

    fn iteration_input(&self) -> Self::IterationInput {
        let data_path = self.temp_dir.path().join(unique_db_filename());
        let store = SuggestStoreInner::new(data_path, vec![], self.client.clone());
        store.ensure_db_initialized();
        if self.reingest {
            store.ingest_records_by_type(self.record_type);
            store.force_reingest();
        }
        InputType(store)
    }

    fn benchmarked_code(&self, _: &Self::GlobalInput, input: Self::IterationInput) {
        let InputType(store) = input;
        store.ingest_records_by_type(self.record_type);
    }
}

/// Get IngestBenchmark instances for all record types
pub fn all_benchmarks() -> Vec<(&'static str, IngestBenchmark)> {
    vec![
        (
            "ingest-icon",
            IngestBenchmark::new(SuggestRecordType::Icon, false),
        ),
        (
            "ingest-again-icon",
            IngestBenchmark::new(SuggestRecordType::Icon, true),
        ),
        (
            "ingest-amp-wikipedia",
            IngestBenchmark::new(SuggestRecordType::AmpWikipedia, false),
        ),
        (
            "ingest-again-amp-wikipedia",
            IngestBenchmark::new(SuggestRecordType::AmpWikipedia, true),
        ),
        (
            "ingest-amo",
            IngestBenchmark::new(SuggestRecordType::Amo, false),
        ),
        (
            "ingest-again-amo",
            IngestBenchmark::new(SuggestRecordType::Amo, true),
        ),
        (
            "ingest-pocket",
            IngestBenchmark::new(SuggestRecordType::Pocket, false),
        ),
        (
            "ingest-again-pocket",
            IngestBenchmark::new(SuggestRecordType::Pocket, true),
        ),
        (
            "ingest-yelp",
            IngestBenchmark::new(SuggestRecordType::Yelp, false),
        ),
        (
            "ingest-again-yelp",
            IngestBenchmark::new(SuggestRecordType::Yelp, true),
        ),
        (
            "ingest-mdn",
            IngestBenchmark::new(SuggestRecordType::Mdn, false),
        ),
        (
            "ingest-again-mdn",
            IngestBenchmark::new(SuggestRecordType::Mdn, true),
        ),
        (
            "ingest-weather",
            IngestBenchmark::new(SuggestRecordType::Weather, false),
        ),
        (
            "ingest-again-weather",
            IngestBenchmark::new(SuggestRecordType::Weather, true),
        ),
        (
            "ingest-global-config",
            IngestBenchmark::new(SuggestRecordType::GlobalConfig, false),
        ),
        (
            "ingest-again-global-config",
            IngestBenchmark::new(SuggestRecordType::GlobalConfig, true),
        ),
        (
            "ingest-amp-mobile",
            IngestBenchmark::new(SuggestRecordType::AmpMobile, false),
        ),
        (
            "ingest-again-amp-mobile",
            IngestBenchmark::new(SuggestRecordType::AmpMobile, true),
        ),
        (
            "ingest-fakespot",
            IngestBenchmark::new(SuggestRecordType::Fakespot, false),
        ),
        (
            "ingest-again-fakespot",
            IngestBenchmark::new(SuggestRecordType::Fakespot, true),
        ),
    ]
}

pub fn print_debug_ingestion_sizes() {
    viaduct_reqwest::use_reqwest_backend();
    let store = SuggestStoreInner::new(
        "file:debug_ingestion_sizes?mode=memory&cache=shared",
        vec![],
        RemoteSettingsBenchmarkClient::new().unwrap(),
    );
    store
        .ingest(SuggestIngestionConstraints {
            // Uncomment to measure the size for a specific provider
            // providers: Some(vec![crate::SuggestionProvider::Fakespot]),
            ..SuggestIngestionConstraints::default()
        })
        .unwrap();
    let table_row_counts = store.table_row_counts();
    let db_size = store.db_size();
    let client = store.into_settings_client();
    println!("Attachment sizes");
    println!("-------------------------");
    let attachment_sizes = client.attachment_size_by_record_type();
    let total_attachment_size: usize = attachment_sizes.iter().map(|(_, size)| size).sum();
    for (record_type, size) in attachment_sizes {
        println!("{:30} {}kb", record_type.as_str(), (size + 500) / 1000)
    }
    println!();
    println!(
        "Total attachment size: {}kb",
        (total_attachment_size + 500) / 1000
    );

    println!("Database table row counts");
    println!("-------------------------");
    for (name, count) in table_row_counts {
        println!("{name:30} {count}");
    }
    println!();
    println!("Total database size: {}kb", (db_size + 500) / 1000);
}