suggest/benchmarks/
ingest.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5use std::sync::OnceLock;
6
7use crate::{
8    benchmarks::{client::RemoteSettingsBenchmarkClient, unique_db_filename, BenchmarkWithInput},
9    provider::SuggestionProvider,
10    rs::{Collection, SuggestRecordType},
11    store::SuggestStoreInner,
12    SuggestIngestionConstraints,
13};
14
15pub struct IngestBenchmark {
16    temp_dir: tempfile::TempDir,
17    client: RemoteSettingsBenchmarkClient,
18    collection: Collection,
19    record_type: SuggestRecordType,
20    reingest: bool,
21}
22
23/// Get a benchmark client to use for the tests
24///
25/// Uses OnceLock to ensure we only construct it once.
26fn get_benchmark_client() -> RemoteSettingsBenchmarkClient {
27    static CELL: OnceLock<RemoteSettingsBenchmarkClient> = OnceLock::new();
28    CELL.get_or_init(|| {
29        RemoteSettingsBenchmarkClient::new()
30            .unwrap_or_else(|e| panic!("Error creating benchmark client {e}"))
31    })
32    .clone()
33}
34
35impl IngestBenchmark {
36    pub fn new(provider: SuggestionProvider, reingest: bool) -> Self {
37        Self::new_with_record_type(
38            provider.primary_collection(),
39            provider.primary_record_type(),
40            reingest,
41        )
42    }
43
44    pub fn new_with_record_type(
45        collection: Collection,
46        record_type: SuggestRecordType,
47        reingest: bool,
48    ) -> Self {
49        let temp_dir = tempfile::tempdir().unwrap();
50        Self {
51            client: get_benchmark_client(),
52            temp_dir,
53            collection,
54            record_type,
55            reingest,
56        }
57    }
58}
59
60// The input for each benchmark is `SuggestStoreInner` with a fresh database.
61//
62// This is wrapped in a newtype so that it can be exposed in the public trait
63pub struct InputType(SuggestStoreInner<RemoteSettingsBenchmarkClient>);
64
65impl BenchmarkWithInput for IngestBenchmark {
66    type GlobalInput = ();
67    type IterationInput = InputType;
68
69    fn global_input(&self) -> Self::GlobalInput {}
70
71    fn iteration_input(&self) -> Self::IterationInput {
72        let data_path = self.temp_dir.path().join(unique_db_filename());
73        let store = SuggestStoreInner::new(data_path, vec![], self.client.clone());
74        store.ensure_db_initialized();
75        if self.reingest {
76            store.ingest_records_by_type(self.collection, self.record_type);
77            store.force_reingest();
78        }
79        InputType(store)
80    }
81
82    fn benchmarked_code(&self, _: &Self::GlobalInput, input: Self::IterationInput) {
83        let InputType(store) = input;
84        store.ingest_records_by_type(self.collection, self.record_type);
85    }
86}
87
88/// Get IngestBenchmark instances for all record types
89pub fn all_benchmarks() -> Vec<(&'static str, IngestBenchmark)> {
90    vec![
91        (
92            "ingest-icon-amp",
93            IngestBenchmark::new_with_record_type(Collection::Amp, SuggestRecordType::Icon, false),
94        ),
95        (
96            "ingest-again-icon-amp",
97            IngestBenchmark::new_with_record_type(Collection::Amp, SuggestRecordType::Icon, true),
98        ),
99        (
100            "ingest-icon-other",
101            IngestBenchmark::new_with_record_type(
102                Collection::Other,
103                SuggestRecordType::Icon,
104                false,
105            ),
106        ),
107        (
108            "ingest-again-icon-other",
109            IngestBenchmark::new_with_record_type(Collection::Other, SuggestRecordType::Icon, true),
110        ),
111        (
112            "ingest-amp",
113            IngestBenchmark::new(SuggestionProvider::Amp, false),
114        ),
115        (
116            "ingest-again-amp",
117            IngestBenchmark::new(SuggestionProvider::Amp, true),
118        ),
119        (
120            "ingest-wikipedia",
121            IngestBenchmark::new(SuggestionProvider::Wikipedia, false),
122        ),
123        (
124            "ingest-again-wikipedia",
125            IngestBenchmark::new(SuggestionProvider::Wikipedia, true),
126        ),
127        (
128            "ingest-amo",
129            IngestBenchmark::new(SuggestionProvider::Amo, false),
130        ),
131        (
132            "ingest-again-amo",
133            IngestBenchmark::new(SuggestionProvider::Amo, true),
134        ),
135        (
136            "ingest-yelp",
137            IngestBenchmark::new(SuggestionProvider::Yelp, false),
138        ),
139        (
140            "ingest-again-yelp",
141            IngestBenchmark::new(SuggestionProvider::Yelp, true),
142        ),
143        (
144            "ingest-mdn",
145            IngestBenchmark::new(SuggestionProvider::Mdn, false),
146        ),
147        (
148            "ingest-again-mdn",
149            IngestBenchmark::new(SuggestionProvider::Mdn, true),
150        ),
151        (
152            "ingest-weather",
153            IngestBenchmark::new(SuggestionProvider::Weather, false),
154        ),
155        (
156            "ingest-again-weather",
157            IngestBenchmark::new(SuggestionProvider::Weather, true),
158        ),
159        (
160            "ingest-global-config",
161            IngestBenchmark::new_with_record_type(
162                Collection::Other,
163                SuggestRecordType::GlobalConfig,
164                false,
165            ),
166        ),
167        (
168            "ingest-again-global-config",
169            IngestBenchmark::new_with_record_type(
170                Collection::Other,
171                SuggestRecordType::GlobalConfig,
172                true,
173            ),
174        ),
175        (
176            "ingest-fakespot",
177            IngestBenchmark::new(SuggestionProvider::Fakespot, false),
178        ),
179        (
180            "ingest-again-fakespot",
181            IngestBenchmark::new(SuggestionProvider::Fakespot, true),
182        ),
183    ]
184}
185
186pub fn print_debug_ingestion_sizes() {
187    viaduct_reqwest::use_reqwest_backend();
188    let store = SuggestStoreInner::new(
189        "file:debug_ingestion_sizes?mode=memory&cache=shared",
190        vec![],
191        RemoteSettingsBenchmarkClient::new().unwrap(),
192    );
193    store
194        .ingest(SuggestIngestionConstraints {
195            // Uncomment to measure the size for a specific provider
196            // providers: Some(vec![crate::SuggestionProvider::Fakespot]),
197            ..SuggestIngestionConstraints::default()
198        })
199        .unwrap();
200    let table_row_counts = store.table_row_counts();
201    let db_size = store.db_size();
202    let client = store.into_settings_client();
203    println!("Attachment sizes");
204    println!("-------------------------");
205    let attachment_sizes = client.attachment_size_by_record_type();
206    let total_attachment_size: usize = attachment_sizes.iter().map(|(_, size)| size).sum();
207    for (record_type, size) in attachment_sizes {
208        println!("{:30} {}kb", record_type.as_str(), (size + 500) / 1000)
209    }
210    println!();
211    println!(
212        "Total attachment size: {}kb",
213        (total_attachment_size + 500) / 1000
214    );
215
216    println!("Database table row counts");
217    println!("-------------------------");
218    for (name, count) in table_row_counts {
219        println!("{name:30} {count}");
220    }
221    println!();
222    println!("Total database size: {}kb", (db_size + 500) / 1000);
223}