1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! A simple histogram implementation for exponential histograms.

use std::any::TypeId;
use std::collections::HashMap;

use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize};

use crate::error::{Error, ErrorKind};

pub use exponential::PrecomputedExponential;
pub use functional::Functional;
pub use linear::PrecomputedLinear;

mod exponential;
mod functional;
mod linear;

/// Different kinds of histograms.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HistogramType {
    /// A histogram with linear distributed buckets.
    Linear,
    /// A histogram with exponential distributed buckets.
    Exponential,
}

impl TryFrom<i32> for HistogramType {
    type Error = Error;

    fn try_from(value: i32) -> Result<HistogramType, Self::Error> {
        match value {
            0 => Ok(HistogramType::Linear),
            1 => Ok(HistogramType::Exponential),
            e => Err(ErrorKind::HistogramType(e).into()),
        }
    }
}

/// A histogram.
///
/// Stores the counts per bucket and tracks the count of added samples and the total sum.
/// The bucketing algorithm can be changed.
///
/// ## Example
///
/// ```rust,ignore
/// let mut hist = Histogram::exponential(1, 500, 10);
///
/// for i in 1..=10 {
///     hist.accumulate(i);
/// }
///
/// assert_eq!(10, hist.count());
/// assert_eq!(55, hist.sum());
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Histogram<B> {
    /// Mapping bucket's minimum to sample count.
    values: HashMap<u64, u64>,

    /// The count of samples added.
    count: u64,
    /// The total sum of samples.
    sum: u64,

    /// The bucketing algorithm used.
    bucketing: B,
}

/// A bucketing algorithm for histograms.
///
/// It's responsible to calculate the bucket a sample goes into.
/// It can calculate buckets on-the-fly or pre-calculate buckets and re-use that when needed.
pub trait Bucketing {
    /// Get the bucket's minimum value the sample falls into.
    fn sample_to_bucket_minimum(&self, sample: u64) -> u64;

    /// The computed bucket ranges for this bucketing algorithm.
    fn ranges(&self) -> &[u64];
}

impl<B: Bucketing> Histogram<B> {
    /// Gets the number of buckets in this histogram.
    pub fn bucket_count(&self) -> usize {
        self.values.len()
    }

    /// Adds a single value to this histogram.
    pub fn accumulate(&mut self, sample: u64) {
        let bucket_min = self.bucketing.sample_to_bucket_minimum(sample);
        let entry = self.values.entry(bucket_min).or_insert(0);
        *entry += 1;
        self.sum = self.sum.saturating_add(sample);
        self.count += 1;
    }

    /// Gets the total sum of values recorded in this histogram.
    pub fn sum(&self) -> u64 {
        self.sum
    }

    /// Gets the total count of values recorded in this histogram.
    pub fn count(&self) -> u64 {
        self.count
    }

    /// Gets the filled values.
    pub fn values(&self) -> &HashMap<u64, u64> {
        &self.values
    }

    /// Checks if this histogram recorded any values.
    pub fn is_empty(&self) -> bool {
        self.count() == 0
    }

    /// Gets a snapshot of all values from the first bucket until one past the last filled bucket,
    /// filling in empty buckets with 0.
    pub fn snapshot_values(&self) -> HashMap<u64, u64> {
        let mut res = self.values.clone();

        let max_bucket = self.values.keys().max().cloned().unwrap_or(0);

        for &min_bucket in self.bucketing.ranges() {
            // Fill in missing entries.
            let _ = res.entry(min_bucket).or_insert(0);
            // stop one after the last filled bucket
            if min_bucket > max_bucket {
                break;
            }
        }
        res
    }

    /// Clear this histogram.
    pub fn clear(&mut self) {
        self.sum = 0;
        self.count = 0;
        self.values.clear();
    }
}

/// Either linear or exponential histogram bucketing
///
/// This is to be used as a single type to avoid generic use in the buffered API.
pub enum LinearOrExponential {
    Linear(PrecomputedLinear),
    Exponential(PrecomputedExponential),
}

impl Histogram<LinearOrExponential> {
    /// A histogram using linear bucketing.
    ///
    /// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
    /// This type is only used for the buffered API.
    pub fn _linear(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
        Histogram {
            values: HashMap::new(),
            count: 0,
            sum: 0,
            bucketing: LinearOrExponential::Linear(PrecomputedLinear {
                bucket_ranges: OnceCell::new(),
                min,
                max,
                bucket_count,
            }),
        }
    }

    /// A histogram using expontential bucketing.
    ///
    /// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
    /// This type is only used for the buffered API.
    pub fn _exponential(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
        Histogram {
            values: HashMap::new(),
            count: 0,
            sum: 0,
            bucketing: LinearOrExponential::Exponential(PrecomputedExponential {
                bucket_ranges: OnceCell::new(),
                min,
                max,
                bucket_count,
            }),
        }
    }
}

impl Bucketing for LinearOrExponential {
    fn sample_to_bucket_minimum(&self, sample: u64) -> u64 {
        use LinearOrExponential::*;
        match self {
            Linear(lin) => lin.sample_to_bucket_minimum(sample),
            Exponential(exp) => exp.sample_to_bucket_minimum(sample),
        }
    }

    fn ranges(&self) -> &[u64] {
        use LinearOrExponential::*;
        match self {
            Linear(lin) => lin.ranges(),
            Exponential(exp) => exp.ranges(),
        }
    }
}

impl<B> Histogram<B>
where
    B: Bucketing,
    B: std::fmt::Debug,
    B: PartialEq,
{
    /// Merges data from one histogram into the other.
    ///
    /// ## Panics
    ///
    /// Panics if the two histograms don't use the same bucketing.
    pub fn merge(&mut self, other: &Self) {
        assert_eq!(self.bucketing, other.bucketing);

        self.sum += other.sum;
        self.count += other.count;
        for (&bucket, &count) in &other.values {
            *self.values.entry(bucket).or_insert(0) += count;
        }
    }
}

impl<B> Histogram<B>
where
    B: Bucketing + 'static,
    B: std::fmt::Debug,
    B: PartialEq,
{
    /// Merges data from one histogram into the other.
    ///
    /// ## Panics
    ///
    /// Panics if the two histograms don't use the same bucketing.
    /// Note that the `other` side can be either linear or exponential
    /// and we only merge if it matches `self`'s bucketing.
    // _Note:_ Unfortunately this needs a separate name from the above, otherwise it's a conflicting
    // method.
    // We only use it internally for the buffered API, and can guarantee correct usage that way.
    pub fn _merge(&mut self, other: &Histogram<LinearOrExponential>) {
        #[rustfmt::skip]
        assert!(
            (
                TypeId::of::<B>() == TypeId::of::<PrecomputedLinear>()
                && matches!(other.bucketing, LinearOrExponential::Linear(_))
            ) ||
            (
                TypeId::of::<B>() == TypeId::of::<PrecomputedExponential>()
                && matches!(other.bucketing, LinearOrExponential::Exponential(_))
            )
        );
        self.sum += other.sum;
        self.count += other.count;
        for (&bucket, &count) in &other.values {
            *self.values.entry(bucket).or_insert(0) += count;
        }
    }
}