1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.
//! A simple histogram implementation for exponential histograms.
use std::any::TypeId;
use std::collections::HashMap;
use once_cell::sync::OnceCell;
use serde::{Deserialize, Serialize};
use crate::error::{Error, ErrorKind};
pub use exponential::PrecomputedExponential;
pub use functional::Functional;
pub use linear::PrecomputedLinear;
mod exponential;
mod functional;
mod linear;
/// Different kinds of histograms.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum HistogramType {
/// A histogram with linear distributed buckets.
Linear,
/// A histogram with exponential distributed buckets.
Exponential,
}
impl TryFrom<i32> for HistogramType {
type Error = Error;
fn try_from(value: i32) -> Result<HistogramType, Self::Error> {
match value {
0 => Ok(HistogramType::Linear),
1 => Ok(HistogramType::Exponential),
e => Err(ErrorKind::HistogramType(e).into()),
}
}
}
/// A histogram.
///
/// Stores the counts per bucket and tracks the count of added samples and the total sum.
/// The bucketing algorithm can be changed.
///
/// ## Example
///
/// ```rust,ignore
/// let mut hist = Histogram::exponential(1, 500, 10);
///
/// for i in 1..=10 {
/// hist.accumulate(i);
/// }
///
/// assert_eq!(10, hist.count());
/// assert_eq!(55, hist.sum());
/// ```
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct Histogram<B> {
/// Mapping bucket's minimum to sample count.
values: HashMap<u64, u64>,
/// The count of samples added.
count: u64,
/// The total sum of samples.
sum: u64,
/// The bucketing algorithm used.
bucketing: B,
}
/// A bucketing algorithm for histograms.
///
/// It's responsible to calculate the bucket a sample goes into.
/// It can calculate buckets on-the-fly or pre-calculate buckets and re-use that when needed.
pub trait Bucketing {
/// Get the bucket's minimum value the sample falls into.
fn sample_to_bucket_minimum(&self, sample: u64) -> u64;
/// The computed bucket ranges for this bucketing algorithm.
fn ranges(&self) -> &[u64];
}
impl<B: Bucketing> Histogram<B> {
/// Gets the number of buckets in this histogram.
pub fn bucket_count(&self) -> usize {
self.values.len()
}
/// Adds a single value to this histogram.
pub fn accumulate(&mut self, sample: u64) {
let bucket_min = self.bucketing.sample_to_bucket_minimum(sample);
let entry = self.values.entry(bucket_min).or_insert(0);
*entry += 1;
self.sum = self.sum.saturating_add(sample);
self.count += 1;
}
/// Gets the total sum of values recorded in this histogram.
pub fn sum(&self) -> u64 {
self.sum
}
/// Gets the total count of values recorded in this histogram.
pub fn count(&self) -> u64 {
self.count
}
/// Gets the filled values.
pub fn values(&self) -> &HashMap<u64, u64> {
&self.values
}
/// Checks if this histogram recorded any values.
pub fn is_empty(&self) -> bool {
self.count() == 0
}
/// Gets a snapshot of all values from the first bucket until one past the last filled bucket,
/// filling in empty buckets with 0.
pub fn snapshot_values(&self) -> HashMap<u64, u64> {
let mut res = self.values.clone();
let max_bucket = self.values.keys().max().cloned().unwrap_or(0);
for &min_bucket in self.bucketing.ranges() {
// Fill in missing entries.
let _ = res.entry(min_bucket).or_insert(0);
// stop one after the last filled bucket
if min_bucket > max_bucket {
break;
}
}
res
}
/// Clear this histogram.
pub fn clear(&mut self) {
self.sum = 0;
self.count = 0;
self.values.clear();
}
}
/// Either linear or exponential histogram bucketing
///
/// This is to be used as a single type to avoid generic use in the buffered API.
pub enum LinearOrExponential {
Linear(PrecomputedLinear),
Exponential(PrecomputedExponential),
}
impl Histogram<LinearOrExponential> {
/// A histogram using linear bucketing.
///
/// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
/// This type is only used for the buffered API.
pub fn _linear(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
Histogram {
values: HashMap::new(),
count: 0,
sum: 0,
bucketing: LinearOrExponential::Linear(PrecomputedLinear {
bucket_ranges: OnceCell::new(),
min,
max,
bucket_count,
}),
}
}
/// A histogram using expontential bucketing.
///
/// _Note:_ Special naming to avoid needing to use extensive type annotations in other parts.
/// This type is only used for the buffered API.
pub fn _exponential(min: u64, max: u64, bucket_count: usize) -> Histogram<LinearOrExponential> {
Histogram {
values: HashMap::new(),
count: 0,
sum: 0,
bucketing: LinearOrExponential::Exponential(PrecomputedExponential {
bucket_ranges: OnceCell::new(),
min,
max,
bucket_count,
}),
}
}
}
impl Bucketing for LinearOrExponential {
fn sample_to_bucket_minimum(&self, sample: u64) -> u64 {
use LinearOrExponential::*;
match self {
Linear(lin) => lin.sample_to_bucket_minimum(sample),
Exponential(exp) => exp.sample_to_bucket_minimum(sample),
}
}
fn ranges(&self) -> &[u64] {
use LinearOrExponential::*;
match self {
Linear(lin) => lin.ranges(),
Exponential(exp) => exp.ranges(),
}
}
}
impl<B> Histogram<B>
where
B: Bucketing,
B: std::fmt::Debug,
B: PartialEq,
{
/// Merges data from one histogram into the other.
///
/// ## Panics
///
/// Panics if the two histograms don't use the same bucketing.
pub fn merge(&mut self, other: &Self) {
assert_eq!(self.bucketing, other.bucketing);
self.sum += other.sum;
self.count += other.count;
for (&bucket, &count) in &other.values {
*self.values.entry(bucket).or_insert(0) += count;
}
}
}
impl<B> Histogram<B>
where
B: Bucketing + 'static,
B: std::fmt::Debug,
B: PartialEq,
{
/// Merges data from one histogram into the other.
///
/// ## Panics
///
/// Panics if the two histograms don't use the same bucketing.
/// Note that the `other` side can be either linear or exponential
/// and we only merge if it matches `self`'s bucketing.
// _Note:_ Unfortunately this needs a separate name from the above, otherwise it's a conflicting
// method.
// We only use it internally for the buffered API, and can guarantee correct usage that way.
pub fn _merge(&mut self, other: &Histogram<LinearOrExponential>) {
#[rustfmt::skip]
assert!(
(
TypeId::of::<B>() == TypeId::of::<PrecomputedLinear>()
&& matches!(other.bucketing, LinearOrExponential::Linear(_))
) ||
(
TypeId::of::<B>() == TypeId::of::<PrecomputedExponential>()
&& matches!(other.bucketing, LinearOrExponential::Exponential(_))
)
);
self.sum += other.sum;
self.count += other.count;
for (&bucket, &count) in &other.values {
*self.values.entry(bucket).or_insert(0) += count;
}
}
}