1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use super::{Bucketing, Histogram};

use crate::util::floating_point_context::FloatingPointContext;

/// A functional bucketing algorithm.
///
/// Bucketing is performed by a function, rather than pre-computed buckets.
/// The bucket index of a given sample is determined with the following function:
///
/// i = ⌊n log<sub>base</sub>(𝑥)⌋
///
/// In other words, there are n buckets for each power of `base` magnitude.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct Functional {
    exponent: f64,
}

impl Functional {
    /// Instantiate a new functional bucketing.
    fn new(log_base: f64, buckets_per_magnitude: f64) -> Functional {
        // Set the FPU control flag to the required state within this function
        let _fpc = FloatingPointContext::new();

        let exponent = log_base.powf(1.0 / buckets_per_magnitude);

        Functional { exponent }
    }

    /// Maps a sample to a "bucket index" that it belongs in.
    /// A "bucket index" is the consecutive integer index of each bucket, useful as a
    /// mathematical concept, even though the internal representation is stored and
    /// sent using the minimum value in each bucket.
    fn sample_to_bucket_index(&self, sample: u64) -> u64 {
        // Set the FPU control flag to the required state within this function
        let _fpc = FloatingPointContext::new();

        ((sample.saturating_add(1)) as f64).log(self.exponent) as u64
    }

    /// Determines the minimum value of a bucket, given a bucket index.
    fn bucket_index_to_bucket_minimum(&self, index: u64) -> u64 {
        // Set the FPU control flag to the required state within this function
        let _fpc = FloatingPointContext::new();

        self.exponent.powf(index as f64) as u64
    }
}

impl Bucketing for Functional {
    fn sample_to_bucket_minimum(&self, sample: u64) -> u64 {
        if sample == 0 {
            return 0;
        }

        let index = self.sample_to_bucket_index(sample);
        self.bucket_index_to_bucket_minimum(index)
    }

    fn ranges(&self) -> &[u64] {
        unimplemented!("Bucket ranges for functional bucketing are not precomputed")
    }
}

impl Histogram<Functional> {
    /// Creates a histogram with functional buckets.
    pub fn functional(log_base: f64, buckets_per_magnitude: f64) -> Histogram<Functional> {
        Histogram {
            values: HashMap::new(),
            count: 0,
            sum: 0,
            bucketing: Functional::new(log_base, buckets_per_magnitude),
        }
    }

    /// Gets a snapshot of all contiguous values.
    ///
    /// **Caution** This is a more specific implementation of `snapshot_values` on functional
    /// histograms. `snapshot_values` cannot be used with those, due to buckets not being
    /// precomputed.
    pub fn snapshot(&self) -> &HashMap<u64, u64> {
        &self.values
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn can_count() {
        let mut hist = Histogram::functional(2.0, 8.0);
        assert!(hist.is_empty());

        for i in 1..=10 {
            hist.accumulate(i);
        }

        assert_eq!(10, hist.count());
        assert_eq!(55, hist.sum());
    }

    #[test]
    fn sample_to_bucket_minimum_correctly_rounds_down() {
        let hist = Histogram::functional(2.0, 8.0);

        // Check each of the first 100 integers, where numerical accuracy of the round-tripping
        // is most potentially problematic
        for value in 0..100 {
            let bucket_minimum = hist.bucketing.sample_to_bucket_minimum(value);
            assert!(bucket_minimum <= value);

            assert_eq!(
                bucket_minimum,
                hist.bucketing.sample_to_bucket_minimum(bucket_minimum)
            );
        }

        // Do an exponential sampling of higher numbers
        for i in 11..500 {
            let value = 1.5f64.powi(i);
            let value = value as u64;
            let bucket_minimum = hist.bucketing.sample_to_bucket_minimum(value);
            assert!(bucket_minimum <= value);
            assert_eq!(
                bucket_minimum,
                hist.bucketing.sample_to_bucket_minimum(bucket_minimum)
            );
        }
    }

    #[test]
    fn histogram_merge() {
        let mut hist = Histogram::functional(2.0, 8.0);

        // Check each of the first 100 integers, where numerical accuracy of the round-tripping
        // is most potentially problematic
        for sample in 0..100 {
            hist.accumulate(sample);
        }

        let mut filled_hist = hist.clone();

        let mut hist2 = Histogram::functional(2.0, 8.0);
        for sample in 100..200 {
            hist.accumulate(sample);
            hist2.accumulate(sample);
        }

        filled_hist.merge(&hist2);

        assert_eq!(filled_hist, hist);
    }

    #[test]
    #[should_panic]
    fn histogram_merge_different_buckets() {
        let mut hist1 = Histogram::functional(2.0, 8.0);
        let hist2 = Histogram::functional(1.0, 4.0);

        hist1.merge(&hist2);
    }

    #[test]
    fn histogram_clears() {
        let mut hist = Histogram::functional(2.0, 8.0);
        let empty_hist = hist.clone();

        assert_eq!(empty_hist, hist);

        hist.accumulate(13);
        assert_ne!(empty_hist, hist);

        hist.clear();
        assert_eq!(empty_hist, hist);
    }
}