Add histogram for fuzzing

2020-11-13 10:34:23 +01:00
parent 73162ea82c
commit de77d4fc0c
2 changed files with 102 additions and 0 deletions
--- a/libraries/persistent_store/fuzz/src/histogram.rs
+++ b/libraries/persistent_store/fuzz/src/histogram.rs
@@ -0,0 +1,100 @@
+// Copyright 2019-2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::num_bits;
+use std::collections::HashMap;
+
+/// Histogram with logarithmic buckets.
+///
+/// This is used to compute coverage statistics of the fuzzing runs of a corpus. This is not used
+/// during actual fuzzing, only when replaying a corpus to compute statistics.
+#[derive(Default)]
+pub struct Histogram {
+    /// Maps each bucket to its count.
+    ///
+    /// Buckets are numbers sharing the same highest bit. The first buckets are: only 0, only 1, 2
+    /// to 3, 4 to 7, 8 to 15. Buckets are identified by their lower-bound.
+    buckets: HashMap<usize, usize>,
+}
+
+impl Histogram {
+    /// Increases the count of the bucket of an item.
+    ///
+    /// The bucket of `item` is the highest power of two, lower or equal to `item`. If `item` is
+    /// zero, then its bucket is also zero.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the item is too big, i.e. it uses its most significant bit.
+    pub fn add(&mut self, item: usize) {
+        assert!(item <= usize::max_value() / 2);
+        *self.buckets.entry(get_bucket(item)).or_insert(0) += 1;
+    }
+
+    /// Merges another histogram into this one.
+    pub fn merge(&mut self, other: &Histogram) {
+        for (&bucket, &count) in &other.buckets {
+            *self.buckets.entry(bucket).or_insert(0) += count;
+        }
+    }
+
+    /// Returns one past the highest non-empty bucket.
+    ///
+    /// In other words, all non-empty buckets of the histogram are smaller than the returned bucket.
+    pub fn bucket_lim(&self) -> usize {
+        match self.buckets.keys().max() {
+            None => 0,
+            Some(0) => 1,
+            Some(x) => 2 * x,
+        }
+    }
+
+    /// Returns the count of a bucket.
+    pub fn get(&self, bucket: usize) -> Option<usize> {
+        self.buckets.get(&bucket).cloned()
+    }
+
+    /// Returns the total count.
+    pub fn count(&self) -> usize {
+        self.buckets.values().sum()
+    }
+}
+
+/// Returns the bucket of an item.
+fn get_bucket(item: usize) -> usize {
+    let bucket = bucket_from_width(num_bits(item));
+    assert!(bucket <= item && (item == 0 || item / 2 < bucket));
+    bucket
+}
+
+/// Returns the bucket of an item given its bit-width.
+pub fn bucket_from_width(width: usize) -> usize {
+    if width == 0 {
+        0
+    } else {
+        1 << (width - 1)
+    }
+}
+
+#[test]
+fn get_bucket_ok() {
+    assert_eq!(get_bucket(0), 0);
+    assert_eq!(get_bucket(1), 1);
+    assert_eq!(get_bucket(2), 2);
+    assert_eq!(get_bucket(3), 2);
+    assert_eq!(get_bucket(4), 4);
+    assert_eq!(get_bucket(7), 4);
+    assert_eq!(get_bucket(8), 8);
+    assert_eq!(get_bucket(15), 8);
+}
--- a/libraries/persistent_store/fuzz/src/lib.rs
+++ b/libraries/persistent_store/fuzz/src/lib.rs
@@ -28,6 +28,8 @@
 // TODO(ia0): Remove when used.
 #![allow(dead_code)]

+mod histogram;
+
 /// Bit-level entropy source based on a byte slice shared reference.
 ///
 /// This is used to convert the byte slice provided by the fuzzer into the entropy used by the