From de77d4fc0cd13b7b22916babad4dd4d9a0f0f702 Mon Sep 17 00:00:00 2001
From: Julien Cretin
Date: Fri, 13 Nov 2020 10:34:23 +0100
Subject: [PATCH] Add histogram for fuzzing

---
 .../persistent_store/fuzz/src/histogram.rs    | 100 ++++++++++++++++++
 libraries/persistent_store/fuzz/src/lib.rs    |   2 +
 2 files changed, 102 insertions(+)
 create mode 100644 libraries/persistent_store/fuzz/src/histogram.rs

diff --git a/libraries/persistent_store/fuzz/src/histogram.rs b/libraries/persistent_store/fuzz/src/histogram.rs
new file mode 100644
index 0000000..0d76fd7
--- /dev/null
+++ b/libraries/persistent_store/fuzz/src/histogram.rs
@@ -0,0 +1,100 @@
+// Copyright 2019-2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::num_bits;
+use std::collections::HashMap;
+
+/// Histogram with logarithmic buckets.
+///
+/// This is used to compute coverage statistics of the fuzzing runs of a corpus. This is not used
+/// during actual fuzzing, only when replaying a corpus to compute statistics.
+#[derive(Default)]
+pub struct Histogram {
+    /// Maps each bucket to its count.
+    ///
+    /// Buckets are numbers sharing the same highest bit. The first buckets are: only 0, only 1, 2
+    /// to 3, 4 to 7, 8 to 15. Buckets are identified by their lower-bound.
+    buckets: HashMap<usize, usize>,
+}
+
+impl Histogram {
+    /// Increases the count of the bucket of an item.
+    ///
+    /// The bucket of `item` is the highest power of two, lower or equal to `item`. If `item` is
+    /// zero, then its bucket is also zero.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the item is too big, i.e. it uses its most significant bit.
+    pub fn add(&mut self, item: usize) {
+        assert!(item <= usize::max_value() / 2);
+        *self.buckets.entry(get_bucket(item)).or_insert(0) += 1;
+    }
+
+    /// Merges another histogram into this one.
+    pub fn merge(&mut self, other: &Histogram) {
+        for (&bucket, &count) in &other.buckets {
+            *self.buckets.entry(bucket).or_insert(0) += count;
+        }
+    }
+
+    /// Returns one past the highest non-empty bucket.
+    ///
+    /// In other words, all non-empty buckets of the histogram are smaller than the returned bucket.
+    pub fn bucket_lim(&self) -> usize {
+        match self.buckets.keys().max() {
+            None => 0,
+            Some(0) => 1,
+            Some(x) => 2 * x,
+        }
+    }
+
+    /// Returns the count of a bucket.
+    pub fn get(&self, bucket: usize) -> Option<usize> {
+        self.buckets.get(&bucket).cloned()
+    }
+
+    /// Returns the total count.
+    pub fn count(&self) -> usize {
+        self.buckets.values().sum()
+    }
+}
+
+/// Returns the bucket of an item.
+fn get_bucket(item: usize) -> usize {
+    let bucket = bucket_from_width(num_bits(item));
+    assert!(bucket <= item && (item == 0 || item / 2 < bucket));
+    bucket
+}
+
+/// Returns the bucket of an item given its bit-width.
+pub fn bucket_from_width(width: usize) -> usize {
+    if width == 0 {
+        0
+    } else {
+        1 << (width - 1)
+    }
+}
+
+#[test]
+fn get_bucket_ok() {
+    assert_eq!(get_bucket(0), 0);
+    assert_eq!(get_bucket(1), 1);
+    assert_eq!(get_bucket(2), 2);
+    assert_eq!(get_bucket(3), 2);
+    assert_eq!(get_bucket(4), 4);
+    assert_eq!(get_bucket(7), 4);
+    assert_eq!(get_bucket(8), 8);
+    assert_eq!(get_bucket(15), 8);
+}
diff --git a/libraries/persistent_store/fuzz/src/lib.rs b/libraries/persistent_store/fuzz/src/lib.rs
index 1feeabc..3dd5932 100644
--- a/libraries/persistent_store/fuzz/src/lib.rs
+++ b/libraries/persistent_store/fuzz/src/lib.rs
@@ -28,6 +28,8 @@
 // TODO(ia0): Remove when used.
 #![allow(dead_code)]
 
+mod histogram;
+
 /// Bit-level entropy source based on a byte slice shared reference.
 ///
 /// This is used to convert the byte slice provided by the fuzzer into the entropy used by the