Skip to content

Commit 88ba2d4

Browse files
chore: add unit test to validate benchmark code (#72)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 494ad56 commit 88ba2d4

4 files changed

Lines changed: 535 additions & 348 deletions

File tree

Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ name = "fastpfor_benchmark"
2121
required-features = ["rust"]
2222
harness = false
2323

24+
[[bench]]
25+
name = "bench_utils"
26+
required-features = ["rust"]
27+
harness = false
28+
bench = false
29+
2430
[features]
2531
# Eventually we may want to build without the C++ bindings by default.
2632
# Keeping it on for now to simplify development.

benches/bench_utils.rs

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
//! Shared data generators, codec helpers, and pre-computed fixtures used by
2+
//! both the Criterion benchmark (`fastpfor_benchmark.rs`) and the smoke-test
3+
//! suite (`tests/benchmark_smoke.rs`).
4+
//!
5+
//! Loaded as a module via `#[path]` in both consumers, so every item consumed
6+
//! from outside must be `pub`.
7+
8+
// This is an internal dev-only module; doc-comments on every field would add
9+
// noise without benefit.
10+
#![allow(missing_docs)]
11+
12+
use core::ops::Range;
13+
pub use std::io::Cursor;
14+
use std::num::NonZeroU32;
15+
16+
pub use fastpfor::rust::{BLOCK_SIZE_128, BLOCK_SIZE_256, DEFAULT_PAGE_SIZE, FastPFOR, Integer};
17+
use rand::rngs::StdRng;
18+
use rand::{RngExt as _, SeedableRng};
19+
20+
const SEED: u64 = 456;
21+
22+
// ---------------------------------------------------------------------------
23+
// Data generators (used to build fixtures; private except for the `pub` small-value uniform generator)
24+
// ---------------------------------------------------------------------------
25+
26+
type DataGeneratorFn = fn(usize) -> Vec<u32>;
27+
28+
/// Produce `size` values, each obtained from `random_range(value_range)`.
///
/// The generator is re-created from the fixed `SEED` on every call, so the
/// same arguments always start from the same generator state.
fn generate_uniform_data_from_range(size: usize, value_range: Range<u32>) -> Vec<u32> {
    let mut rng = StdRng::seed_from_u64(SEED);
    let mut values = Vec::with_capacity(size);
    for _ in 0..size {
        // `Range` is not `Copy`, so each draw gets its own clone of the bounds.
        values.push(rng.random_range(value_range.clone()));
    }
    values
}
34+
35+
pub fn generate_uniform_data_small_value_distribution(size: usize) -> Vec<u32> {
36+
generate_uniform_data_from_range(size, 0..1000)
37+
}
38+
39+
fn generate_uniform_data_large_value_distribution(size: usize) -> Vec<u32> {
40+
generate_uniform_data_from_range(size, 0..u32::MAX)
41+
}
42+
43+
/// Generate `size` values that stay close to an occasionally changing base.
///
/// For every element the base is re-drawn from `0..1000` with probability
/// 0.1; the element itself is the current base plus an offset from `0..10`.
fn generate_clustered_data(size: usize) -> Vec<u32> {
    let mut rng = StdRng::seed_from_u64(SEED);
    let mut cluster_base = 0u32;
    let mut values = Vec::with_capacity(size);
    for _ in 0..size {
        // The order of RNG calls (coin flip, optional new base, offset)
        // determines the generated sequence, so it must not be rearranged.
        let start_new_cluster = rng.random_bool(0.1);
        if start_new_cluster {
            cluster_base = rng.random_range(0..1000);
        }
        let offset: u32 = rng.random_range(0..10);
        values.push(cluster_base + offset);
    }
    values
}
55+
56+
/// Generate the ascending sequence `0, 1, ..., size - 1`.
///
/// # Panics
///
/// Panics if `size` does not fit in a `u32`. A plain `size as u32` cast would
/// wrap silently in that case and return fewer than `size` elements.
fn generate_sequential_data(size: usize) -> Vec<u32> {
    let end = u32::try_from(size).expect("sequential data size must fit in u32");
    (0..end).collect()
}
59+
60+
/// Generate `size` values where each element is `0` with probability 0.9 and
/// an arbitrary random `u32` otherwise.
fn generate_sparse_data(size: usize) -> Vec<u32> {
    let mut rng = StdRng::seed_from_u64(SEED);
    let mut values = Vec::with_capacity(size);
    for _ in 0..size {
        let is_zero = rng.random_bool(0.9);
        // The second RNG call only happens for the non-zero elements.
        let value: u32 = if is_zero { 0 } else { rng.random() };
        values.push(value);
    }
    values
}
72+
73+
fn generate_constant_data(size: usize) -> Vec<u32> {
74+
vec![SEED as u32; size]
75+
}
76+
77+
/// Generate `size` powers of two, cycling through `2^0, 2^1, ..., 2^29`.
fn generate_geometric_data(size: usize) -> Vec<u32> {
    // Number of distinct exponents before the sequence restarts at 1.
    const EXPONENT_CYCLE: usize = 30;

    let mut values = Vec::with_capacity(size);
    for index in 0..size {
        let exponent = index % EXPONENT_CYCLE;
        values.push(1u32 << exponent);
    }
    values
}
80+
81+
/// Patterns used by compression / decompression / roundtrip / block-size benchmarks.
82+
const COMPRESS_PATTERNS: &[(&str, DataGeneratorFn)] = &[
83+
(
84+
"uniform_small_value_distribution",
85+
generate_uniform_data_small_value_distribution,
86+
),
87+
(
88+
"uniform_large_value_distribution",
89+
generate_uniform_data_large_value_distribution,
90+
),
91+
("clustered", generate_clustered_data),
92+
("sequential", generate_sequential_data),
93+
("sparse", generate_sparse_data),
94+
];
95+
96+
/// Superset of `COMPRESS_PATTERNS`, also used by the compression-ratio benchmark.
97+
const ALL_PATTERNS: &[(&str, DataGeneratorFn)] = &[
98+
(
99+
"uniform_small_distribution",
100+
generate_uniform_data_small_value_distribution,
101+
),
102+
(
103+
"uniform_large_distribution",
104+
generate_uniform_data_large_value_distribution,
105+
),
106+
("clustered", generate_clustered_data),
107+
("sequential", generate_sequential_data),
108+
("sparse", generate_sparse_data),
109+
("constant", generate_constant_data),
110+
("geometric", generate_geometric_data),
111+
];
112+
113+
// ---------------------------------------------------------------------------
114+
// Codec helpers
115+
// ---------------------------------------------------------------------------
116+
117+
/// Compress `data` and return the compressed words.
118+
pub fn compress_data(codec: &mut FastPFOR, data: &[u32]) -> Vec<u32> {
119+
let mut compressed = vec![0u32; data.len() * 2 + 1024];
120+
let mut input_offset = Cursor::new(0);
121+
let mut output_offset = Cursor::new(0);
122+
codec
123+
.compress(
124+
data,
125+
data.len() as u32,
126+
&mut input_offset,
127+
&mut compressed,
128+
&mut output_offset,
129+
)
130+
.unwrap();
131+
let len = output_offset.position() as usize;
132+
compressed.truncate(len);
133+
compressed
134+
}
135+
136+
/// Decompress `compressed` into the caller-provided `decompressed` buffer and
137+
/// return the number of elements written.
138+
///
139+
/// The buffer must be allocated outside the timed loop so that allocation cost
140+
/// is not measured.
141+
pub fn decompress_data(
142+
codec: &mut FastPFOR,
143+
compressed: &[u32],
144+
decompressed: &mut [u32],
145+
) -> usize {
146+
let mut input_offset = Cursor::new(0);
147+
let mut output_offset = Cursor::new(0);
148+
codec
149+
.uncompress(
150+
compressed,
151+
compressed.len() as u32,
152+
&mut input_offset,
153+
decompressed,
154+
&mut output_offset,
155+
)
156+
.unwrap();
157+
output_offset.position() as usize
158+
}
159+
160+
/// Pre-compress `data` with a specific `block_size` and return the compressed buffer.
161+
fn prepare_compressed_data(data: &[u32], block_size: NonZeroU32) -> Vec<u32> {
162+
compress_data(&mut FastPFOR::new(DEFAULT_PAGE_SIZE, block_size), data)
163+
}
164+
165+
// ---------------------------------------------------------------------------
166+
// C++ helpers (compiled only when the `cpp` feature is active)
167+
// ---------------------------------------------------------------------------
168+
169+
/// Encode `data` with the C++ codec and return only the words that were
/// written.
///
/// The scratch buffer is sized at `data.len() * 2 + 1024` words and truncated
/// to the length of the slice returned by `encode32`.
#[cfg(feature = "cpp")]
pub fn cpp_encode(codec: &fastpfor::cpp::FastPFor128Codec, data: &[u32]) -> Vec<u32> {
    use fastpfor::cpp::Codec32 as _;

    let mut buffer = vec![0u32; data.len() * 2 + 1024];
    let encoded_len = codec.encode32(data, &mut buffer).unwrap().len();
    buffer.truncate(encoded_len);
    buffer
}
177+
178+
/// Decode `compressed` with the C++ codec into `decompressed` and return the
/// length of the slice that `decode32` reports.
#[cfg(feature = "cpp")]
pub fn cpp_decode(
    codec: &fastpfor::cpp::FastPFor128Codec,
    compressed: &[u32],
    decompressed: &mut [u32],
) -> usize {
    use fastpfor::cpp::Codec32 as _;

    let decoded = codec.decode32(compressed, decompressed).unwrap();
    decoded.len()
}
187+
188+
// ---------------------------------------------------------------------------
189+
// Pre-computed fixtures
190+
// ---------------------------------------------------------------------------
191+
192+
/// One row of pre-computed data for compression / decompression benchmarks.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so fixtures can be printed
/// in assertion failures and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CompressFixture {
    /// Label of the data pattern this fixture was generated from.
    pub name: &'static str,
    /// Uncompressed input values.
    pub data: Vec<u32>,
    /// Rust-compressed form (`BLOCK_SIZE_128`), ready for decompression benchmarks.
    pub rust_compressed: Vec<u32>,
}
199+
200+
impl CompressFixture {
201+
fn new(name: &'static str, generator: DataGeneratorFn, size: usize) -> Self {
202+
let data = generator(size);
203+
let rust_compressed = prepare_compressed_data(&data, BLOCK_SIZE_128);
204+
Self {
205+
name,
206+
data,
207+
rust_compressed,
208+
}
209+
}
210+
}
211+
212+
/// Build fixtures for every `COMPRESS_PATTERNS × sizes` combination.
213+
pub fn compress_fixtures(sizes: &[usize]) -> Vec<(usize, CompressFixture)> {
214+
sizes
215+
.iter()
216+
.flat_map(|&size| {
217+
COMPRESS_PATTERNS
218+
.iter()
219+
.map(move |&(name, generator)| (size, CompressFixture::new(name, generator, size)))
220+
})
221+
.collect()
222+
}
223+
224+
/// Build fixtures for every `ALL_PATTERNS` at a single size.
225+
pub fn ratio_fixtures(size: usize) -> Vec<CompressFixture> {
226+
ALL_PATTERNS
227+
.iter()
228+
.map(|&(name, generator)| CompressFixture::new(name, generator, size))
229+
.collect()
230+
}
231+
232+
/// One row for the block-size benchmark.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so fixtures can be printed
/// in assertion failures and compared directly in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BlockSizeFixture {
    /// Block size the codec was configured with when producing `compressed`.
    pub block_size: NonZeroU32,
    /// Uncompressed input values.
    pub data: Vec<u32>,
    /// `data` compressed with `block_size`.
    pub compressed: Vec<u32>,
}
238+
239+
impl BlockSizeFixture {
240+
fn new(block_size: NonZeroU32, size: usize) -> Self {
241+
let data = generate_uniform_data_small_value_distribution(size);
242+
let compressed = prepare_compressed_data(&data, block_size);
243+
Self {
244+
block_size,
245+
data,
246+
compressed,
247+
}
248+
}
249+
}
250+
251+
/// Build fixtures for both block sizes at a given `size`.
252+
pub fn block_size_fixtures(size: usize) -> Vec<BlockSizeFixture> {
253+
[BLOCK_SIZE_128, BLOCK_SIZE_256]
254+
.iter()
255+
.map(|&bs| BlockSizeFixture::new(bs, size))
256+
.collect()
257+
}
258+
259+
/// One row for the C++ vs Rust decode benchmark.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so fixtures can be printed
/// in assertion failures and compared directly in tests.
#[cfg(feature = "cpp")]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CppDecodeFixture {
    /// Label of the data pattern this fixture was generated from.
    pub name: &'static str,
    /// Input compressed by the C++ codec.
    pub cpp_compressed: Vec<u32>,
    /// Input compressed by the Rust codec.
    pub rust_compressed: Vec<u32>,
    /// Number of elements that were requested from the generator.
    pub original_len: usize,
}
267+
268+
#[cfg(feature = "cpp")]
impl CppDecodeFixture {
    /// Run `generator(size)` and compress the result once with the C++ codec
    /// and once with the Rust codec (`BLOCK_SIZE_128`).
    fn new(name: &'static str, generator: DataGeneratorFn, size: usize) -> Self {
        use fastpfor::cpp::FastPFor128Codec;

        let data = generator(size);
        let cpp_codec = FastPFor128Codec::new();
        Self {
            name,
            cpp_compressed: cpp_encode(&cpp_codec, &data),
            rust_compressed: prepare_compressed_data(&data, BLOCK_SIZE_128),
            // Records the requested size; assumes the generator returned
            // exactly `size` elements — TODO confirm for all generators.
            original_len: size,
        }
    }
}
284+
285+
/// Build one C++ vs Rust decode fixture per `(size, pattern)` pair, iterating
/// `sizes` in the outer loop and `COMPRESS_PATTERNS` in the inner loop.
#[cfg(feature = "cpp")]
pub fn cpp_decode_fixtures(sizes: &[usize]) -> Vec<(usize, CppDecodeFixture)> {
    let mut fixtures = Vec::with_capacity(sizes.len() * COMPRESS_PATTERNS.len());
    for &size in sizes {
        for &(name, generator) in COMPRESS_PATTERNS {
            fixtures.push((size, CppDecodeFixture::new(name, generator, size)));
        }
    }
    fixtures
}

0 commit comments

Comments
 (0)