Skip to content

Commit 2d51317

Browse files
perf: Rust decoder 30% faster than C++, and NO unsafe (#69)
* improve rust decoder by 100x+ -- was massively slower due to un-inlined decoding bit functions
* bump version to 0.8.1
* some minor justfile cleanup
* improve readme

---------

Co-authored-by: Frank Elsinga <frank@elsinga.de>
1 parent bfebe04 commit 2d51317

13 files changed

Lines changed: 1550 additions & 1223 deletions

File tree

Cargo.toml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "fastpfor"
3-
version = "0.8.0"
3+
version = "0.8.1"
44
description = "FastPFOR lib with C++ Rust wrapper and pure Rust implementation"
55
authors = [
66
"Francisco Jimenez <jjcfrank@gmail.com>",
@@ -24,17 +24,18 @@ harness = false
2424
[features]
2525
# Eventually we may want to build without the C++ bindings by default.
2626
# Keeping it on for now to simplify development.
27-
default = ["cpp"]
27+
default = ["cpp", "rust"]
2828
# Used internally for testing and benchmarking. Not intended for public use.
2929
_all_compatible = ["cpp_portable", "rust"]
3030
# Use portable C++ code that will not rely on the latest CPU features. This is the default for the C++ bindings.
3131
cpp_portable = ["cpp"]
3232
# Optimize FastPFOR for the current CPU.
3333
cpp_native = ["cpp"]
3434
cpp = ["dep:cmake", "dep:cxx", "dep:cxx-build"]
35-
rust = ["dep:thiserror", "dep:bytes"]
35+
rust = ["dep:thiserror", "dep:bytes", "dep:bytemuck"]
3636

3737
[dependencies]
38+
bytemuck = { version = "1.25.0", optional = true }
3839
bytes = { version = "1.11", optional = true }
3940
cxx = { version = "1.0.194", optional = true }
4041
thiserror = { version = "2.0.18", optional = true }

README.md

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
[![license](https://img.shields.io/crates/l/fastpfor.svg)](https://github.com/jjcfrancisco/fastpfor/blob/main/LICENSE-APACHE)
77
[![CI build](https://github.com/jjcfrancisco/fastpfor/actions/workflows/ci.yml/badge.svg)](https://github.com/jjcfrancisco/fastpfor/actions)
88

9-
This is a Rust wrapper for the [C++ FastPFor library](https://github.com/fast-pack/FastPFor), as well as a pure Rust re-implementation (work in progress). Supports 32-bit and 64-bit integers, and SIMD-optimized codecs for 128-bit and 256-bit vectors. Based on the [Decoding billions of integers per second through vectorization, 2012](https://arxiv.org/abs/1209.2137) paper.
9+
This is a Rust wrapper for the [C++ FastPFor library](https://github.com/fast-pack/FastPFor), as well as a pure Rust re-implementation. Supports 32-bit and 64-bit integers, and SIMD-optimized codecs for 128-bit and 256-bit vectors. Based on the [Decoding billions of integers per second through vectorization, 2012](https://arxiv.org/abs/1209.2137) paper.
10+
11+
The Rust **decoder** is about 29% faster than the C++ version. The Rust implementation contains no `unsafe` code, and when built without the `cpp` feature this crate has `#![forbid(unsafe_code)]`.
1012

1113
### Supported algorithms
1214
Unless otherwise specified, all codecs support `&[u32]` only.
@@ -45,11 +47,31 @@ Unless otherwise specified, all codecs support `&[u32]` only.
4547
* VarIntGb
4648
```
4749

50+
## Benchmarks
51+
### Decoding
52+
53+
Measured on Linux x86-64 by running `just bench::cpp-vs-rust-decode native`. The values below are time measurements; smaller values indicate faster decoding.
54+
55+
| name | cpp (ns) | rust (ns) | % faster |
56+
|-----------------------------------------|----------|-----------|----------|
57+
| `clustered/1024` | 643.24 | 392.93 | 38.91% |
58+
| `clustered/4096` | 1986 | 1414.8 | 28.76% |
59+
| `sequential/1024` | 653.69 | 396.02 | 39.42% |
60+
| `sequential/4096` | 2106 | 1476.2 | 29.91% |
61+
| `sparse/1024` | 428.8 | 352.38 | 17.82% |
62+
| `sparse/4096` | 1114 | 1179.5 | -5.88% |
63+
| `uniform_large_value_distribution/1024` | 286.74 | 153.06 | 46.62% |
64+
| `uniform_large_value_distribution/4096` | 748.19 | 558.05 | 25.41% |
65+
| `uniform_small_value_distribution/1024` | 606.4 | 405.44 | 33.14% |
66+
| `uniform_small_value_distribution/4096` | 2017.3 | 1403.7 | 30.42% |
67+
68+
The Rust encoder has not yet been optimized or fully verified.
69+
4870
## Usage
4971

5072
### Crate Features
51-
* `cpp` - C++ implementation (default, uses portable SIMD mode)
52-
* `rust` - Rust implementation (work in progress, opt-in)
73+
* `cpp` - C++ implementation (uses portable SIMD mode)
74+
* `rust` - Rust implementation (safe Rust code, no `unsafe` blocks)
5375

5476
#### SIMD Mode Configuration
5577

benches/fastpfor_benchmark.rs

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -117,9 +117,8 @@ fn prepare_compressed_data(data: &[u32], block_size: NonZeroU32) -> Vec<u32> {
117117
compressed
118118
}
119119

120-
/// Helper function to decompress data
121-
fn decompress_data(codec: &mut FastPFOR, compressed: &[u32], original_size: usize) -> usize {
122-
let mut decompressed = vec![0u32; original_size];
120+
/// The caller must ensure that the `decompressed` slice is large enough to hold the output.
121+
fn decompress_data(codec: &mut FastPFOR, compressed: &[u32], decompressed: &mut [u32]) -> usize {
123122
let mut input_offset = Cursor::new(0);
124123
let mut output_offset = Cursor::new(0);
125124

@@ -128,7 +127,7 @@ fn decompress_data(codec: &mut FastPFOR, compressed: &[u32], original_size: usiz
128127
compressed,
129128
compressed.len() as u32,
130129
&mut input_offset,
131-
&mut decompressed,
130+
decompressed,
132131
&mut output_offset,
133132
)
134133
.unwrap();
@@ -158,10 +157,8 @@ fn benchmark_compression(c: &mut Criterion) {
158157
let data = generator(size);
159158
group.throughput(Throughput::Elements(size as u64));
160159
group.bench_with_input(BenchmarkId::new(*name, size), &data, |b, data| {
161-
b.iter(|| {
162-
let mut codec = FastPFOR::default();
163-
black_box(compress_data(&mut codec, black_box(data)))
164-
});
160+
let mut codec = FastPFOR::default();
161+
b.iter(|| black_box(compress_data(&mut codec, black_box(data))));
165162
});
166163
}
167164
}
@@ -196,9 +193,14 @@ fn benchmark_decompression(c: &mut Criterion) {
196193
BenchmarkId::new(*name, size),
197194
&(compressed, size),
198195
|b, (compressed, size)| {
196+
let mut codec = FastPFOR::new(DEFAULT_PAGE_SIZE, BLOCK_SIZE_128);
197+
let mut decompressed = vec![0u32; *size];
199198
b.iter(|| {
200-
let mut codec = FastPFOR::default();
201-
black_box(decompress_data(&mut codec, black_box(compressed), *size))
199+
black_box(decompress_data(
200+
&mut codec,
201+
black_box(compressed),
202+
&mut decompressed,
203+
))
202204
});
203205
},
204206
);
@@ -219,11 +221,11 @@ fn benchmark_roundtrip(c: &mut Criterion) {
219221
BenchmarkId::new("compress_decompress", size),
220222
&data,
221223
|b, data| {
224+
let mut codec1 = FastPFOR::default();
225+
let mut codec2 = FastPFOR::default();
226+
let mut compressed = vec![0u32; data.len() * 2];
227+
let mut decompressed = vec![0u32; data.len()];
222228
b.iter(|| {
223-
let mut codec1 = FastPFOR::default();
224-
let mut codec2 = FastPFOR::default();
225-
let mut compressed = vec![0u32; data.len() * 2];
226-
let mut decompressed = vec![0u32; data.len()];
227229
let mut input_offset = Cursor::new(0);
228230
let mut output_offset = Cursor::new(0);
229231

@@ -285,8 +287,13 @@ fn benchmark_block_sizes(c: &mut Criterion) {
285287
group.throughput(Throughput::Elements(size as u64));
286288
group.bench_function(format!("decompress_{block_size}"), |b| {
287289
let mut codec = FastPFOR::new(DEFAULT_PAGE_SIZE, block_size);
290+
let mut decompressed = vec![0u32; size];
288291
b.iter(|| {
289-
black_box(decompress_data(&mut codec, black_box(&compressed), size));
292+
black_box(decompress_data(
293+
&mut codec,
294+
black_box(&compressed),
295+
&mut decompressed,
296+
));
290297
});
291298
});
292299
}
@@ -317,8 +324,8 @@ fn benchmark_compression_ratio(c: &mut Criterion) {
317324
for (name, data_fn) in patterns {
318325
let data = data_fn(size);
319326
group.bench_function(*name, |b| {
327+
let mut codec = FastPFOR::default();
320328
b.iter(|| {
321-
let mut codec = FastPFOR::default();
322329
let compressed_size = compress_data(&mut codec, black_box(&data));
323330
#[expect(
324331
clippy::cast_precision_loss,
@@ -429,7 +436,14 @@ fn benchmark_cpp_vs_rust(c: &mut Criterion) {
429436
&rust_compressed,
430437
|b, compressed| {
431438
let mut codec = FastPFOR::new(DEFAULT_PAGE_SIZE, BLOCK_SIZE_128);
432-
b.iter(|| black_box(decompress_data(&mut codec, black_box(compressed), size)));
439+
let mut decompressed = vec![0u32; size];
440+
b.iter(|| {
441+
black_box(decompress_data(
442+
&mut codec,
443+
black_box(compressed),
444+
&mut decompressed,
445+
))
446+
});
433447
},
434448
);
435449
}

fuzz/justfile

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env just --justfile
2+
# Fuzz testing recipes. All commands must be run from the repo root:
3+
# just fuzz::run rust_compress_oracle
4+
# cargo-fuzz requires nightly Rust and must be run from inside the fuzz/ directory.
5+
6+
fuzz_dir := justfile_directory()
7+
8+
# List available fuzz targets
9+
list:
10+
@cd {{fuzz_dir}} && cargo +nightly fuzz list
11+
12+
# Run a fuzz target indefinitely (Ctrl-C to stop)
13+
# Targets: rust_compress_oracle, rust_decompress_oracle, cpp_roundtrip
14+
run target *args:
15+
cd {{fuzz_dir}} && cargo +nightly fuzz run --target x86_64-unknown-linux-gnu {{target}} {{args}}
16+
17+
# Run a fuzz target for a fixed number of seconds
18+
run-time target seconds='60' *args:
19+
cd {{fuzz_dir}} && cargo +nightly fuzz run {{target}} {{args}} -- -max_total_time={{seconds}}
20+
21+
# Run a fuzz target for a fixed number of iterations
22+
run-iters target iters='10000' *args:
23+
cd {{fuzz_dir}} && cargo +nightly fuzz run {{target}} {{args}} -- -runs={{iters}}
24+
25+
# Run rust_compress_oracle (Rust only, no C++ required)
26+
rust-compress *args: (run 'rust_compress_oracle' args)
27+
28+
# Run rust_decompress_oracle (uses C++ as oracle)
29+
rust-decompress *args: (run 'rust_decompress_oracle' args)
30+
31+
# Run cpp_roundtrip (C++ roundtrip)
32+
cpp-roundtrip *args: (run 'cpp_roundtrip' args)
33+
34+
# Reproduce a specific crash artifact
35+
repro target artifact:
36+
cd {{fuzz_dir}} && cargo +nightly fuzz run {{target}} {{artifact}}
37+
38+
# Build all fuzz targets without running them
39+
build:
40+
cd {{fuzz_dir}} && cargo +nightly fuzz build --target x86_64-unknown-linux-gnu
41+
42+
# Print coverage for a fuzz target (requires llvm-tools)
43+
coverage target:
44+
cd {{fuzz_dir}} && cargo +nightly fuzz coverage --target x86_64-unknown-linux-gnu {{target}}

justfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ export RUSTDOCFLAGS := env('RUSTDOCFLAGS', if ci_mode == '1' {'-D warnings'} els
1515
export RUST_BACKTRACE := env('RUST_BACKTRACE', if ci_mode == '1' {'1'} else {'0'})
1616

1717
mod bench 'benches/justfile'
18+
mod fuzz 'fuzz/justfile'
1819

1920
@_default:
2021
{{just}} --list
@@ -30,6 +31,8 @@ build:
3031
# Quick compile without building a binary
3132
check:
3233
cargo check --workspace --all-targets --features _all_compatible
34+
cargo check --workspace --all-targets --no-default-features --features cpp
35+
cargo check --workspace --all-targets --no-default-features --features rust
3336

3437
# Generate code coverage report to upload to codecov.io
3538
ci-coverage: env-info && \
@@ -38,7 +41,7 @@ ci-coverage: env-info && \
3841
mkdir -p target/llvm-cov
3942

4043
# Run all tests as expected by CI
41-
ci-test: env-info test-fmt build clippy test test-doc && assert-git-is-clean
44+
ci-test: env-info test-fmt check build clippy test test-doc && assert-git-is-clean
4245

4346
# Run minimal subset of tests to ensure compatibility with MSRV
4447
ci-test-msrv: env-info test

src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#![cfg_attr(not(feature = "cpp"), forbid(unsafe_code))]
12
#![cfg_attr(docsrs, feature(doc_cfg))]
23
#![doc = include_str!("../README.md")]
34

@@ -14,6 +15,7 @@ compile_error!("At least one of the features 'cpp' or 'rust' must be enabled");
1415
pub mod cpp;
1516

1617
#[cfg(feature = "rust")]
18+
#[forbid(unsafe_code, reason = "Rust code must always be safe")]
1719
/// Rust re-implementation of `FastPFor` (work in progress)
1820
pub mod rust;
1921

0 commit comments

Comments
 (0)