Skip to content

Commit 6ba6e0b

Browse files
authored
chore(cpp): reorganize codec structure and improve error handling (#75)
First step of moving towards the unified C++ and Rust model of codecs that was done in #73.
1 parent e5e9bf4 commit 6ba6e0b

28 files changed

Lines changed: 1078 additions & 820 deletions

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ cpp_portable = ["cpp"]
3838
# Optimize FastPFOR for the current CPU.
3939
cpp_native = ["cpp"]
4040
cpp = ["dep:cmake", "dep:cxx", "dep:cxx-build"]
41-
rust = ["dep:thiserror", "dep:bytes", "dep:bytemuck"]
41+
rust = ["dep:bytes"]
4242

4343
[dependencies]
44-
bytemuck = { version = "1.25.0", optional = true }
44+
bytemuck = { version = "1.25.0", features = ["min_const_generics"] }
4545
bytes = { version = "1.11", optional = true }
4646
cxx = { version = "1.0.194", optional = true }
47-
thiserror = { version = "2.0.18", optional = true }
47+
thiserror = "2.0.18"
4848

4949
[build-dependencies]
5050
cmake = { version = "0.1.57", optional = true }

README.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -90,21 +90,21 @@ Feature selection can be overridden with the `FASTPFOR_SIMD_MODE` environment va
9090
### Using C++ Wrapper
9191

9292
```rust
93-
use fastpfor::cpp::{Codec32 as _, SimdFastPFor128Codec};
93+
use fastpfor::{AnyLenCodec as _, cpp};
9494

9595
fn main() {
96-
let mut codec = SimdFastPFor128Codec::new();
96+
let mut codec = cpp::SimdFastPFor128Codec::new();
9797

98-
// Encode
99-
let mut input = vec![1, 2, 3, 4, 5];
100-
let mut output = vec![0; 10]; // must be large enough
101-
let enc_slice = codec.encode32(&input, &mut output).unwrap();
98+
let input = vec![1u32, 2, 3, 4, 5];
99+
let mut compressed = Vec::new();
100+
codec.encode(&input, &mut compressed).unwrap();
102101

103-
// Decode
104-
let mut decoded = vec![0; 10]; // must be large enough
105-
let dec_slice = codec.decode32(&enc_slice, &mut decoded).unwrap();
102+
let mut decoded = Vec::new();
103+
codec
104+
.decode(&compressed, &mut decoded, None)
105+
.unwrap();
106106

107-
assert_eq!(input, dec_slice);
107+
assert_eq!(input, decoded);
108108
}
109109
```
110110

benches/bench_utils.rs

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ use core::ops::Range;
1313
pub use std::io::Cursor;
1414
use std::num::NonZeroU32;
1515

16+
#[cfg(feature = "cpp")]
17+
use fastpfor::AnyLenCodec as _;
18+
#[cfg(feature = "cpp")]
19+
use fastpfor::cpp;
1620
pub use fastpfor::rust::{BLOCK_SIZE_128, BLOCK_SIZE_256, DEFAULT_PAGE_SIZE, FastPFOR, Integer};
1721
use rand::rngs::StdRng;
1822
use rand::{RngExt as _, SeedableRng};
@@ -167,22 +171,24 @@ fn prepare_compressed_data(data: &[u32], block_size: NonZeroU32) -> Vec<u32> {
167171
// ---------------------------------------------------------------------------
168172

169173
#[cfg(feature = "cpp")]
170-
pub fn cpp_encode(codec: &fastpfor::cpp::FastPFor128Codec, data: &[u32]) -> Vec<u32> {
171-
use fastpfor::cpp::Codec32 as _;
172-
let mut out = vec![0u32; data.len() * 2 + 1024];
173-
let new_len = codec.encode32(data, &mut out).unwrap().len();
174-
out.truncate(new_len);
174+
pub fn cpp_encode(codec: &mut cpp::FastPFor128Codec, data: &[u32]) -> Vec<u32> {
175+
let mut out = Vec::new();
176+
codec.encode(data, &mut out).unwrap();
175177
out
176178
}
177179

178180
#[cfg(feature = "cpp")]
179181
pub fn cpp_decode(
180-
codec: &fastpfor::cpp::FastPFor128Codec,
182+
codec: &mut cpp::FastPFor128Codec,
181183
compressed: &[u32],
182184
decompressed: &mut [u32],
183185
) -> usize {
184-
use fastpfor::cpp::Codec32 as _;
185-
codec.decode32(compressed, decompressed).unwrap().len()
186+
let mut out = Vec::new();
187+
codec
188+
.decode(compressed, &mut out, Some(decompressed.len() as u32))
189+
.unwrap();
190+
decompressed.copy_from_slice(&out);
191+
out.len()
186192
}
187193

188194
// ---------------------------------------------------------------------------
@@ -268,10 +274,9 @@ pub struct CppDecodeFixture {
268274
#[cfg(feature = "cpp")]
269275
impl CppDecodeFixture {
270276
fn new(name: &'static str, generator: DataGeneratorFn, size: usize) -> Self {
271-
use fastpfor::cpp::FastPFor128Codec;
272277
let data = generator(size);
273-
let codec = FastPFor128Codec::new();
274-
let cpp_compressed = cpp_encode(&codec, &data);
278+
let mut codec = cpp::FastPFor128Codec::new();
279+
let cpp_compressed = cpp_encode(&mut codec, &data);
275280
let rust_compressed = prepare_compressed_data(&data, BLOCK_SIZE_128);
276281
Self {
277282
name,

benches/fastpfor_benchmark.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ use bench_utils::{
1313
};
1414
#[cfg(feature = "cpp")]
1515
use bench_utils::{cpp_decode, cpp_decode_fixtures, cpp_encode};
16+
#[cfg(feature = "cpp")]
17+
use fastpfor::cpp;
1618

1719
const SIZES: &[usize] = &[1024, 4096];
1820

@@ -144,17 +146,15 @@ fn benchmark_compression_ratio(c: &mut Criterion) {
144146
/// the pure-Rust `FastPFOR` codec with `BLOCK_SIZE_128`.
145147
#[cfg(feature = "cpp")]
146148
fn benchmark_cpp_vs_rust(c: &mut Criterion) {
147-
use fastpfor::cpp::FastPFor128Codec;
148-
149149
let mut group = c.benchmark_group("cpp_vs_rust/encode");
150150
for (size, fix) in compress_fixtures(SIZES) {
151151
group.throughput(Throughput::Elements(size as u64));
152152
group.bench_with_input(
153153
BenchmarkId::new(format!("cpp/{}", fix.name), size),
154154
&fix.data,
155155
|b, data| {
156-
let codec = FastPFor128Codec::new();
157-
b.iter(|| black_box(cpp_encode(&codec, black_box(data))));
156+
let mut codec = cpp::FastPFor128Codec::new();
157+
b.iter(|| black_box(cpp_encode(&mut codec, black_box(data))));
158158
},
159159
);
160160
group.bench_with_input(
@@ -175,9 +175,9 @@ fn benchmark_cpp_vs_rust(c: &mut Criterion) {
175175
BenchmarkId::new(format!("cpp/{}", fix.name), size),
176176
&fix.cpp_compressed,
177177
|b, compressed| {
178-
let codec = FastPFor128Codec::new();
178+
let mut codec = cpp::FastPFor128Codec::new();
179179
let mut out = vec![0u32; fix.original_len];
180-
b.iter(|| black_box(cpp_decode(&codec, black_box(compressed), &mut out)));
180+
b.iter(|| black_box(cpp_decode(&mut codec, black_box(compressed), &mut out)));
181181
},
182182
);
183183
group.bench_with_input(

fuzz/fuzz_targets/common.rs

Lines changed: 38 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
use fastpfor::{cpp, rust};
1+
use fastpfor::{AnyLenCodec, cpp, rust};
22

3-
pub type BoxedCppCodec = Box<dyn cpp::Codec32>;
3+
pub type BoxedCppCodec = Box<dyn AnyLenCodec>;
44

55
#[derive(arbitrary::Arbitrary)]
66
pub struct FuzzInput<C> {
@@ -77,42 +77,43 @@ pub enum CppCodec {
7777

7878
impl From<CppCodec> for BoxedCppCodec {
7979
fn from(codec: CppCodec) -> Self {
80-
use cpp::*;
8180
match codec {
82-
CppCodec::BP32 => Box::new(BP32Codec::default()),
83-
CppCodec::Copy => Box::new(CopyCodec::default()),
84-
CppCodec::FastBinaryPacking8 => Box::new(FastBinaryPacking8Codec::default()),
85-
CppCodec::FastPFor128 => Box::new(FastPFor128Codec::default()),
86-
CppCodec::FastPFor256 => Box::new(FastPFor256Codec::default()),
87-
CppCodec::FastBinaryPacking16 => Box::new(FastBinaryPacking16Codec::default()),
88-
CppCodec::FastBinaryPacking32 => Box::new(FastBinaryPacking32Codec::default()),
89-
CppCodec::MaskedVByte => Box::new(MaskedVByteCodec::default()),
90-
CppCodec::NewPFor => Box::new(NewPForCodec::default()),
91-
CppCodec::OptPFor => Box::new(OptPForCodec::default()),
92-
CppCodec::PFor2008 => Box::new(PFor2008Codec::default()),
93-
CppCodec::PFor => Box::new(PForCodec::default()),
94-
CppCodec::SimdBinaryPacking => Box::new(SimdBinaryPackingCodec::default()),
95-
CppCodec::SimdFastPFor128 => Box::new(SimdFastPFor128Codec::default()),
96-
CppCodec::SimdFastPFor256 => Box::new(SimdFastPFor256Codec::default()),
97-
CppCodec::SimdGroupSimple => Box::new(SimdGroupSimpleCodec::default()),
98-
CppCodec::SimdGroupSimpleRingBuf => Box::new(SimdGroupSimpleRingBufCodec::default()),
99-
CppCodec::SimdNewPFor => Box::new(SimdNewPForCodec::default()),
100-
CppCodec::SimdOptPFor => Box::new(SimdOptPForCodec::default()),
101-
CppCodec::SimdPFor => Box::new(SimdPForCodec::default()),
102-
CppCodec::SimdSimplePFor => Box::new(SimdSimplePForCodec::default()),
103-
// CppCodec::Simple16 => Box::new(Simple16Codec::default()),
104-
// CppCodec::Simple8b => Box::new(Simple8bCodec::default()),
105-
// CppCodec::Simple8bRle => Box::new(Simple8bRleCodec::default()),
106-
// CppCodec::Simple9 => Box::new(Simple9Codec::default()),
107-
// CppCodec::Simple9Rle => Box::new(Simple9RleCodec::default()),
108-
// CppCodec::SimplePFor => Box::new(SimplePForCodec::default()),
109-
// CppCodec::Snappy => Box::new(SnappyCodec::default()),
110-
CppCodec::StreamVByte => Box::new(StreamVByteCodec::default()),
111-
CppCodec::VByte => Box::new(VByteCodec::default()),
112-
CppCodec::VarInt => Box::new(VarIntCodec::default()),
113-
// CppCodec::VarIntG8iu => Box::new(VarIntG8iuCodec::default()),
114-
CppCodec::VarIntGb => Box::new(VarIntGbCodec::default()),
115-
// CppCodec::VsEncoding => Box::new(VsEncodingCodec::default()),
81+
CppCodec::BP32 => Box::new(cpp::BP32Codec::default()),
82+
CppCodec::Copy => Box::new(cpp::CopyCodec::default()),
83+
CppCodec::FastBinaryPacking8 => Box::new(cpp::FastBinaryPacking8Codec::default()),
84+
CppCodec::FastPFor128 => Box::new(cpp::FastPFor128Codec::default()),
85+
CppCodec::FastPFor256 => Box::new(cpp::FastPFor256Codec::default()),
86+
CppCodec::FastBinaryPacking16 => Box::new(cpp::FastBinaryPacking16Codec::default()),
87+
CppCodec::FastBinaryPacking32 => Box::new(cpp::FastBinaryPacking32Codec::default()),
88+
CppCodec::MaskedVByte => Box::new(cpp::MaskedVByteCodec::default()),
89+
CppCodec::NewPFor => Box::new(cpp::NewPForCodec::default()),
90+
CppCodec::OptPFor => Box::new(cpp::OptPForCodec::default()),
91+
CppCodec::PFor2008 => Box::new(cpp::PFor2008Codec::default()),
92+
CppCodec::PFor => Box::new(cpp::PForCodec::default()),
93+
CppCodec::SimdBinaryPacking => Box::new(cpp::SimdBinaryPackingCodec::default()),
94+
CppCodec::SimdFastPFor128 => Box::new(cpp::SimdFastPFor128Codec::default()),
95+
CppCodec::SimdFastPFor256 => Box::new(cpp::SimdFastPFor256Codec::default()),
96+
CppCodec::SimdGroupSimple => Box::new(cpp::SimdGroupSimpleCodec::default()),
97+
CppCodec::SimdGroupSimpleRingBuf => {
98+
Box::new(cpp::SimdGroupSimpleRingBufCodec::default())
99+
}
100+
CppCodec::SimdNewPFor => Box::new(cpp::SimdNewPForCodec::default()),
101+
CppCodec::SimdOptPFor => Box::new(cpp::SimdOptPForCodec::default()),
102+
CppCodec::SimdPFor => Box::new(cpp::SimdPForCodec::default()),
103+
CppCodec::SimdSimplePFor => Box::new(cpp::SimdSimplePForCodec::default()),
104+
// CppCodec::Simple16 => Box::new(cpp::Simple16Codec::default()),
105+
// CppCodec::Simple8b => Box::new(cpp::Simple8bCodec::default()),
106+
// CppCodec::Simple8bRle => Box::new(cpp::Simple8bRleCodec::default()),
107+
// CppCodec::Simple9 => Box::new(cpp::Simple9Codec::default()),
108+
// CppCodec::Simple9Rle => Box::new(cpp::Simple9RleCodec::default()),
109+
// CppCodec::SimplePFor => Box::new(cpp::SimplePForCodec::default()),
110+
// CppCodec::Snappy => Box::new(cpp::SnappyCodec::default()),
111+
CppCodec::StreamVByte => Box::new(cpp::StreamVByteCodec::default()),
112+
CppCodec::VByte => Box::new(cpp::VByteCodec::default()),
113+
CppCodec::VarInt => Box::new(cpp::VarIntCodec::default()),
114+
// CppCodec::VarIntG8iu => Box::new(cpp::VarIntG8iuCodec::default()),
115+
CppCodec::VarIntGb => Box::new(cpp::VarIntGbCodec::default()),
116+
// CppCodec::VsEncoding => Box::new(cpp::VsEncodingCodec::default()),
116117
}
117118
}
118119
}

fuzz/fuzz_targets/cpp_roundtrip.rs

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,26 @@ mod common;
55
use common::*;
66

77
fuzz_target!(|data: FuzzInput<CppCodec>| {
8-
let codec = BoxedCppCodec::from(data.codec);
8+
let mut codec = BoxedCppCodec::from(data.codec);
99
let input = data.data;
1010

11-
// Allocate output buffer with generous size
12-
let mut output = vec![0u32; input.len() * 2 + 1024];
11+
let mut compressed = Vec::new();
12+
codec.encode(&input, &mut compressed).unwrap();
1313

14-
// Compress the data
15-
let enc_slice = codec.encode32(&input, &mut output).unwrap();
16-
17-
// Now decompress
18-
let mut decoded = vec![0u32; input.len() * 2 + 1024];
19-
let dec_slice = codec.decode32(enc_slice, &mut decoded).unwrap();
14+
let mut decoded = Vec::new();
15+
codec
16+
.decode(&compressed, &mut decoded, None)
17+
.expect("decode");
2018

2119
// Verify roundtrip
22-
if dec_slice.len() + input.len() < 200 {
23-
assert_eq!(input, dec_slice, "Decompressed output mismatches");
20+
if decoded.len() + input.len() < 200 {
21+
assert_eq!(input, decoded.as_slice(), "Decompressed output mismatches");
2422
} else {
25-
assert_eq!(dec_slice.len(), input.len(), "Decompressed length mismatch");
26-
for (i, (&original, &decoded)) in input.iter().zip(dec_slice.iter()).enumerate() {
23+
assert_eq!(decoded.len(), input.len(), "Decompressed length mismatch");
24+
for (i, (&original, &out)) in input.iter().zip(decoded.iter()).enumerate() {
2725
assert_eq!(
28-
original, decoded,
29-
"Mismatch at position {i}: expected {original}, got {decoded}"
26+
original, out,
27+
"Mismatch at position {i}: expected {original}, got {out}"
3028
);
3129
}
3230
}

fuzz/fuzz_targets/rust_compress_oracle.rs

Lines changed: 19 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#![no_main]
22

3-
use fastpfor::{CodecToSlice, cpp, rust};
3+
use fastpfor::{AnyLenCodec, CodecToSlice, cpp, rust};
44
use libfuzzer_sys::fuzz_target;
55
mod common;
66
use common::*;
@@ -28,43 +28,32 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
2828
let last_block_size_multiple = input.len() / block_size * block_size;
2929
let input = &input[..last_block_size_multiple];
3030

31-
// Allocate output buffers with generous size
31+
// Allocate output buffer for Rust (slice API)
3232
let mut rust_compressed = vec![0u32; input.len() * 2 + 1024];
33-
let mut cpp_compressed = vec![0u32; input.len() * 2 + 1024];
3433

3534
// Compress with Rust implementation using Codec wrapper
3635
let mut rust_codec = rust::Codec::from(data.codec);
3736
let rust_result = rust_codec
3837
.compress_to_slice(input, &mut rust_compressed)
3938
.expect("Rust compression failed");
4039

41-
// Compress with C++ implementation
42-
let compressed_oracle_from_cpp = match data.codec {
43-
RustCodec::FastPFOR256 => {
44-
let mut cpp_codec = cpp::FastPFor256Codec::new();
45-
cpp_codec
46-
.compress_to_slice(input, &mut cpp_compressed)
47-
.expect("C++ compression failed")
48-
}
49-
RustCodec::FastPFOR128 => {
50-
let mut cpp_codec = cpp::FastPFor128Codec::new();
51-
cpp_codec
52-
.compress_to_slice(input, &mut cpp_compressed)
53-
.expect("C++ compression failed")
54-
}
55-
RustCodec::VariableByte => {
56-
let mut cpp_codec = cpp::MaskedVByteCodec::new();
57-
cpp_codec
58-
.compress_to_slice(input, &mut cpp_compressed)
59-
.expect("C++ compression failed")
60-
}
61-
RustCodec::JustCopy => {
62-
let mut cpp_codec = cpp::CopyCodec::new();
63-
cpp_codec
64-
.compress_to_slice(input, &mut cpp_compressed)
65-
.expect("C++ compression failed")
66-
}
67-
};
40+
// Compress with C++ implementation (`AnyLenCodec` / Vec API)
41+
let mut cpp_compressed = Vec::new();
42+
match data.codec {
43+
RustCodec::FastPFOR256 => cpp::FastPFor256Codec::new()
44+
.encode(input, &mut cpp_compressed)
45+
.expect("C++ compression failed"),
46+
RustCodec::FastPFOR128 => cpp::FastPFor128Codec::new()
47+
.encode(input, &mut cpp_compressed)
48+
.expect("C++ compression failed"),
49+
RustCodec::VariableByte => cpp::MaskedVByteCodec::new()
50+
.encode(input, &mut cpp_compressed)
51+
.expect("C++ compression failed"),
52+
RustCodec::JustCopy => cpp::CopyCodec::new()
53+
.encode(input, &mut cpp_compressed)
54+
.expect("C++ compression failed"),
55+
}
56+
let compressed_oracle_from_cpp = cpp_compressed.as_slice();
6857

6958
// Compare compressed outputs
7059
assert_eq!(

0 commit comments

Comments
 (0)