Skip to content

Commit 14687bf

Browse files
fix(rust): VariableByte not matching the cpp version (#60)
`VariableByte` currently matches none of the schemes in the C++ implementation. With this change it matches `MaskedVByteCodec` or `VByteCodec`. I don't quite know why VB is implemented differently from "the reference". The codec first appears in 6e232dd#diff-01152c24d242e2ecf9520ee8a8bbb63b7aa622e3d4a530a19f4acfe9f970f7ad. I am not entirely sure that it is 100% correct, but it is better than before (as in: it actually passes the tests). I am sure that there are still 2-3 correctness issues in this code... grumble... I am also not sure about some of the choices in the C++ code, for example: why 10? 🤷🏻‍♂️
1 parent e5a00ea commit 14687bf

6 files changed

Lines changed: 392 additions & 143 deletions

File tree

fuzz/fuzz_targets/common.rs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ pub struct FuzzInput<C> {
1111

1212
impl<C: std::fmt::Debug> std::fmt::Debug for FuzzInput<C> {
1313
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
14-
f.debug_struct("FuzzInput<C>")
15-
.field("data_length", &self.data.len())
14+
f.debug_struct("FuzzInput")
1615
.field("codec", &self.codec)
16+
.field("data", &HexSlice(&self.data))
1717
.finish()
1818
}
1919
}
@@ -117,3 +117,26 @@ impl From<CppCodec> for BoxedCppCodec {
117117
}
118118
}
119119
}
120+
121+
pub struct HexSlice<'a>(pub &'a [u32]);
122+
123+
impl<'a> std::fmt::Debug for HexSlice<'a> {
124+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125+
const MAX: usize = 20;
126+
127+
let total = self.0.len();
128+
let shown = total.min(MAX);
129+
130+
let mut list = f.debug_list();
131+
132+
for v in &self.0[..shown] {
133+
list.entry(&format_args!("{:#010x}", v));
134+
}
135+
136+
if total > MAX {
137+
list.entry(&format_args!(".. out of {} total", total));
138+
}
139+
140+
list.finish()
141+
}
142+
}

fuzz/fuzz_targets/rust_compress_oracle.rs

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,6 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
1313
return;
1414
}
1515

16-
// TODO: Behaviour differs
17-
if data.codec == RustCodec::VariableByte {
18-
return;
19-
}
20-
2116
// TODO: To make the encoder not crash -> Skip inputs smaller than block size
2217
let block_size = match data.codec {
2318
RustCodec::FastPFOR256 => 256,
@@ -44,7 +39,7 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
4439
.expect("Rust compression failed");
4540

4641
// Compress with C++ implementation
47-
let cpp_result = match data.codec {
42+
let compressed_oracle_from_cpp = match data.codec {
4843
RustCodec::FastPFOR256 => {
4944
let mut cpp_codec = cpp::FastPFor256Codec::new();
5045
cpp_codec
@@ -58,7 +53,7 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
5853
.expect("C++ compression failed")
5954
}
6055
RustCodec::VariableByte => {
61-
let mut cpp_codec = cpp::VByteCodec::new();
56+
let mut cpp_codec = cpp::MaskedVByteCodec::new();
6257
cpp_codec
6358
.compress_to_slice(input, &mut cpp_compressed)
6459
.expect("C++ compression failed")
@@ -74,13 +69,13 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
7469
// Compare compressed outputs
7570
assert_eq!(
7671
rust_result.len(),
77-
cpp_result.len(),
72+
compressed_oracle_from_cpp.len(),
7873
"Compressed length mismatch: Rust={}, C++={}",
7974
rust_result.len(),
80-
cpp_result.len()
75+
compressed_oracle_from_cpp.len()
8176
);
8277

83-
for (i, (&rust_val, &cpp_val)) in rust_result.iter().zip(cpp_result.iter()).enumerate() {
78+
for (i, (&rust_val, &cpp_val)) in rust_result.iter().zip(compressed_oracle_from_cpp.iter()).enumerate() {
8479
assert_eq!(
8580
rust_val, cpp_val,
8681
"Compressed data mismatch at position {}: Rust={}, C++={}",

fuzz/fuzz_targets/rust_decompress_oracle.rs

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#![no_main]
22

3-
use fastpfor::{CodecToSlice, cpp, rust};
3+
use fastpfor::{cpp, rust, CodecToSlice};
44
use libfuzzer_sys::fuzz_target;
55
mod common;
66
use common::*;
@@ -13,11 +13,6 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
1313
return;
1414
}
1515

16-
// TODO: Behaviour differs
17-
if data.codec == RustCodec::VariableByte {
18-
return;
19-
}
20-
2116
// TODO: To make the decoder not crash -> Skip inputs smaller than block size
2217
let block_size = match data.codec {
2318
RustCodec::FastPFOR256 => 256,
@@ -35,7 +30,7 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
3530

3631
// First, compress with C++ implementation to get valid compressed data
3732
let mut cpp_compressed = vec![0u32; input.len() * 2 + 1024];
38-
let compressed_data = match data.codec {
33+
let compressed_oracle_from_cpp = match data.codec {
3934
RustCodec::FastPFOR256 => {
4035
let mut cpp_codec = cpp::FastPFor256Codec::new();
4136
cpp_codec
@@ -49,7 +44,7 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
4944
.expect("C++ compression failed")
5045
}
5146
RustCodec::VariableByte => {
52-
let mut cpp_codec = cpp::VByteCodec::new();
47+
let mut cpp_codec = cpp::MaskedVByteCodec::new();
5348
cpp_codec
5449
.compress_to_slice(input, &mut cpp_compressed)
5550
.expect("C++ compression failed")
@@ -66,7 +61,7 @@ fuzz_target!(|data: FuzzInput<RustCodec>| {
6661
let mut rust_decompressed = vec![0u32; input.len()];
6762
let mut rust_codec = rust::Codec::from(data.codec);
6863
let rust_result = rust_codec
69-
.decompress_to_slice(compressed_data, &mut rust_decompressed)
64+
.decompress_to_slice(compressed_oracle_from_cpp, &mut rust_decompressed)
7065
.expect("Rust decompression failed");
7166

7267
// Compare decompressed outputs

src/rust/integer_compression/helpers.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -35,15 +35,3 @@ fn bitlen(x: u64) -> i32 {
3535
}
3636
64 - clz(x) as i32
3737
}
38-
39-
/// Extracts 7 bits from `val` at the position specified by `i` (0-indexed, each position is 7 bits).
40-
/// The result is masked to ensure only 7 bits are returned.
41-
pub fn extract7bits(i: i32, val: i64) -> u8 {
42-
((val >> (7 * i)) & ((1 << 7) - 1)) as u8
43-
}
44-
45-
/// Extracts 7 bits from `val` at the position specified by `i` without masking.
46-
/// Caller must ensure proper masking if needed.
47-
pub fn extract_7bits_maskless(i: i32, val: i64) -> u8 {
48-
(val >> (7 * i)) as u8
49-
}

0 commit comments

Comments
 (0)