|
| 1 | +#![no_main] |
| 2 | + |
| 3 | +//! Fuzz the Rust FastPFOR decoder against **arbitrary** (potentially malformed) compressed bytes. |
| 4 | +//! |
| 5 | +//! Why this target is needed |
| 6 | +//! ------------------------- |
| 7 | +//! The existing `rust_decompress_oracle` target only ever feeds *well-formed* data to the Rust |
| 8 | +//! decoder (it first compresses valid input with the C++ oracle, then decompresses with Rust). |
| 9 | +//! That means corrupted or truncated compressed streams never reach the decoder, so out-of-bounds |
| 10 | +//! index panics in `decode_page` are invisible to the fuzzer. |
| 11 | +//! |
| 12 | +//! This target removes the C++ oracle entirely: arbitrary bytes are reinterpreted as `u32` words |
| 13 | +//! and handed straight to the Rust decoder. The contract expected of the decoder is: |
| 14 | +//! |
| 15 | +//! * A successful `Ok(...)` must produce exactly `expected_len` decompressed integers (not asserted yet: the target discards the result). |
| 16 | +//! * An `Err(...)` is also acceptable — the decoder is allowed to reject garbage input. |
| 17 | +//! * A **panic** is never acceptable — this is the only rule the target currently checks. |
| 18 | +//! |
| 19 | +//! Running this target against the `main` branch will reproduce the panic; |
| 20 | +//! running it against the `dont-panic` branch will produce only `Ok`/`Err` outcomes. |
| 21 | + |
| 22 | +use arbitrary::Arbitrary; |
| 23 | +use fastpfor::rust::{BLOCK_SIZE_128, BLOCK_SIZE_256, DEFAULT_PAGE_SIZE, FastPFOR, VariableByte}; |
| 24 | +use fastpfor::{CodecToSlice, rust}; |
| 25 | +use libfuzzer_sys::fuzz_target; |
| 26 | + |
| 27 | +/// Fuzzer-chosen codec selector: `FastPFOR` built with `BLOCK_SIZE_256` or `BLOCK_SIZE_128`, or `VariableByte`. |
| 28 | +#[derive(Arbitrary, Clone, Copy, Debug)] |
| 29 | +enum RustFastPForCodec { |
| 30 | + FastPFOR256, |
| 31 | + FastPFOR128, |
| 32 | + VariableByte, |
| 33 | +} |
| 34 | + |
| 35 | +/// Fuzz input: raw compressed bytes, the size of the output buffer, and the codec selector (`codec`). |
| 36 | +#[derive(Arbitrary, Debug)] |
| 37 | +struct FuzzInput { |
| 38 | + /// Raw bytes; the target zero-pads them to a multiple of 4 and decodes them as little-endian `u32` words. |
| 39 | + compressed_bytes: Vec<u8>, |
| 40 | + /// Length, in `u32` values, of the output buffer handed to the decoder. |
| 41 | + /// Capped at `MAX_LEN` (4096) inside the target to avoid enormous allocations. |
| 42 | + expected_len: u16, |
| 43 | + codec: RustFastPForCodec, |
| 44 | +} |
| 45 | + |
| 46 | +fuzz_target!(|data: FuzzInput| { |
| 47 | + // Zero-pad the byte buffer so its length is a multiple of 4 (one `u32` word per 4 bytes). |
| 48 | + let mut bytes = data.compressed_bytes; |
| 49 | + let rem = bytes.len() % 4; |
| 50 | + if rem != 0 { |
| 51 | + bytes.resize(bytes.len() + (4 - rem), 0); |
| 52 | + } |
| 53 | + |
| 54 | + // Convert without `unsafe`: each 4-byte chunk is decoded as a little-endian `u32` into a new Vec, |
| 55 | + // so no pointer alignment is required; the padding above means `chunks_exact(4)` leaves no remainder. |
| 56 | + let compressed: Vec<u32> = bytes |
| 57 | + .chunks_exact(4) |
| 58 | + .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]])) |
| 59 | + .collect(); |
| 60 | + |
| 61 | + // Cap the output buffer at MAX_LEN words to prevent huge allocations while still exercising non-trivial sizes. |
| 62 | + const MAX_LEN: usize = 4096; |
| 63 | + let expected_len = (data.expected_len as usize).min(MAX_LEN); |
| 64 | + let mut output = vec![0u32; expected_len]; |
| 65 | + |
| 66 | + // Build the codec under test; both FastPFOR variants use DEFAULT_PAGE_SIZE and differ only in block size. |
| 67 | + let mut codec: rust::Codec = match data.codec { |
| 68 | + RustFastPForCodec::FastPFOR256 => { |
| 69 | + rust::Codec::from(FastPFOR::new(DEFAULT_PAGE_SIZE, BLOCK_SIZE_256)) |
| 70 | + } |
| 71 | + RustFastPForCodec::FastPFOR128 => { |
| 72 | + rust::Codec::from(FastPFOR::new(DEFAULT_PAGE_SIZE, BLOCK_SIZE_128)) |
| 73 | + } |
| 74 | + RustFastPForCodec::VariableByte => rust::Codec::from(VariableByte::new()), |
| 75 | + }; |
| 76 | + |
| 77 | + // The result is discarded: both `Ok` and `Err` are accepted and the length of a successful result is not checked. A panic is a bug. |
| 78 | + let _ = codec.decompress_to_slice(&compressed, &mut output); |
| 79 | +}); |
0 commit comments