Skip to content

Commit 2d5d4e7

Browse files
feat!: add a unified CodecToSlice<In, Out = In> API (#58)
This PR adds a new trait `CodecToSlice<In, Out = In>`:

```rust
/// Low-level compression interface using caller-provided buffers.
///
/// Codecs write into pre-allocated slices and return a sub-slice showing exactly
/// what was written. Works across FFI boundaries and allows buffer reuse.
///
/// # Type Parameters
///
/// - `In`: Input data type (e.g., `u32` or `u64` for integer codecs)
/// - `Out`: Compressed output type (defaults to `In`, but may differ - e.g.,
///   64-bit integers compress to 32-bit words: `CodecToSlice<u64, u32>`)
///
/// # Buffer Sizing
///
/// Caller must ensure output buffers are large enough. For compression, estimate
/// `input.len() * 2 + 1024`. For decompression, size depends on the codec.
pub trait CodecToSlice<In, Out = In> {
    /// Error type returned by compression/decompression operations.
    type Error;

    /// Compresses input into output buffer, returning slice of data written.
    fn compress_to_slice<'out>(
        &mut self,
        input: &[In],
        output: &'out mut [Out],
    ) -> Result<&'out [Out], Self::Error>;

    /// Decompresses input into output buffer, returning slice of data written.
    ///
    /// Output size cannot be known in advance for some codecs (e.g., RLE).
    fn decompress_to_slice<'out>(
        &mut self,
        input: &[Out],
        output: &'out mut [In],
    ) -> Result<&'out [In], Self::Error>;
}
```

Here are the alternatives I have considered:

- Using a `bytes`- or `Cursor`-based API does not work for C++.
- Using `&mut Vec` is not a zero-cost abstraction and likely does not work for C++ either (because of what users would expect).
- An allocating trait:

  ```rust
  pub trait Codec<In, Out = In> {
      type Error;
      fn compress(&mut self, input: &[In]) -> Result<Vec<Out>, Self::Error>;
      fn decompress(&mut self, input: &[Out]) -> Result<Vec<In>, Self::Error>;
  }
  ```

  Pro: works well for the Rust world. Con: may allocate.
- A builder-style API:

  ```rust
  pub trait Codec {
      type CompressBuilder;
      fn compress_builder<'out>(&mut self, input: &[u32]) -> CompressBuilder<'out>;
  }

  pub struct CompressOp<'a> { /* ... */ }

  impl CompressOp<'_> {
      fn to_slice(self, output: &mut [u32]) -> Result<&[u32], Error> { ... }
      fn to_vec(self) -> Result<Vec<u32>, Error> { ... }
      fn to_buf(self, buf: impl BufMut) -> Result<usize, Error> { ... }
  }
  ```

  Best of both worlds, though somewhat harder to use. Not sure if this would be better.

I chose `CodecToSlice` because this way we can migrate step by step and still keep the "good name" `Codec` for the final trait that we settle on, once we have a better idea of what kind of API we really need.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 115d91a commit 2d5d4e7

5 files changed

Lines changed: 225 additions & 48 deletions

File tree

src/cpp/mod.rs

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
pub use cxx::Exception;
55
use cxx::UniquePtr;
66

7+
use crate::CodecToSlice;
8+
79
/// FFI bridge to the C++ FastPFOR library.
810
///
911
/// This module contains the raw FFI declarations for interfacing with the C++ code.
@@ -142,6 +144,51 @@ pub trait Codec32: CodecWrapper {
142144
}
143145
}
144146

147+
impl<C: Codec32> CodecToSlice<u32> for C {
148+
type Error = Exception;
149+
150+
fn compress_to_slice<'out>(
151+
&mut self,
152+
input: &[u32],
153+
output: &'out mut [u32],
154+
) -> Result<&'out [u32], Self::Error> {
155+
let result = self.encode32(input, output)?;
156+
Ok(result)
157+
}
158+
159+
fn decompress_to_slice<'out>(
160+
&mut self,
161+
input: &[u32],
162+
output: &'out mut [u32],
163+
) -> Result<&'out [u32], Self::Error> {
164+
let result = self.decode32(input, output)?;
165+
Ok(result)
166+
}
167+
}
168+
169+
// Note: 64-bit integers are compressed into 32-bit word arrays.
170+
impl<C: Codec64> CodecToSlice<u64, u32> for C {
171+
type Error = Exception;
172+
173+
fn compress_to_slice<'out>(
174+
&mut self,
175+
input: &[u64],
176+
output: &'out mut [u32],
177+
) -> Result<&'out [u32], Self::Error> {
178+
let result = self.encode64(input, output)?;
179+
Ok(result)
180+
}
181+
182+
fn decompress_to_slice<'out>(
183+
&mut self,
184+
input: &[u32],
185+
output: &'out mut [u64],
186+
) -> Result<&'out [u64], Self::Error> {
187+
let result = self.decode64(input, output)?;
188+
Ok(result)
189+
}
190+
}
191+
145192
/// Trait for codecs that support 64-bit integer compression.
146193
///
147194
/// Only certain codecs support 64-bit integers. These are marked with the `@ 64`
@@ -385,38 +432,50 @@ mod tests {
385432

386433
#[test]
387434
fn test_32() {
388-
let codec = FastPFor128Codec::new();
435+
let mut codec = FastPFor128Codec::new();
389436
let input = vec![1, 2, 3, 4, 5];
390437
let mut output = vec![0; 10];
391438
let mut output2 = vec![0; 10];
439+
let mut output3 = vec![0; 10];
392440
let encoded = codec.encode32(&input, &mut output).unwrap();
393441
let encoded2 = codec.encode32(&input, &mut output2).unwrap();
442+
let encoded3 = codec.compress_to_slice(&input, &mut output3).unwrap();
394443
assert_eq!(encoded, encoded2);
444+
assert_eq!(encoded, encoded3);
395445

396446
let mut decoded = vec![0; 10];
397447
let mut decoded2 = vec![0; 10];
448+
let mut decoded3 = vec![0; 10];
398449
let decoded = codec.decode32(encoded, &mut decoded).unwrap();
399450
let decoded2 = codec.decode32(encoded, &mut decoded2).unwrap();
451+
let decoded3 = codec.decompress_to_slice(encoded, &mut decoded3).unwrap();
400452
assert_eq!(decoded, decoded2);
453+
assert_eq!(decoded, decoded3);
401454

402455
assert_eq!(decoded, input);
403456
}
404457

405458
#[test]
406459
fn test_64() {
407-
let codec = FastPFor128Codec::new();
460+
let mut codec = FastPFor128Codec::new();
408461
let input = vec![1, 2, 3, 4, 5];
409462
let mut output = vec![0; 10];
410463
let mut output2 = vec![0; 10];
464+
let mut output3 = vec![0; 10];
411465
let encoded = codec.encode64(&input, &mut output).unwrap();
412466
let encoded2 = codec.encode64(&input, &mut output2).unwrap();
467+
let encoded3 = codec.compress_to_slice(&input, &mut output3).unwrap();
413468
assert_eq!(encoded, encoded2);
469+
assert_eq!(encoded, encoded3);
414470

415471
let mut decoded = vec![0; 10];
416472
let mut decoded2 = vec![0; 10];
473+
let mut decoded3 = vec![0; 10];
417474
let decoded = codec.decode64(encoded, &mut decoded).unwrap();
418475
let decoded2 = codec.decode64(encoded, &mut decoded2).unwrap();
476+
let decoded3 = codec.decompress_to_slice(encoded, &mut decoded3).unwrap();
419477
assert_eq!(decoded, decoded2);
478+
assert_eq!(decoded, decoded3);
420479

421480
assert_eq!(decoded, input);
422481
}

src/lib.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,39 @@ pub mod cpp;
1616
#[cfg(feature = "rust")]
1717
/// Rust re-implementation of `FastPFor` (work in progress)
1818
pub mod rust;
19+
20+
/// Low-level compression interface using caller-provided buffers.
21+
///
22+
/// Codecs write into pre-allocated slices and return a sub-slice showing exactly
23+
/// what was written. Works across FFI boundaries and allows buffer reuse.
24+
///
25+
/// # Type Parameters
26+
///
27+
/// - `In`: Input data type (e.g., `u32` or `u64` for integer codecs)
28+
/// - `Out`: Compressed output type (defaults to `In`, but may differ - e.g.,
29+
/// 64-bit integers compress to 32-bit words: `CodecToSlice<u64, u32>`)
30+
///
31+
/// # Buffer Sizing
32+
///
33+
/// Caller must ensure output buffers are large enough. For compression, estimate
34+
/// `input.len() * 2 + 1024`. For decompression, size depends on the codec.
35+
pub trait CodecToSlice<In, Out = In> {
36+
/// Error type returned by compression/decompression operations.
37+
type Error;
38+
39+
/// Compresses input into output buffer, returning slice of data written.
40+
fn compress_to_slice<'out>(
41+
&mut self,
42+
input: &[In],
43+
output: &'out mut [Out],
44+
) -> Result<&'out [Out], Self::Error>;
45+
46+
/// Decompresses input into output buffer, returning slice of data written.
47+
///
48+
/// Output size cannot be known in advance for some codecs (e.g., RLE).
49+
fn decompress_to_slice<'out>(
50+
&mut self,
51+
input: &[Out],
52+
output: &'out mut [In],
53+
) -> Result<&'out [In], Self::Error>;
54+
}

src/rust/error.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,8 @@ pub enum FastPForError {
1818
/// Output buffer too small
1919
#[error("Output buffer too small")]
2020
OutputBufferTooSmall,
21+
22+
/// Invalid input length
23+
#[error("Invalid input length {0}")]
24+
InvalidInputLength(usize),
2125
}

src/rust/integer_compression/codec.rs

Lines changed: 123 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
use crate::rust::{FastPFOR, JustCopy, VariableByte};
1+
use std::io::Cursor;
2+
3+
use crate::rust::{FastPFOR, FastPForResult, Integer, JustCopy, VariableByte};
4+
use crate::CodecToSlice;
25

36
/// Type-erased wrapper for compression codecs.
47
///
@@ -12,6 +15,100 @@ pub enum Codec {
1215
JustCopy(JustCopy),
1316
}
1417

18+
impl Integer<u32> for Codec {
19+
fn compress(
20+
&mut self,
21+
input: &[u32],
22+
input_length: u32,
23+
input_offset: &mut Cursor<u32>,
24+
output: &mut [u32],
25+
output_offset: &mut Cursor<u32>,
26+
) -> FastPForResult<()> {
27+
match self {
28+
Codec::FastPFor(fastpfor) => {
29+
fastpfor.compress(input, input_length, input_offset, output, output_offset)
30+
}
31+
Codec::VariableByte(vb) => {
32+
vb.compress(input, input_length, input_offset, output, output_offset)
33+
}
34+
Codec::JustCopy(jc) => {
35+
jc.compress(input, input_length, input_offset, output, output_offset)
36+
}
37+
}
38+
}
39+
40+
fn uncompress(
41+
&mut self,
42+
input: &[u32],
43+
input_length: u32,
44+
input_offset: &mut Cursor<u32>,
45+
output: &mut [u32],
46+
output_offset: &mut Cursor<u32>,
47+
) -> FastPForResult<()> {
48+
match self {
49+
Codec::FastPFor(fastpfor) => {
50+
fastpfor.uncompress(input, input_length, input_offset, output, output_offset)
51+
}
52+
Codec::VariableByte(vb) => {
53+
vb.uncompress(input, input_length, input_offset, output, output_offset)
54+
}
55+
Codec::JustCopy(jc) => {
56+
jc.uncompress(input, input_length, input_offset, output, output_offset)
57+
}
58+
}
59+
}
60+
}
61+
62+
impl CodecToSlice<u32> for Codec {
63+
type Error = crate::rust::FastPForError;
64+
65+
fn compress_to_slice<'out>(
66+
&mut self,
67+
input: &[u32],
68+
output: &'out mut [u32],
69+
) -> Result<&'out [u32], Self::Error> {
70+
let mut output_offset = Cursor::new(0);
71+
let input_length = input
72+
.len()
73+
.try_into()
74+
.map_err(|_| Self::Error::InvalidInputLength(input.len()))?;
75+
76+
self.compress(
77+
input,
78+
input_length,
79+
&mut Cursor::new(0),
80+
output,
81+
&mut output_offset,
82+
)?;
83+
84+
let written = output_offset.position() as usize;
85+
Ok(&output[..written])
86+
}
87+
88+
fn decompress_to_slice<'out>(
89+
&mut self,
90+
input: &[u32],
91+
output: &'out mut [u32],
92+
) -> Result<&'out [u32], Self::Error> {
93+
let mut output_offset = Cursor::new(0);
94+
let input_length: u32 = input
95+
.len()
96+
.try_into()
97+
.map_err(|_| Self::Error::InvalidInputLength(input.len()))?;
98+
99+
self.uncompress(
100+
input,
101+
input_length,
102+
&mut Cursor::new(0),
103+
output,
104+
&mut output_offset,
105+
)?;
106+
107+
let written = output_offset.position() as usize;
108+
Ok(&output[..written])
109+
}
110+
}
111+
15112
impl From<FastPFOR> for Codec {
16113
fn from(fastpfor: FastPFOR) -> Self {
17114
Codec::FastPFor(Box::new(fastpfor))
@@ -29,3 +126,28 @@ impl From<JustCopy> for Codec {
29126
Codec::JustCopy(jc)
30127
}
31128
}
129+
130+
#[cfg(test)]
131+
mod tests {
132+
use super::*;
133+
134+
#[test]
135+
fn supports_compress_to_slice() {
136+
let data = vec![1, 2, 3, 4, 5];
137+
let mut rust_codec = Codec::from(VariableByte::new());
138+
let mut compressed = vec![0u32; data.len() * 4];
139+
140+
let compressed_len = {
141+
let result = rust_codec
142+
.compress_to_slice(&data, &mut compressed)
143+
.unwrap();
144+
result.len()
145+
};
146+
147+
let mut decompressed = vec![0u32; data.len()];
148+
let result = rust_codec
149+
.decompress_to_slice(&compressed[..compressed_len], &mut decompressed)
150+
.unwrap();
151+
assert_eq!(result, &data[..]);
152+
}
153+
}
Lines changed: 1 addition & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use std::io::Cursor;
22

3-
use crate::rust::{Codec, FastPForResult};
3+
use crate::rust::FastPForResult;
44

55
/// Integer compression/decompression interface with length headers.
66
///
@@ -37,47 +37,3 @@ pub trait Integer<T> {
3737
output_offset: &mut Cursor<u32>,
3838
) -> FastPForResult<()>;
3939
}
40-
41-
impl Integer<u32> for Codec {
42-
fn compress(
43-
&mut self,
44-
input: &[u32],
45-
input_length: u32,
46-
input_offset: &mut Cursor<u32>,
47-
output: &mut [u32],
48-
output_offset: &mut Cursor<u32>,
49-
) -> FastPForResult<()> {
50-
match self {
51-
Codec::FastPFor(fastpfor) => {
52-
fastpfor.compress(input, input_length, input_offset, output, output_offset)
53-
}
54-
Codec::VariableByte(vb) => {
55-
vb.compress(input, input_length, input_offset, output, output_offset)
56-
}
57-
Codec::JustCopy(jc) => {
58-
jc.compress(input, input_length, input_offset, output, output_offset)
59-
}
60-
}
61-
}
62-
63-
fn uncompress(
64-
&mut self,
65-
input: &[u32],
66-
input_length: u32,
67-
input_offset: &mut Cursor<u32>,
68-
output: &mut [u32],
69-
output_offset: &mut Cursor<u32>,
70-
) -> FastPForResult<()> {
71-
match self {
72-
Codec::FastPFor(fastpfor) => {
73-
fastpfor.uncompress(input, input_length, input_offset, output, output_offset)
74-
}
75-
Codec::VariableByte(vb) => {
76-
vb.uncompress(input, input_length, input_offset, output, output_offset)
77-
}
78-
Codec::JustCopy(jc) => {
79-
jc.uncompress(input, input_length, input_offset, output, output_offset)
80-
}
81-
}
82-
}
83-
}

0 commit comments

Comments
 (0)