feat!: add a unified CodecToSlice<In, Out = In> API (#58)

CommanderStorm · pre-commit-ci[bot] · web-flow · commit 2d5d4e7f1797 · 2026-02-14T16:58:05.000+01:00
This PR adds a new trait `CodecToSlice<In, Out = In>`:

```rust
/// Low-level compression interface using caller-provided buffers.
///
/// Codecs write into pre-allocated slices and return a sub-slice showing exactly
/// what was written. Works across FFI boundaries and allows buffer reuse.
///
/// # Type Parameters
///
/// - `In`: Input data type (e.g., `u32` or `u64` for integer codecs)
/// - `Out`: Compressed output type (defaults to `In`, but may differ - e.g.,
///   64-bit integers compress to 32-bit words: `CodecToSlice<u64, u32>`)
///
/// # Buffer Sizing
///
/// Caller must ensure output buffers are large enough. For compression, estimate
/// `input.len() * 2 + 1024`. For decompression, size depends on the codec.
pub trait CodecToSlice<In, Out = In> {
    /// Error type returned by compression/decompression operations.
    type Error;

    /// Compresses input into output buffer, returning slice of data written.
    ///
    /// The returned slice borrows from `output` for the `'out` lifetime.
    ///
    /// # Errors
    ///
    /// Returns `Self::Error` when the codec reports a failure. What counts as
    /// a failure (for instance an undersized `output`) is defined by the
    /// implementing codec.
    fn compress_to_slice<'out>(
        &mut self,
        input: &[In],
        output: &'out mut [Out],
    ) -> Result<&'out [Out], Self::Error>;

    /// Decompresses input into output buffer, returning slice of data written.
    ///
    /// Output size cannot be known in advance for some codecs (e.g., RLE).
    /// The returned slice borrows from `output` for the `'out` lifetime.
    ///
    /// # Errors
    ///
    /// Returns `Self::Error` when the codec reports a failure. What counts as
    /// a failure (for instance an undersized `output`) is defined by the
    /// implementing codec.
    fn decompress_to_slice<'out>(
        &mut self,
        input: &[Out],
        output: &'out mut [In],
    ) -> Result<&'out [In], Self::Error>;
}
```

Here are the alternatives I have considered:
- using a `bytes`- or `Cursor`-based API does not work for cpp
- using `&mut Vec` is not a zero-cost abstraction and likely does not work for
cpp either (because of what users would expect)
- ```rust
  /// Allocating alternative: each call returns a freshly allocated `Vec`.
  ///
  /// `In` is the uncompressed element type and `Out` the compressed element
  /// type (defaults to `In`).
  pub trait Codec<In, Out = In> {
    /// Error type returned by compression/decompression operations.
    type Error;

    /// Compresses `input`, returning the compressed data as a new `Vec<Out>`.
    fn compress(&mut self, input: &[In]) -> Result<Vec<Out>, Self::Error>;

    /// Decompresses `input`, returning the original data as a new `Vec<In>`.
    ///
    /// Takes the compressed type `Out` and yields `In`, i.e. the inverse
    /// direction of `compress`.
    fn decompress(&mut self, input: &[Out]) -> Result<Vec<In>, Self::Error>;
  }
  ```
  Pro: Works well for the rust world
  Con: may allocate
- ```rust
  pub trait Codec {
    type CompressBuilder;
    fn compress_builder<'out>(&mut self, input: &[u32]) -> CompressBuilder<'out>;
  }

  pub struct CompressOp<'a> { /* ... */ }

  impl CompressOp<'_> {
    fn to_slice(self, output: &mut [u32]) -> Result<&[u32], Error> { ... }
    fn to_vec(self) -> Result<Vec<u32>, Error> { ... }
    fn to_buf(self, buf: impl BufMut) -> Result<usize, Error> { ... }
  }
  ```
  best of both worlds, some usage difficulty though.
  Not sure if this would be better


I chose `CodecToSlice` since this way we can migrate step by step and
still have the "good name" `Codec` for the final trait that we can
settle on, once we have a better idea what kind of API we really need.

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
diff --git a/src/cpp/mod.rs b/src/cpp/mod.rs
@@ -4,6 +4,8 @@
 pub use cxx::Exception;
 use cxx::UniquePtr;
 
+use crate::CodecToSlice;
+
 /// FFI bridge to the C++ FastPFOR library.
 ///
 /// This module contains the raw FFI declarations for interfacing with the C++ code.
@@ -142,6 +144,51 @@ pub trait Codec32: CodecWrapper {
     }
 }
 
+impl<C: Codec32> CodecToSlice<u32> for C {
+    type Error = Exception;
+
+    fn compress_to_slice<'out>(
+        &mut self,
+        input: &[u32],
+        output: &'out mut [u32],
+    ) -> Result<&'out [u32], Self::Error> {
+        let result = self.encode32(input, output)?;
+        Ok(result)
+    }
+
+    fn decompress_to_slice<'out>(
+        &mut self,
+        input: &[u32],
+        output: &'out mut [u32],
+    ) -> Result<&'out [u32], Self::Error> {
+        let result = self.decode32(input, output)?;
+        Ok(result)
+    }
+}
+
+// Note: 64-bit integers are compressed into 32-bit word arrays.
+impl<C: Codec64> CodecToSlice<u64, u32> for C {
+    type Error = Exception;
+
+    fn compress_to_slice<'out>(
+        &mut self,
+        input: &[u64],
+        output: &'out mut [u32],
+    ) -> Result<&'out [u32], Self::Error> {
+        let result = self.encode64(input, output)?;
+        Ok(result)
+    }
+
+    fn decompress_to_slice<'out>(
+        &mut self,
+        input: &[u32],
+        output: &'out mut [u64],
+    ) -> Result<&'out [u64], Self::Error> {
+        let result = self.decode64(input, output)?;
+        Ok(result)
+    }
+}
+
 /// Trait for codecs that support 64-bit integer compression.
 ///
 /// Only certain codecs support 64-bit integers. These are marked with the `@ 64`
@@ -385,38 +432,50 @@ mod tests {
 
     #[test]
     fn test_32() {
-        let codec = FastPFor128Codec::new();
+        let mut codec = FastPFor128Codec::new();
         let input = vec![1, 2, 3, 4, 5];
         let mut output = vec![0; 10];
         let mut output2 = vec![0; 10];
+        let mut output3 = vec![0; 10];
         let encoded = codec.encode32(&input, &mut output).unwrap();
         let encoded2 = codec.encode32(&input, &mut output2).unwrap();
+        let encoded3 = codec.compress_to_slice(&input, &mut output3).unwrap();
         assert_eq!(encoded, encoded2);
+        assert_eq!(encoded, encoded3);
 
         let mut decoded = vec![0; 10];
         let mut decoded2 = vec![0; 10];
+        let mut decoded3 = vec![0; 10];
         let decoded = codec.decode32(encoded, &mut decoded).unwrap();
         let decoded2 = codec.decode32(encoded, &mut decoded2).unwrap();
+        let decoded3 = codec.decompress_to_slice(encoded, &mut decoded3).unwrap();
         assert_eq!(decoded, decoded2);
+        assert_eq!(decoded, decoded3);
 
         assert_eq!(decoded, input);
     }
 
     #[test]
     fn test_64() {
-        let codec = FastPFor128Codec::new();
+        let mut codec = FastPFor128Codec::new();
         let input = vec![1, 2, 3, 4, 5];
         let mut output = vec![0; 10];
         let mut output2 = vec![0; 10];
+        let mut output3 = vec![0; 10];
         let encoded = codec.encode64(&input, &mut output).unwrap();
         let encoded2 = codec.encode64(&input, &mut output2).unwrap();
+        let encoded3 = codec.compress_to_slice(&input, &mut output3).unwrap();
         assert_eq!(encoded, encoded2);
+        assert_eq!(encoded, encoded3);
 
         let mut decoded = vec![0; 10];
         let mut decoded2 = vec![0; 10];
+        let mut decoded3 = vec![0; 10];
         let decoded = codec.decode64(encoded, &mut decoded).unwrap();
         let decoded2 = codec.decode64(encoded, &mut decoded2).unwrap();
+        let decoded3 = codec.decompress_to_slice(encoded, &mut decoded3).unwrap();
         assert_eq!(decoded, decoded2);
+        assert_eq!(decoded, decoded3);
 
         assert_eq!(decoded, input);
     }
diff --git a/src/lib.rs b/src/lib.rs
@@ -16,3 +16,39 @@ pub mod cpp;
 #[cfg(feature = "rust")]
 /// Rust re-implementation of `FastPFor` (work in progress)
 pub mod rust;
+
+/// Low-level compression interface using caller-provided buffers.
+///
+/// Codecs write into pre-allocated slices and return a sub-slice showing exactly
+/// what was written. Works across FFI boundaries and allows buffer reuse.
+///
+/// # Type Parameters
+///
+/// - `In`: Input data type (e.g., `u32` or `u64` for integer codecs)
+/// - `Out`: Compressed output type (defaults to `In`, but may differ - e.g.,
+///   64-bit integers compress to 32-bit words: `CodecToSlice<u64, u32>`)
+///
+/// # Buffer Sizing
+///
+/// Caller must ensure output buffers are large enough. For compression, estimate
+/// `input.len() * 2 + 1024`. For decompression, size depends on the codec.
+pub trait CodecToSlice<In, Out = In> {
+    /// Error type returned by compression/decompression operations.
+    type Error;
+
+    /// Compresses input into output buffer, returning slice of data written.
+    fn compress_to_slice<'out>(
+        &mut self,
+        input: &[In],
+        output: &'out mut [Out],
+    ) -> Result<&'out [Out], Self::Error>;
+
+    /// Decompresses input into output buffer, returning slice of data written.
+    ///
+    /// Output size cannot be known in advance for some codecs (e.g., RLE).
+    fn decompress_to_slice<'out>(
+        &mut self,
+        input: &[Out],
+        output: &'out mut [In],
+    ) -> Result<&'out [In], Self::Error>;
+}
diff --git a/src/rust/error.rs b/src/rust/error.rs
@@ -18,4 +18,8 @@ pub enum FastPForError {
     /// Output buffer too small
     #[error("Output buffer too small")]
     OutputBufferTooSmall,
+
+    /// Invalid input length
+    #[error("Invalid input length {0}")]
+    InvalidInputLength(usize),
 }
diff --git a/src/rust/integer_compression/codec.rs b/src/rust/integer_compression/codec.rs
@@ -1,4 +1,7 @@
-use crate::rust::{FastPFOR, JustCopy, VariableByte};
+use std::io::Cursor;
+
+use crate::rust::{FastPFOR, FastPForResult, Integer, JustCopy, VariableByte};
+use crate::CodecToSlice;
 
 /// Type-erased wrapper for compression codecs.
 ///
@@ -12,6 +15,100 @@ pub enum Codec {
     JustCopy(JustCopy),
 }
 
+impl Integer<u32> for Codec {
+    fn compress(
+        &mut self,
+        input: &[u32],
+        input_length: u32,
+        input_offset: &mut Cursor<u32>,
+        output: &mut [u32],
+        output_offset: &mut Cursor<u32>,
+    ) -> FastPForResult<()> {
+        match self {
+            Codec::FastPFor(fastpfor) => {
+                fastpfor.compress(input, input_length, input_offset, output, output_offset)
+            }
+            Codec::VariableByte(vb) => {
+                vb.compress(input, input_length, input_offset, output, output_offset)
+            }
+            Codec::JustCopy(jc) => {
+                jc.compress(input, input_length, input_offset, output, output_offset)
+            }
+        }
+    }
+
+    fn uncompress(
+        &mut self,
+        input: &[u32],
+        input_length: u32,
+        input_offset: &mut Cursor<u32>,
+        output: &mut [u32],
+        output_offset: &mut Cursor<u32>,
+    ) -> FastPForResult<()> {
+        match self {
+            Codec::FastPFor(fastpfor) => {
+                fastpfor.uncompress(input, input_length, input_offset, output, output_offset)
+            }
+            Codec::VariableByte(vb) => {
+                vb.uncompress(input, input_length, input_offset, output, output_offset)
+            }
+            Codec::JustCopy(jc) => {
+                jc.uncompress(input, input_length, input_offset, output, output_offset)
+            }
+        }
+    }
+}
+
+impl CodecToSlice<u32> for Codec {
+    type Error = crate::rust::FastPForError;
+
+    fn compress_to_slice<'out>(
+        &mut self,
+        input: &[u32],
+        output: &'out mut [u32],
+    ) -> Result<&'out [u32], Self::Error> {
+        let mut output_offset = Cursor::new(0);
+        let input_length = input
+            .len()
+            .try_into()
+            .map_err(|_| Self::Error::InvalidInputLength(input.len()))?;
+
+        self.compress(
+            input,
+            input_length,
+            &mut Cursor::new(0),
+            output,
+            &mut output_offset,
+        )?;
+
+        let written = output_offset.position() as usize;
+        Ok(&output[..written])
+    }
+
+    fn decompress_to_slice<'out>(
+        &mut self,
+        input: &[u32],
+        output: &'out mut [u32],
+    ) -> Result<&'out [u32], Self::Error> {
+        let mut output_offset = Cursor::new(0);
+        let input_length: u32 = input
+            .len()
+            .try_into()
+            .map_err(|_| Self::Error::InvalidInputLength(input.len()))?;
+
+        self.uncompress(
+            input,
+            input_length,
+            &mut Cursor::new(0),
+            output,
+            &mut output_offset,
+        )?;
+
+        let written = output_offset.position() as usize;
+        Ok(&output[..written])
+    }
+}
+
 impl From<FastPFOR> for Codec {
     fn from(fastpfor: FastPFOR) -> Self {
         Codec::FastPFor(Box::new(fastpfor))
@@ -29,3 +126,28 @@ impl From<JustCopy> for Codec {
         Codec::JustCopy(jc)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn supports_compress_to_slice() {
+        let data = vec![1, 2, 3, 4, 5];
+        let mut rust_codec = Codec::from(VariableByte::new());
+        let mut compressed = vec![0u32; data.len() * 4];
+
+        let compressed_len = {
+            let result = rust_codec
+                .compress_to_slice(&data, &mut compressed)
+                .unwrap();
+            result.len()
+        };
+
+        let mut decompressed = vec![0u32; data.len()];
+        let result = rust_codec
+            .decompress_to_slice(&compressed[..compressed_len], &mut decompressed)
+            .unwrap();
+        assert_eq!(result, &data[..]);
+    }
+}
diff --git a/src/rust/integer_compression/integer_codec.rs b/src/rust/integer_compression/integer_codec.rs
@@ -1,6 +1,6 @@
 use std::io::Cursor;
 
-use crate::rust::{Codec, FastPForResult};
+use crate::rust::FastPForResult;
 
 /// Integer compression/decompression interface with length headers.
 ///
@@ -37,47 +37,3 @@ pub trait Integer<T> {
         output_offset: &mut Cursor<u32>,
     ) -> FastPForResult<()>;
 }
-
-impl Integer<u32> for Codec {
-    fn compress(
-        &mut self,
-        input: &[u32],
-        input_length: u32,
-        input_offset: &mut Cursor<u32>,
-        output: &mut [u32],
-        output_offset: &mut Cursor<u32>,
-    ) -> FastPForResult<()> {
-        match self {
-            Codec::FastPFor(fastpfor) => {
-                fastpfor.compress(input, input_length, input_offset, output, output_offset)
-            }
-            Codec::VariableByte(vb) => {
-                vb.compress(input, input_length, input_offset, output, output_offset)
-            }
-            Codec::JustCopy(jc) => {
-                jc.compress(input, input_length, input_offset, output, output_offset)
-            }
-        }
-    }
-
-    fn uncompress(
-        &mut self,
-        input: &[u32],
-        input_length: u32,
-        input_offset: &mut Cursor<u32>,
-        output: &mut [u32],
-        output_offset: &mut Cursor<u32>,
-    ) -> FastPForResult<()> {
-        match self {
-            Codec::FastPFor(fastpfor) => {
-                fastpfor.uncompress(input, input_length, input_offset, output, output_offset)
-            }
-            Codec::VariableByte(vb) => {
-                vb.uncompress(input, input_length, input_offset, output, output_offset)
-            }
-            Codec::JustCopy(jc) => {
-                jc.uncompress(input, input_length, input_offset, output, output_offset)
-            }
-        }
-    }
-}

Original file line number	Diff line number	Diff line change
`@@ -18,4 +18,8 @@ pub enum FastPForError {`
`18`	`18`	`/// Output buffer too small`
`19`	`19`	`#[error("Output buffer too small")]`
`20`	`20`	`OutputBufferTooSmall,`
	`21`	`+`
	`22`	`+ /// Invalid input length`
	`23`	`+ #[error("Invalid input length {0}")]`
	`24`	`+ InvalidInputLength(usize),`
`21`	`25`	`}`