Skip to content

Commit 230cfa3

Browse files
committed
update
1 parent 6eee9f6 commit 230cfa3

6 files changed

Lines changed: 205 additions & 23 deletions

File tree

.travis.yml

Lines changed: 0 additions & 7 deletions
This file was deleted.

CMakeLists.txt

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
cmake_minimum_required(VERSION 3.15)
2+
3+
project(MaskedVByte VERSION 0.0.1 LANGUAGES C)
4+
5+
include(GNUInstallDirs)
6+
include(CMakePackageConfigHelpers)
7+
include(CheckCCompilerFlag)
8+
9+
option(MASKEDVBYTE_BUILD_EXAMPLES "Build examples" ON)
10+
option(MASKEDVBYTE_BUILD_TESTS "Build tests" ON)
11+
12+
# Library
13+
add_library(maskedvbyte
14+
src/varintdecode.c
15+
src/varintencode.c
16+
)
17+
add_library(maskedvbyte::maskedvbyte ALIAS maskedvbyte)
18+
19+
# Public headers
20+
set(MASKEDVBYTE_PUBLIC_HEADERS
21+
include/varintdecode.h
22+
include/varintencode.h
23+
)
24+
25+
# Properties
26+
set_target_properties(maskedvbyte PROPERTIES
27+
OUTPUT_NAME maskedvbyte
28+
VERSION ${PROJECT_VERSION}
29+
SOVERSION ${PROJECT_VERSION_MAJOR}
30+
POSITION_INDEPENDENT_CODE ON
31+
)
32+
33+
target_compile_features(maskedvbyte PUBLIC c_std_99)
34+
35+
target_include_directories(maskedvbyte
36+
PUBLIC
37+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
38+
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
39+
)
40+
41+
# Enable SSE4.1 on GCC/Clang when available.
# MATCHES performs a regex search, so any compiler ID containing "Clang" or "GNU"
# enters this branch; other compilers (e.g. MSVC) receive no extra flag here.
# check_c_compiler_flag() test-compiles with -msse4.1 and records the outcome in
# HAS_SSE41_FLAG. The option is added PRIVATE: it applies to maskedvbyte's own
# sources only and is not propagated to targets that link against the library.
42+
if (CMAKE_C_COMPILER_ID MATCHES "Clang|AppleClang|GNU")
43+
check_c_compiler_flag("-msse4.1" HAS_SSE41_FLAG)
44+
if (HAS_SSE41_FLAG)
45+
target_compile_options(maskedvbyte PRIVATE -msse4.1)
46+
endif()
47+
endif()
48+
49+
# Examples
50+
if (MASKEDVBYTE_BUILD_EXAMPLES)
51+
add_executable(example examples/example.c)
52+
target_link_libraries(example PRIVATE maskedvbyte::maskedvbyte) # namespaced alias: a misspelled name fails at configure time instead of becoming -l<name>
53+
target_compile_features(example PRIVATE c_std_99)
54+
target_include_directories(example PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
55+
endif()
56+
57+
# Tests
58+
if (MASKEDVBYTE_BUILD_TESTS)
59+
include(CTest)
60+
add_executable(unit tests/unit.c)
61+
target_link_libraries(unit PRIVATE maskedvbyte::maskedvbyte) # namespaced alias: a misspelled name fails at configure time instead of becoming -l<name>
62+
target_compile_features(unit PRIVATE c_std_99)
63+
target_include_directories(unit PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include)
64+
add_test(NAME maskedvbyte_unit COMMAND unit)
65+
endif()
66+
67+
# Install rules
68+
install(TARGETS maskedvbyte
69+
EXPORT maskedvbyteTargets
70+
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
71+
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
72+
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
73+
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
74+
)
75+
76+
install(FILES ${MASKEDVBYTE_PUBLIC_HEADERS}
77+
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
78+
)
79+
80+
# Package config and export
81+
install(EXPORT maskedvbyteTargets
82+
NAMESPACE maskedvbyte::
83+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/maskedvbyte
84+
)
85+
86+
configure_package_config_file(
87+
cmake/maskedvbyteConfig.cmake.in
88+
${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteConfig.cmake
89+
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/maskedvbyte
90+
)
91+
92+
write_basic_package_version_file(
93+
${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteConfigVersion.cmake
94+
VERSION ${PROJECT_VERSION}
95+
COMPATIBILITY SameMajorVersion
96+
)
97+
98+
install(FILES
99+
${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteConfig.cmake
100+
${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteConfigVersion.cmake
101+
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/maskedvbyte
102+
)
103+
104+
# Export from build tree for convenience
105+
export(EXPORT maskedvbyteTargets
106+
NAMESPACE maskedvbyte::
107+
FILE ${CMAKE_CURRENT_BINARY_DIR}/maskedvbyteTargets.cmake
108+
)

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,8 @@ $(LIBNAME): $(OBJECTS)
4343

4444

4545

46-
example: ./example.c $(HEADERS) $(OBJECTS)
47-
$(CC) $(CFLAGS) -o example ./example.c -Iinclude $(OBJECTS)
46+
example: ./examples/example.c $(HEADERS) $(OBJECTS)
47+
$(CC) $(CFLAGS) -o example ./examples/example.c -Iinclude $(OBJECTS)
4848

4949
unit: ./tests/unit.c $(HEADERS) $(OBJECTS)
5050
$(CC) $(CFLAGS) -o unit ./tests/unit.c -Iinclude $(OBJECTS)

README.md

Lines changed: 87 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,98 @@ MaskedVByte
22
===========
33
[![Ubuntu](https://github.com/fast-pack/MaskedVByte/actions/workflows/ubuntu.yml/badge.svg)](https://github.com/fast-pack/MaskedVByte/actions/workflows/ubuntu.yml)
44

5-
Fast decoder for VByte-compressed integers in C.
5+
Fast, vectorized VByte decoding for 32‑bit integers in C, with optional differential (delta) coding.
66

7-
It includes fast differential coding.
7+
- Requires x86-64 with SSE4.1 (available on virtually all modern x64 CPUs)
8+
- C99 compatible
9+
- Includes a tiny test and a runnable example
810

9-
We require x64 processors support SSE 4.1 or better. This includes virtually all x64 processors in service today, except for very old or specialized processors.
11+
Platform notes
12+
--------------
13+
- The library and tests build on Linux and macOS with a standard C toolchain.
14+
- The Makefile installs a shared object named `libmaskedvbyte.so` and uses `ldconfig`; both are Linux-specific. On macOS the build and run targets work, but the `install` target does not apply.
1015

11-
The code should build using most standard-compliant modern C compilers (C99). The provided makefile
12-
expects a Linux-like system.
1316

17+
Build and test
18+
--------------
1419

15-
Usage:
20+
```sh
21+
make # builds the library and the test binary
22+
./unit # runs a quick correctness test
23+
```
1624

17-
make
18-
./unit
25+
Build and run the example
26+
-------------------------
1927

20-
See example.c for an example.
28+
```sh
29+
make example
30+
./example
31+
```
2132

22-
Short code sample:
33+
You should see something like:
2334

24-
```C
25-
size_t compsize = vbyte_encode(datain, N, compressedbuffer); // encoding
26-
// here the result is stored in compressedbuffer using compsize bytes
27-
size_t compsize2 = masked_vbyte_decode(compressedbuffer, recovdata, N); // decoding (fast)
2835
```
36+
Compressed 5000 integers down to 5000 bytes.
37+
```
38+
39+
Embedded example, explained
40+
---------------------------
41+
The example allocates input/output buffers, encodes a flat array of integers with classic VByte, then decodes it back with the masked (vectorized) decoder and checks that the value returned by the decoder equals the byte count returned by the encoder.
42+
43+
```c
44+
#include <stdio.h>
45+
#include <stdlib.h>
46+
#include <assert.h>
47+
48+
#include "varintencode.h"
49+
#include "varintdecode.h"
50+
51+
int main() {
52+
int N = 5000;
53+
uint32_t * datain = malloc(N * sizeof(uint32_t));
54+
uint8_t * compressedbuffer = malloc(N * sizeof(uint32_t)); // 4 bytes per value is ample here (each value encodes to 1 byte); with 7 payload bits per byte, an arbitrary 32-bit value can need up to 5 bytes
55+
uint32_t * recovdata = malloc(N * sizeof(uint32_t));
56+
for (int k = 0; k < N; ++k)
57+
datain[k] = 120; // constant value fits in one VByte
58+
size_t compsize = vbyte_encode(datain, N, compressedbuffer); // encoding
59+
// result is stored in 'compressedbuffer' using 'compsize' bytes
60+
size_t compsize2 = masked_vbyte_decode(compressedbuffer, recovdata, N); // fast decoding
61+
assert(compsize == compsize2); // sanity check: the decoder's return value must equal the encoded byte count (no-op when NDEBUG is defined)
62+
free(datain);
63+
free(compressedbuffer);
64+
free(recovdata);
65+
printf("Compressed %d integers down to %d bytes.\n", N, (int)compsize);
66+
return 0;
67+
}
68+
```
69+
70+
What’s happening:
71+
- VByte uses a continuation bit; small values like 120 encode to a single byte, so 5000 values compress to 5000 bytes.
72+
- `masked_vbyte_decode` is a vectorized decoder using SSE4.1 for speed.
73+
- Differential (delta) coding variants are also provided; they are most useful when your data is sorted or has small gaps.
74+
75+
API at a glance
76+
---------------
77+
Headers are in `include/`.
78+
79+
- Encoding
80+
- `size_t vbyte_encode(const uint32_t* in, size_t length, uint8_t* bout);`
81+
- `size_t vbyte_encode_delta(const uint32_t* in, size_t length, uint8_t* bout, uint32_t prev);`
82+
83+
- Decoding
84+
- `size_t masked_vbyte_decode(const uint8_t* in, uint32_t* out, uint64_t length);`
85+
- `size_t masked_vbyte_decode_delta(const uint8_t* in, uint32_t* out, uint64_t length, uint32_t prev);`
86+
- `size_t masked_vbyte_decode_fromcompressedsize(const uint8_t* in, uint32_t* out, size_t inputsize);`
87+
- `size_t masked_vbyte_decode_fromcompressedsize_delta(const uint8_t* in, uint32_t* out, size_t inputsize, uint32_t prev);`
88+
- Random access helpers for delta streams:
89+
- `uint32_t masked_vbyte_select_delta(const uint8_t *in, uint64_t length, uint32_t prev, size_t slot);`
90+
- `int masked_vbyte_search_delta(const uint8_t *in, uint64_t length, uint32_t prev, uint32_t key, uint32_t *presult);`
91+
92+
Tips
93+
----
94+
- Prefer delta coding when your sequence is sorted or has small differences; it often reduces the number of bytes per integer.
95+
- If you know the compressed byte length, use the `*_fromcompressedsize` functions to decode exactly that many bytes.
96+
2997

3098
Interesting applications
3199
-----------------------
@@ -60,3 +128,8 @@ See also
60128
* Oroch is a C++ library that offers a usable API (MIT license) https://github.com/ademakov/Oroch
61129

62130

131+
License
132+
-------
133+
See `LICENSE` for details.
134+
135+

cmake/maskedvbyteConfig.cmake.in

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
@PACKAGE_INIT@
2+
3+
include(CMakeFindDependencyMacro)
4+
# Add find_dependency(...) here if the library gains external deps
5+
6+
include("${CMAKE_CURRENT_LIST_DIR}/maskedvbyteTargets.cmake")
7+
8+
check_required_components(maskedvbyte)
File renamed without changes.

0 commit comments

Comments
 (0)