Skip to content

Commit 3bdb376

Browse files
authored
add vector similarity benchmark (#7391)
## Summary Tracking issue: #7297 Adds basic benchmarking setup for vector similarity. Right now it is just a bunch of random vectors. Note that the numbers don't really mean anything yet, as we have not optimized anything (namely, I have not yet added the inner product / cosine similarity optimizations pushed through both the SORF transform and the dictionary for constant arrays). In the future we will add proper benchmarking on real datasets (likely in `vortex-bench`), and maybe we will also integrate https://github.com/zilliztech/vectordbbench. ## Testing N/A Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent 8d9052e commit 3bdb376

4 files changed

Lines changed: 375 additions & 0 deletions

File tree

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-tensor/Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@ num-traits = { workspace = true }
3131
prost = { workspace = true }
3232

3333
[dev-dependencies]
34+
divan = { workspace = true }
35+
mimalloc = { workspace = true }
3436
rand = { workspace = true }
3537
rand_distr = { workspace = true }
3638
rstest = { workspace = true }
39+
vortex-btrblocks = { path = "../vortex-btrblocks" }
40+
41+
[[bench]]
42+
name = "similarity_search"
43+
harness = false
44+
test = false
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! End-to-end similarity-search execution benchmark.
5+
//!
6+
//! For each of three compression strategies (uncompressed, default BtrBlocks, TurboQuant), this
7+
//! bench:
8+
//!
9+
//! 1. Generates a deterministic random `Vector<dim, f32>` batch.
10+
//! 2. Applies the compression strategy *outside* the timed region.
11+
//! 3. Builds the lazy
12+
//! `Binary(Gt, [CosineSimilarity(data, query), threshold])`
13+
//! tree *outside* the timed region.
14+
//! 4. Times *only* `tree.execute::<BoolArray>(&mut ctx)`.
15+
//!
16+
//! Run with: `cargo bench -p vortex-tensor --bench similarity_search`
17+
18+
use divan::Bencher;
19+
use mimalloc::MiMalloc;
20+
use vortex_array::VortexSessionExecute;
21+
use vortex_array::arrays::BoolArray;
22+
use vortex_error::VortexExpect;
23+
24+
#[path = "similarity_search_common/mod.rs"]
25+
mod common;
26+
27+
use common::Variant;
28+
use common::build_similarity_search_tree;
29+
use common::extract_row_as_query;
30+
use common::generate_random_vectors;
31+
32+
// Route all heap allocations through mimalloc so benchmark timings reflect the
// allocator behavior Vortex targets rather than the platform default allocator.
#[global_allocator]
static GLOBAL: MiMalloc = MiMalloc;
34+
35+
/// Number of vectors in the benchmark dataset.
const NUM_ROWS: usize = 10_000;

/// Dimensionality of each vector. Must be `>= vortex_tensor::encodings::turboquant::MIN_DIMENSION`
/// (128) for the TurboQuant variant to work.
const DIM: u32 = 768;

/// Deterministic PRNG seed for the generated dataset, so runs are reproducible.
const SEED: u64 = 0xC0FFEE;

/// Cosine similarity threshold for the "greater than" filter. Random f32 vectors from N(0, 1) at
/// this dimension have near-zero pairwise similarity, so picking a row of the dataset as the
/// query guarantees at least that row matches (its self-similarity is 1.0 > 0.8).
const THRESHOLD: f32 = 0.8;
49+
50+
fn main() {
51+
divan::main();
52+
}
53+
54+
/// Runs one end-to-end execution of the similarity-search tree for the given variant. All dataset
55+
/// generation and tree construction happens in the bench setup closure so only the execution of
56+
/// the lazy tree is timed.
57+
fn bench_variant(bencher: Bencher<'_, '_>, variant: Variant) {
58+
bencher
59+
.with_inputs(|| {
60+
let mut ctx = common::SESSION.create_execution_ctx();
61+
62+
// Use row 0 of the uncompressed data as the query so we always have at least one
63+
// match. Keeping the query extraction separate from the compressed-data build keeps
64+
// the query identical across all three variants.
65+
let raw = generate_random_vectors(NUM_ROWS, DIM, SEED);
66+
let query = extract_row_as_query(&raw, 0, DIM);
67+
let data = match variant {
68+
Variant::Uncompressed => raw,
69+
Variant::DefaultCompression => {
70+
common::compress_default(raw).vortex_expect("default compression succeeds")
71+
}
72+
Variant::TurboQuant => common::compress_turboquant(raw, &mut ctx)
73+
.vortex_expect("turboquant compression succeeds"),
74+
};
75+
76+
// println!(
77+
// "\n\n{}: {}\n\n",
78+
// variant,
79+
// data.display_tree_encodings_only()
80+
// );
81+
82+
let tree = build_similarity_search_tree(data, &query, THRESHOLD)
83+
.vortex_expect("tree construction succeeds");
84+
85+
(tree, ctx)
86+
})
87+
.bench_values(|(tree, mut ctx)| {
88+
// Hot path: only the .execute() call is timed. The result is a BoolArray of length
89+
// NUM_ROWS with true at positions where cosine_similarity > THRESHOLD.
90+
tree.execute::<BoolArray>(&mut ctx)
91+
.vortex_expect("similarity search tree executes to a BoolArray")
92+
});
93+
}
94+
95+
#[divan::bench]
96+
fn execute_uncompressed(bencher: Bencher) {
97+
bench_variant(bencher, Variant::Uncompressed);
98+
}
99+
100+
#[divan::bench]
101+
fn execute_default_compression(bencher: Bencher) {
102+
bench_variant(bencher, Variant::DefaultCompression);
103+
}
104+
105+
#[divan::bench]
106+
fn execute_turboquant(bencher: Bencher) {
107+
bench_variant(bencher, Variant::TurboQuant);
108+
}
Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Shared helpers for the similarity-search benchmark and example.
5+
//!
6+
//! This module is included from both `vortex-tensor/benches/similarity_search.rs` and
7+
//! `vortex-tensor/examples/similarity_search.rs` via an explicit `#[path = ...]` so both targets
8+
//! use the exact same array-tree builder.
9+
//!
10+
//! The three main entry points are:
11+
//!
12+
//! - [`generate_random_vectors`] to build a deterministic random [`Vector`] extension array.
13+
//! - [`build_variant`] to take a raw vector array and apply the requested compression strategy
14+
//! (uncompressed, default BtrBlocks, or TurboQuant).
15+
//! - [`build_similarity_search_tree`] to wire a cosine-similarity + threshold expression on top of
16+
//! a prepared data array and a single-row query vector.
17+
//!
18+
//! [`Vector`]: vortex_tensor::vector::Vector
19+
20+
#![allow(dead_code)]
21+
22+
use std::fmt;
23+
use std::sync::LazyLock;
24+
25+
use rand::SeedableRng;
26+
use rand::rngs::StdRng;
27+
use rand_distr::Distribution;
28+
use rand_distr::Normal;
29+
use vortex_array::ArrayRef;
30+
use vortex_array::ExecutionCtx;
31+
use vortex_array::IntoArray;
32+
use vortex_array::VortexSessionExecute;
33+
use vortex_array::arrays::ConstantArray;
34+
use vortex_array::arrays::Extension;
35+
use vortex_array::arrays::ExtensionArray;
36+
use vortex_array::arrays::FixedSizeListArray;
37+
use vortex_array::arrays::PrimitiveArray;
38+
use vortex_array::arrays::extension::ExtensionArrayExt;
39+
use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt;
40+
use vortex_array::arrays::scalar_fn::ScalarFnArrayExt;
41+
use vortex_array::builtins::ArrayBuiltins;
42+
use vortex_array::dtype::DType;
43+
use vortex_array::dtype::Nullability;
44+
use vortex_array::dtype::PType;
45+
use vortex_array::dtype::extension::ExtDType;
46+
use vortex_array::extension::EmptyMetadata;
47+
use vortex_array::scalar::Scalar;
48+
use vortex_array::scalar_fn::fns::operators::Operator;
49+
use vortex_array::session::ArraySession;
50+
use vortex_array::validity::Validity;
51+
use vortex_btrblocks::BtrBlocksCompressor;
52+
use vortex_buffer::BufferMut;
53+
use vortex_error::VortexExpect;
54+
use vortex_error::VortexResult;
55+
use vortex_error::vortex_panic;
56+
use vortex_session::VortexSession;
57+
use vortex_tensor::encodings::turboquant::TurboQuantConfig;
58+
use vortex_tensor::encodings::turboquant::turboquant_encode_unchecked;
59+
use vortex_tensor::scalar_fns::cosine_similarity::CosineSimilarity;
60+
use vortex_tensor::scalar_fns::l2_denorm::L2Denorm;
61+
use vortex_tensor::scalar_fns::l2_denorm::normalize_as_l2_denorm;
62+
use vortex_tensor::vector::Vector;
63+
64+
/// A shared [`VortexSession`] pre-loaded with the builtin [`ArraySession`] so both bench and
/// example can create execution contexts cheaply.
///
/// Initialized lazily on first access and shared for the lifetime of the process.
pub static SESSION: LazyLock<VortexSession> =
    LazyLock::new(|| VortexSession::empty().with::<ArraySession>());
68+
69+
/// The three compression strategies the benchmark and example exercise.
///
/// Each variant starts from the same raw `Vector<dim, f32>` data; only the compression applied
/// on top differs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Variant {
    /// Raw `Vector<dim, f32>` with no compression applied.
    Uncompressed,
    /// `BtrBlocksCompressor::default()` walks into the extension array and compresses the
    /// underlying FSL storage child with the default scheme set (no TurboQuant).
    DefaultCompression,
    /// TurboQuant: normalize, quantize to `FSL(Dict)`, wrap in SORF + `L2Denorm`.
    TurboQuant,
}
80+
81+
impl fmt::Display for Variant {
82+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83+
match self {
84+
Self::Uncompressed => f.write_str("Uncompressed"),
85+
Self::DefaultCompression => f.write_str("DefaultCompression"),
86+
Self::TurboQuant => f.write_str("TurboQuant"),
87+
}
88+
}
89+
}
90+
91+
/// Generate `num_rows` random f32 vectors of dimension `dim`, wrapped in a [`Vector`] extension
92+
/// array. The values are drawn from a standard normal distribution seeded by `seed` so results
93+
/// are reproducible across runs.
94+
///
95+
/// [`Vector`]: vortex_tensor::vector::Vector
96+
pub fn generate_random_vectors(num_rows: usize, dim: u32, seed: u64) -> ArrayRef {
97+
let mut rng = StdRng::seed_from_u64(seed);
98+
// `Normal::new(0, 1)` is infallible for these parameters. `rand_distr::NormalError` does
99+
// not implement `Into<VortexError>`, so we cannot use `vortex_expect` here; fall back to
100+
// `vortex_panic!` on the (impossible) error path instead.
101+
let normal =
102+
Normal::new(0.0f32, 1.0).unwrap_or_else(|_| vortex_panic!("Normal(0, 1) is well-defined"));
103+
104+
let dim_usize = dim as usize;
105+
let mut buf = BufferMut::<f32>::with_capacity(num_rows * dim_usize);
106+
for _ in 0..(num_rows * dim_usize) {
107+
buf.push(normal.sample(&mut rng));
108+
}
109+
110+
let elements = PrimitiveArray::new::<f32>(buf.freeze(), Validity::NonNullable);
111+
let fsl =
112+
FixedSizeListArray::try_new(elements.into_array(), dim, Validity::NonNullable, num_rows)
113+
.vortex_expect("FSL with valid shape and matching children length");
114+
115+
let ext_dtype = ExtDType::<Vector>::try_new(EmptyMetadata, fsl.dtype().clone())
116+
.vortex_expect("Vector extension dtype is valid for an f32 FSL")
117+
.erased();
118+
ExtensionArray::new(ext_dtype, fsl.into_array()).into_array()
119+
}
120+
121+
/// Pull the `row`-th vector out of a `Vector<dim, f32>` extension array as a plain `Vec<f32>`.
122+
///
123+
/// Used to extract a single query vector from a batch of generated data. The input must already
124+
/// be fully materialized (no lazy scalar-fn wrappers); pass a raw array from
125+
/// [`generate_random_vectors`], not a compressed variant.
126+
pub fn extract_row_as_query(vectors: &ArrayRef, row: usize, dim: u32) -> Vec<f32> {
127+
let ext = vectors
128+
.as_opt::<Extension>()
129+
.vortex_expect("data must be a Vector extension array");
130+
131+
let mut ctx = SESSION.create_execution_ctx();
132+
let fsl: FixedSizeListArray = ext
133+
.storage_array()
134+
.clone()
135+
.execute(&mut ctx)
136+
.vortex_expect("storage array executes to an FSL");
137+
let elements: PrimitiveArray = fsl
138+
.elements()
139+
.clone()
140+
.execute(&mut ctx)
141+
.vortex_expect("FSL elements execute to a PrimitiveArray");
142+
143+
let slice = elements.as_slice::<f32>();
144+
let dim_usize = dim as usize;
145+
let start = row * dim_usize;
146+
slice[start..start + dim_usize].to_vec()
147+
}
148+
149+
/// Build a `Vector<dim, f32>` extension array whose storage is a [`ConstantArray`] broadcasting a
150+
/// single query vector across `num_rows` rows. This is how we hand a single query vector to
151+
/// `CosineSimilarity` on the `rhs` side -- `ScalarFnArray` requires both children to have the
152+
/// same length, so we broadcast the query instead of hand-rolling a 1-row input.
153+
fn build_constant_query_vector(query: &[f32], num_rows: usize) -> VortexResult<ArrayRef> {
154+
let element_dtype = DType::Primitive(PType::F32, Nullability::NonNullable);
155+
156+
let children: Vec<Scalar> = query
157+
.iter()
158+
.map(|&v| Scalar::primitive(v, Nullability::NonNullable))
159+
.collect();
160+
let storage_scalar = Scalar::fixed_size_list(element_dtype, children, Nullability::NonNullable);
161+
162+
let storage = ConstantArray::new(storage_scalar, num_rows).into_array();
163+
164+
let ext_dtype = ExtDType::<Vector>::try_new(EmptyMetadata, storage.dtype().clone())?.erased();
165+
Ok(ExtensionArray::new(ext_dtype, storage).into_array())
166+
}
167+
168+
/// Compresses a raw `Vector<dim, f32>` array with the default BtrBlocks pipeline.
169+
///
170+
/// [`BtrBlocksCompressor`] walks into the extension array and recursively compresses the
171+
/// underlying FSL storage child. TurboQuant is *not* exercised by this path -- it is not
172+
/// registered in the default scheme set -- so this measures "generic" lossless compression
173+
/// applied to float vectors.
174+
pub fn compress_default(data: ArrayRef) -> VortexResult<ArrayRef> {
175+
BtrBlocksCompressor::default().compress(&data)
176+
}
177+
178+
/// Compresses a raw `Vector<dim, f32>` array with the TurboQuant pipeline by hand, producing the
179+
/// same tree shape that
180+
/// [`vortex_tensor::encodings::turboquant::TurboQuantScheme`] would:
181+
///
182+
/// ```text
183+
/// L2Denorm(SorfTransform(FSL(Dict(codes, centroids))), norms)
184+
/// ```
185+
///
186+
/// Calling the encode helpers directly (instead of going through
187+
/// `BtrBlocksCompressorBuilder::with_turboquant()`) lets this example avoid depending on the
188+
/// `unstable_encodings` feature flag.
189+
///
190+
/// See `vortex-tensor/src/encodings/turboquant/tests/mod.rs::normalize_and_encode` for the same
191+
/// canonical recipe.
192+
pub fn compress_turboquant(data: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
193+
let l2_denorm = normalize_as_l2_denorm(data, ctx)?;
194+
let normalized = l2_denorm.child_at(0).clone();
195+
let norms = l2_denorm.child_at(1).clone();
196+
let num_rows = l2_denorm.len();
197+
198+
let normalized_ext = normalized
199+
.as_opt::<Extension>()
200+
.vortex_expect("normalized child should be an Extension array");
201+
202+
let config = TurboQuantConfig::default();
203+
// SAFETY: `normalize_as_l2_denorm` guarantees every row is unit-norm (or zero), which is the
204+
// invariant `turboquant_encode_unchecked` expects.
205+
let tq = unsafe { turboquant_encode_unchecked(normalized_ext, &config, ctx) }?;
206+
207+
Ok(unsafe { L2Denorm::new_array_unchecked(tq, norms, num_rows) }?.into_array())
208+
}
209+
210+
/// Dispatch helper that builds the data array for the requested [`Variant`], starting from a
211+
/// single random-vector generation. Always returns an `ArrayRef` whose logical dtype is
212+
/// `Vector<dim, f32>`.
213+
pub fn build_variant(
214+
variant: Variant,
215+
num_rows: usize,
216+
dim: u32,
217+
seed: u64,
218+
ctx: &mut ExecutionCtx,
219+
) -> VortexResult<ArrayRef> {
220+
let raw = generate_random_vectors(num_rows, dim, seed);
221+
match variant {
222+
Variant::Uncompressed => Ok(raw),
223+
Variant::DefaultCompression => compress_default(raw),
224+
Variant::TurboQuant => compress_turboquant(raw, ctx),
225+
}
226+
}
227+
228+
/// Build the lazy similarity-search array tree for a prepared data array and a single query
229+
/// vector. The returned tree is a boolean array of length `data.len()` where position `i` is
230+
/// `true` iff `cosine_similarity(data[i], query) > threshold`.
231+
///
232+
/// The tree shape is:
233+
///
234+
/// ```text
235+
/// Binary(Gt, [
236+
/// CosineSimilarity([data, ConstantArray(query_vec, n)]),
237+
/// ConstantArray(threshold, n),
238+
/// ])
239+
/// ```
240+
///
241+
/// This function does no execution; it is safe to call inside a benchmark setup closure.
242+
pub fn build_similarity_search_tree(
243+
data: ArrayRef,
244+
query: &[f32],
245+
threshold: f32,
246+
) -> VortexResult<ArrayRef> {
247+
let num_rows = data.len();
248+
let query_vec = build_constant_query_vector(query, num_rows)?;
249+
250+
let cosine = CosineSimilarity::try_new_array(data, query_vec, num_rows)?.into_array();
251+
252+
let threshold_scalar = Scalar::primitive(threshold, Nullability::NonNullable);
253+
let threshold_array = ConstantArray::new(threshold_scalar, num_rows).into_array();
254+
255+
cosine.binary(threshold_array, Operator::Gt)
256+
}

0 commit comments

Comments
 (0)