|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +//! Shared helpers for the similarity-search benchmark and example. |
| 5 | +//! |
| 6 | +//! This module is included from both `vortex-tensor/benches/similarity_search.rs` and |
| 7 | +//! `vortex-tensor/examples/similarity_search.rs` via an explicit `#[path = ...]` so both targets |
| 8 | +//! use the exact same array-tree builder. |
| 9 | +//! |
| 10 | +//! The three main entry points are: |
| 11 | +//! |
| 12 | +//! - [`generate_random_vectors`] to build a deterministic random [`Vector`] extension array. |
| 13 | +//! - [`build_variant`] to take a raw vector array and apply the requested compression strategy |
| 14 | +//! (uncompressed, default BtrBlocks, or TurboQuant). |
| 15 | +//! - [`build_similarity_search_tree`] to wire a cosine-similarity + threshold expression on top of |
| 16 | +//! a prepared data array and a single-row query vector. |
| 17 | +//! |
| 18 | +//! [`Vector`]: vortex_tensor::vector::Vector |
| 19 | +
|
| 20 | +#![allow(dead_code)] |
| 21 | + |
| 22 | +use std::fmt; |
| 23 | +use std::sync::LazyLock; |
| 24 | + |
| 25 | +use rand::SeedableRng; |
| 26 | +use rand::rngs::StdRng; |
| 27 | +use rand_distr::Distribution; |
| 28 | +use rand_distr::Normal; |
| 29 | +use vortex_array::ArrayRef; |
| 30 | +use vortex_array::ExecutionCtx; |
| 31 | +use vortex_array::IntoArray; |
| 32 | +use vortex_array::VortexSessionExecute; |
| 33 | +use vortex_array::arrays::ConstantArray; |
| 34 | +use vortex_array::arrays::Extension; |
| 35 | +use vortex_array::arrays::ExtensionArray; |
| 36 | +use vortex_array::arrays::FixedSizeListArray; |
| 37 | +use vortex_array::arrays::PrimitiveArray; |
| 38 | +use vortex_array::arrays::extension::ExtensionArrayExt; |
| 39 | +use vortex_array::arrays::fixed_size_list::FixedSizeListArrayExt; |
| 40 | +use vortex_array::arrays::scalar_fn::ScalarFnArrayExt; |
| 41 | +use vortex_array::builtins::ArrayBuiltins; |
| 42 | +use vortex_array::dtype::DType; |
| 43 | +use vortex_array::dtype::Nullability; |
| 44 | +use vortex_array::dtype::PType; |
| 45 | +use vortex_array::dtype::extension::ExtDType; |
| 46 | +use vortex_array::extension::EmptyMetadata; |
| 47 | +use vortex_array::scalar::Scalar; |
| 48 | +use vortex_array::scalar_fn::fns::operators::Operator; |
| 49 | +use vortex_array::session::ArraySession; |
| 50 | +use vortex_array::validity::Validity; |
| 51 | +use vortex_btrblocks::BtrBlocksCompressor; |
| 52 | +use vortex_buffer::BufferMut; |
| 53 | +use vortex_error::VortexExpect; |
| 54 | +use vortex_error::VortexResult; |
| 55 | +use vortex_error::vortex_panic; |
| 56 | +use vortex_session::VortexSession; |
| 57 | +use vortex_tensor::encodings::turboquant::TurboQuantConfig; |
| 58 | +use vortex_tensor::encodings::turboquant::turboquant_encode_unchecked; |
| 59 | +use vortex_tensor::scalar_fns::cosine_similarity::CosineSimilarity; |
| 60 | +use vortex_tensor::scalar_fns::l2_denorm::L2Denorm; |
| 61 | +use vortex_tensor::scalar_fns::l2_denorm::normalize_as_l2_denorm; |
| 62 | +use vortex_tensor::vector::Vector; |
| 63 | + |
| 64 | +/// A shared [`VortexSession`] pre-loaded with the builtin [`ArraySession`] so both bench and |
| 65 | +/// example can create execution contexts cheaply. |
| 66 | +pub static SESSION: LazyLock<VortexSession> = |
| 67 | + LazyLock::new(|| VortexSession::empty().with::<ArraySession>()); |
| 68 | + |
/// Compression strategy applied to the vector data before running the similarity search.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Variant {
    /// Raw `Vector<dim, f32>` with no compression applied.
    Uncompressed,
    /// `BtrBlocksCompressor::default()` walks into the extension array and compresses the
    /// underlying FSL storage child with the default scheme set (no TurboQuant).
    DefaultCompression,
    /// TurboQuant: normalize, quantize to `FSL(Dict)`, wrap in SORF + `L2Denorm`.
    TurboQuant,
}

/// Renders each variant as its bare identifier (e.g. `TurboQuant`).
impl fmt::Display for Variant {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the label first, then emit it with a single write. `write_str` is used on
        // purpose: it writes the label verbatim, without applying width/fill flags.
        let label = match *self {
            Variant::Uncompressed => "Uncompressed",
            Variant::DefaultCompression => "DefaultCompression",
            Variant::TurboQuant => "TurboQuant",
        };
        f.write_str(label)
    }
}
| 90 | + |
| 91 | +/// Generate `num_rows` random f32 vectors of dimension `dim`, wrapped in a [`Vector`] extension |
| 92 | +/// array. The values are drawn from a standard normal distribution seeded by `seed` so results |
| 93 | +/// are reproducible across runs. |
| 94 | +/// |
| 95 | +/// [`Vector`]: vortex_tensor::vector::Vector |
| 96 | +pub fn generate_random_vectors(num_rows: usize, dim: u32, seed: u64) -> ArrayRef { |
| 97 | + let mut rng = StdRng::seed_from_u64(seed); |
| 98 | + // `Normal::new(0, 1)` is infallible for these parameters. `rand_distr::NormalError` does |
| 99 | + // not implement `Into<VortexError>`, so we cannot use `vortex_expect` here; fall back to |
| 100 | + // `vortex_panic!` on the (impossible) error path instead. |
| 101 | + let normal = |
| 102 | + Normal::new(0.0f32, 1.0).unwrap_or_else(|_| vortex_panic!("Normal(0, 1) is well-defined")); |
| 103 | + |
| 104 | + let dim_usize = dim as usize; |
| 105 | + let mut buf = BufferMut::<f32>::with_capacity(num_rows * dim_usize); |
| 106 | + for _ in 0..(num_rows * dim_usize) { |
| 107 | + buf.push(normal.sample(&mut rng)); |
| 108 | + } |
| 109 | + |
| 110 | + let elements = PrimitiveArray::new::<f32>(buf.freeze(), Validity::NonNullable); |
| 111 | + let fsl = |
| 112 | + FixedSizeListArray::try_new(elements.into_array(), dim, Validity::NonNullable, num_rows) |
| 113 | + .vortex_expect("FSL with valid shape and matching children length"); |
| 114 | + |
| 115 | + let ext_dtype = ExtDType::<Vector>::try_new(EmptyMetadata, fsl.dtype().clone()) |
| 116 | + .vortex_expect("Vector extension dtype is valid for an f32 FSL") |
| 117 | + .erased(); |
| 118 | + ExtensionArray::new(ext_dtype, fsl.into_array()).into_array() |
| 119 | +} |
| 120 | + |
| 121 | +/// Pull the `row`-th vector out of a `Vector<dim, f32>` extension array as a plain `Vec<f32>`. |
| 122 | +/// |
| 123 | +/// Used to extract a single query vector from a batch of generated data. The input must already |
| 124 | +/// be fully materialized (no lazy scalar-fn wrappers); pass a raw array from |
| 125 | +/// [`generate_random_vectors`], not a compressed variant. |
| 126 | +pub fn extract_row_as_query(vectors: &ArrayRef, row: usize, dim: u32) -> Vec<f32> { |
| 127 | + let ext = vectors |
| 128 | + .as_opt::<Extension>() |
| 129 | + .vortex_expect("data must be a Vector extension array"); |
| 130 | + |
| 131 | + let mut ctx = SESSION.create_execution_ctx(); |
| 132 | + let fsl: FixedSizeListArray = ext |
| 133 | + .storage_array() |
| 134 | + .clone() |
| 135 | + .execute(&mut ctx) |
| 136 | + .vortex_expect("storage array executes to an FSL"); |
| 137 | + let elements: PrimitiveArray = fsl |
| 138 | + .elements() |
| 139 | + .clone() |
| 140 | + .execute(&mut ctx) |
| 141 | + .vortex_expect("FSL elements execute to a PrimitiveArray"); |
| 142 | + |
| 143 | + let slice = elements.as_slice::<f32>(); |
| 144 | + let dim_usize = dim as usize; |
| 145 | + let start = row * dim_usize; |
| 146 | + slice[start..start + dim_usize].to_vec() |
| 147 | +} |
| 148 | + |
| 149 | +/// Build a `Vector<dim, f32>` extension array whose storage is a [`ConstantArray`] broadcasting a |
| 150 | +/// single query vector across `num_rows` rows. This is how we hand a single query vector to |
| 151 | +/// `CosineSimilarity` on the `rhs` side -- `ScalarFnArray` requires both children to have the |
| 152 | +/// same length, so we broadcast the query instead of hand-rolling a 1-row input. |
| 153 | +fn build_constant_query_vector(query: &[f32], num_rows: usize) -> VortexResult<ArrayRef> { |
| 154 | + let element_dtype = DType::Primitive(PType::F32, Nullability::NonNullable); |
| 155 | + |
| 156 | + let children: Vec<Scalar> = query |
| 157 | + .iter() |
| 158 | + .map(|&v| Scalar::primitive(v, Nullability::NonNullable)) |
| 159 | + .collect(); |
| 160 | + let storage_scalar = Scalar::fixed_size_list(element_dtype, children, Nullability::NonNullable); |
| 161 | + |
| 162 | + let storage = ConstantArray::new(storage_scalar, num_rows).into_array(); |
| 163 | + |
| 164 | + let ext_dtype = ExtDType::<Vector>::try_new(EmptyMetadata, storage.dtype().clone())?.erased(); |
| 165 | + Ok(ExtensionArray::new(ext_dtype, storage).into_array()) |
| 166 | +} |
| 167 | + |
| 168 | +/// Compresses a raw `Vector<dim, f32>` array with the default BtrBlocks pipeline. |
| 169 | +/// |
| 170 | +/// [`BtrBlocksCompressor`] walks into the extension array and recursively compresses the |
| 171 | +/// underlying FSL storage child. TurboQuant is *not* exercised by this path -- it is not |
| 172 | +/// registered in the default scheme set -- so this measures "generic" lossless compression |
| 173 | +/// applied to float vectors. |
| 174 | +pub fn compress_default(data: ArrayRef) -> VortexResult<ArrayRef> { |
| 175 | + BtrBlocksCompressor::default().compress(&data) |
| 176 | +} |
| 177 | + |
| 178 | +/// Compresses a raw `Vector<dim, f32>` array with the TurboQuant pipeline by hand, producing the |
| 179 | +/// same tree shape that |
| 180 | +/// [`vortex_tensor::encodings::turboquant::TurboQuantScheme`] would: |
| 181 | +/// |
| 182 | +/// ```text |
| 183 | +/// L2Denorm(SorfTransform(FSL(Dict(codes, centroids))), norms) |
| 184 | +/// ``` |
| 185 | +/// |
| 186 | +/// Calling the encode helpers directly (instead of going through |
| 187 | +/// `BtrBlocksCompressorBuilder::with_turboquant()`) lets this example avoid depending on the |
| 188 | +/// `unstable_encodings` feature flag. |
| 189 | +/// |
| 190 | +/// See `vortex-tensor/src/encodings/turboquant/tests/mod.rs::normalize_and_encode` for the same |
| 191 | +/// canonical recipe. |
| 192 | +pub fn compress_turboquant(data: ArrayRef, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> { |
| 193 | + let l2_denorm = normalize_as_l2_denorm(data, ctx)?; |
| 194 | + let normalized = l2_denorm.child_at(0).clone(); |
| 195 | + let norms = l2_denorm.child_at(1).clone(); |
| 196 | + let num_rows = l2_denorm.len(); |
| 197 | + |
| 198 | + let normalized_ext = normalized |
| 199 | + .as_opt::<Extension>() |
| 200 | + .vortex_expect("normalized child should be an Extension array"); |
| 201 | + |
| 202 | + let config = TurboQuantConfig::default(); |
| 203 | + // SAFETY: `normalize_as_l2_denorm` guarantees every row is unit-norm (or zero), which is the |
| 204 | + // invariant `turboquant_encode_unchecked` expects. |
| 205 | + let tq = unsafe { turboquant_encode_unchecked(normalized_ext, &config, ctx) }?; |
| 206 | + |
| 207 | + Ok(unsafe { L2Denorm::new_array_unchecked(tq, norms, num_rows) }?.into_array()) |
| 208 | +} |
| 209 | + |
| 210 | +/// Dispatch helper that builds the data array for the requested [`Variant`], starting from a |
| 211 | +/// single random-vector generation. Always returns an `ArrayRef` whose logical dtype is |
| 212 | +/// `Vector<dim, f32>`. |
| 213 | +pub fn build_variant( |
| 214 | + variant: Variant, |
| 215 | + num_rows: usize, |
| 216 | + dim: u32, |
| 217 | + seed: u64, |
| 218 | + ctx: &mut ExecutionCtx, |
| 219 | +) -> VortexResult<ArrayRef> { |
| 220 | + let raw = generate_random_vectors(num_rows, dim, seed); |
| 221 | + match variant { |
| 222 | + Variant::Uncompressed => Ok(raw), |
| 223 | + Variant::DefaultCompression => compress_default(raw), |
| 224 | + Variant::TurboQuant => compress_turboquant(raw, ctx), |
| 225 | + } |
| 226 | +} |
| 227 | + |
| 228 | +/// Build the lazy similarity-search array tree for a prepared data array and a single query |
| 229 | +/// vector. The returned tree is a boolean array of length `data.len()` where position `i` is |
| 230 | +/// `true` iff `cosine_similarity(data[i], query) > threshold`. |
| 231 | +/// |
| 232 | +/// The tree shape is: |
| 233 | +/// |
| 234 | +/// ```text |
| 235 | +/// Binary(Gt, [ |
| 236 | +/// CosineSimilarity([data, ConstantArray(query_vec, n)]), |
| 237 | +/// ConstantArray(threshold, n), |
| 238 | +/// ]) |
| 239 | +/// ``` |
| 240 | +/// |
| 241 | +/// This function does no execution; it is safe to call inside a benchmark setup closure. |
| 242 | +pub fn build_similarity_search_tree( |
| 243 | + data: ArrayRef, |
| 244 | + query: &[f32], |
| 245 | + threshold: f32, |
| 246 | +) -> VortexResult<ArrayRef> { |
| 247 | + let num_rows = data.len(); |
| 248 | + let query_vec = build_constant_query_vector(query, num_rows)?; |
| 249 | + |
| 250 | + let cosine = CosineSimilarity::try_new_array(data, query_vec, num_rows)?.into_array(); |
| 251 | + |
| 252 | + let threshold_scalar = Scalar::primitive(threshold, Nullability::NonNullable); |
| 253 | + let threshold_array = ConstantArray::new(threshold_scalar, num_rows).into_array(); |
| 254 | + |
| 255 | + cosine.binary(threshold_array, Operator::Gt) |
| 256 | +} |
0 commit comments