@@ -354,13 +354,23 @@ mod provider_tests {
354354
355355#[ cfg( test) ]
356356mod integration_tests {
357- use dovahkiin:: types:: { Id , Map , OwnedMap , OwnedValue , Type } ;
357+ use dovahkiin:: types:: { Id , Map , OwnedMap , OwnedPrimArray , OwnedValue , Type } ;
358358 use neb:: {
359359 index:: embedding:: EmbeddingModel ,
360+ index:: vector:: HnswConfig ,
360361 ram:: schema:: { Field , IndexType , Schema } ,
361362 ram:: types:: RandValue ,
362363 } ;
363364
365+ use crate :: apps:: {
366+ embedding:: {
367+ EmbeddingIndexer ,
368+ schema:: {
369+ create_embedding_cell_schema_for_client, embedding_cell_id, vector_field_id,
370+ } ,
371+ } ,
372+ hnsw:: measurements:: MetricEncoding ,
373+ } ;
364374 use crate :: tests:: start_server;
365375
366376 const TEST_SCHEMA_ID : u32 = 9001 ;
@@ -802,6 +812,78 @@ mod integration_tests {
802812 }
803813 }
804814
815+ #[ tokio:: test]
816+ #[ ignore] // Run with: cargo test test_embedding_cell_write_is_idempotent -- --ignored --exact --nocapture
817+ async fn test_embedding_cell_write_is_idempotent ( ) {
818+ let server = start_server ( 6021 , "embedding_upsert_regression" ) . await . unwrap ( ) ;
819+
820+ let _partition = server. init_hnsw_index_partition_service ( ) . await . unwrap ( ) ;
821+ let _hnsw = server. init_hnsw_index_service ( ) . await . unwrap ( ) ;
822+
823+ let runtime = server. current_runtime ( ) ;
824+ let indexer = EmbeddingIndexer :: new ( & runtime) . await . unwrap ( ) ;
825+
826+ const DIRECT_SCHEMA_ID : u32 = 9002 ;
827+ let field_id = bifrost_hasher:: hash_str ( TEST_FIELD ) ;
828+ let full_model = "test:test-model" ;
829+ let dimensions = 4usize ;
830+
831+ let emb_schema_id = create_embedding_cell_schema_for_client (
832+ server. neb_client . as_ref ( ) ,
833+ full_model,
834+ DIRECT_SCHEMA_ID ,
835+ field_id,
836+ dimensions,
837+ )
838+ . await
839+ . unwrap ( ) ;
840+
841+ indexer
842+ . hnsw_coordinator
843+ . new_index (
844+ format ! ( "EMB-{DIRECT_SCHEMA_ID}-{field_id}-{full_model}" ) ,
845+ emb_schema_id,
846+ vector_field_id ( ) ,
847+ HnswConfig :: default ( ) ,
848+ )
849+ . await
850+ . unwrap ( )
851+ . unwrap ( ) ;
852+
853+ let doc_id = Id :: rand ( ) ;
854+ let vector = vec ! [ 0.5 , 0.5 , 0.5 , 0.5 ] ;
855+
856+ indexer
857+ . write_embedding_cell ( & doc_id, DIRECT_SCHEMA_ID , field_id, full_model, vector. clone ( ) )
858+ . await
859+ . unwrap ( ) ;
860+
861+ indexer
862+ . write_embedding_cell ( & doc_id, DIRECT_SCHEMA_ID , field_id, full_model, vector. clone ( ) )
863+ . await
864+ . unwrap ( ) ;
865+
866+ let emb_cell_id = embedding_cell_id ( & doc_id, DIRECT_SCHEMA_ID , field_id, full_model) ;
867+ server. neb_client . read_cell ( emb_cell_id) . await . unwrap ( ) . unwrap ( ) ;
868+
869+ let hits = indexer
870+ . hnsw_coordinator
871+ . query_top_k (
872+ emb_schema_id,
873+ vector_field_id ( ) ,
874+ OwnedPrimArray :: F32 ( vector) ,
875+ 10 ,
876+ 32 ,
877+ MetricEncoding :: Cosine ,
878+ )
879+ . await
880+ . unwrap ( )
881+ . unwrap ( ) ;
882+
883+ assert_eq ! ( hits. len( ) , 1 , "duplicate writes should not create duplicate vector entries" ) ;
884+ assert_eq ! ( hits[ 0 ] . 0 , emb_cell_id) ;
885+ }
886+
805887 /// Test multi-model support on the same field
806888 #[ tokio:: test]
807889 #[ ignore]
0 commit comments