@@ -1295,6 +1295,234 @@ mod schema_based_tests {
12951295 println ! ( "=== Multi-Index Schema Test PASSED ===" ) ;
12961296 }
12971297
1298+ /// Test compound embedding index (e.g., aliases + description in Wikidata)
1299+ #[ tokio:: test]
1300+ #[ ignore]
1301+ async fn test_compound_embedding_index ( ) {
1302+ let _ = env_logger:: try_init ( ) ;
1303+
1304+ println ! ( "=== Compound Embedding Index Test ===\n " ) ;
1305+
1306+ const SCIENTIST_SCHEMA_ID : u32 = 10004 ;
1307+ const ID_FIELD : & str = "id" ;
1308+ const NAME_FIELD : & str = "name" ;
1309+ const ALIASES_FIELD : & str = "aliases" ;
1310+ const DESCRIPTION_FIELD : & str = "description" ;
1311+ const COMPOUND_NAME : & str = "aliases_description" ;
1312+ let compound_field_id = bifrost_hasher:: hash_str ( COMPOUND_NAME ) ;
1313+
1314+ fn scientist_schema ( ) -> Schema {
1315+ let embedding_model = EmbeddingModel :: new ( "gguf:nomic-embed-text-v1.5" ) ;
1316+ let fields = vec ! [
1317+ Field :: new_unindexed( ID_FIELD , Type :: String ) ,
1318+ Field :: new_unindexed( NAME_FIELD , Type :: String ) ,
1319+ Field :: new_unindexed_array( ALIASES_FIELD , Type :: String ) ,
1320+ Field :: new_unindexed( DESCRIPTION_FIELD , Type :: String ) ,
1321+ ] ;
1322+ let mut schema = Schema :: new_with_id (
1323+ SCIENTIST_SCHEMA_ID ,
1324+ "test_scientist" ,
1325+ None ,
1326+ Field :: new_schema ( fields) ,
1327+ false ,
1328+ false ,
1329+ ) ;
1330+ schema. add_compound_index (
1331+ COMPOUND_NAME ,
1332+ vec ! [ ALIASES_FIELD . to_string( ) , DESCRIPTION_FIELD . to_string( ) ] ,
1333+ vec ! [ IndexType :: Embedding ( embedding_model) ] ,
1334+ ) ;
1335+ schema
1336+ }
1337+
1338+ let server = start_server ( 6014 , "compound_embedding_test" )
1339+ . await
1340+ . unwrap ( ) ;
1341+ let _partition = server. init_hnsw_index_partition_service ( ) . await . unwrap ( ) ;
1342+ let _hnsw = server. init_hnsw_index_service ( ) . await . unwrap ( ) ;
1343+ crate :: server:: MorpheusServer :: init_embedding_service ( & server)
1344+ . await
1345+ . unwrap ( ) ;
1346+
1347+ let embedding_client = & server
1348+ . neb_server
1349+ . indexer
1350+ . as_ref ( )
1351+ . unwrap ( )
1352+ . clients
1353+ . embedding_client ;
1354+
1355+ if !embedding_client. is_embedding_index_core_set ( ) {
1356+ eprintln ! ( "✗ Embedding core not set, skipping test" ) ;
1357+ return ;
1358+ }
1359+
1360+ server
1361+ . neb_client
1362+ . new_schema_with_id ( scientist_schema ( ) )
1363+ . await
1364+ . unwrap ( )
1365+ . unwrap ( ) ;
1366+ println ! ( "✓ Scientist schema with compound embedding registered (index auto-created)" ) ;
1367+
1368+ let scientists = vec ! [
1369+ (
1370+ "Albert Einstein" ,
1371+ vec![ "Einstein" , "Albert" , "theoretical physicist" ] ,
1372+ "developed the theory of relativity and contributed to quantum mechanics" ,
1373+ ) ,
1374+ (
1375+ "Isaac Newton" ,
1376+ vec![ "Newton" , "Isaac" , "mathematician" , "physicist" ] ,
1377+ "formulated laws of motion and universal gravitation" ,
1378+ ) ,
1379+ (
1380+ "Marie Curie" ,
1381+ vec![ "Curie" , "Marie" , "Skłodowska" , "physicist" , "chemist" ] ,
1382+ "pioneering research on radioactivity and discovered polonium and radium" ,
1383+ ) ,
1384+ ] ;
1385+
1386+ let mut scientist_ids: Vec < ( Id , & str ) > = Vec :: new ( ) ;
1387+
1388+ println ! ( "\n Writing scientists..." ) ;
1389+ for ( name, aliases, description) in & scientists {
1390+ let id = Id :: rand ( ) ;
1391+ let mut map = OwnedMap :: new ( ) ;
1392+ map. insert ( ID_FIELD , OwnedValue :: String ( name. to_string ( ) ) ) ;
1393+ map. insert ( NAME_FIELD , OwnedValue :: String ( name. to_string ( ) ) ) ;
1394+ map. insert (
1395+ ALIASES_FIELD ,
1396+ OwnedValue :: PrimArray ( dovahkiin:: types:: OwnedPrimArray :: String (
1397+ aliases. iter ( ) . map ( |s| s. to_string ( ) ) . collect ( ) ,
1398+ ) ) ,
1399+ ) ;
1400+ map. insert (
1401+ DESCRIPTION_FIELD ,
1402+ OwnedValue :: String ( description. to_string ( ) ) ,
1403+ ) ;
1404+
1405+ let cell = OwnedCell :: new_with_id ( SCIENTIST_SCHEMA_ID , & id, OwnedValue :: Map ( map) ) ;
1406+ server. neb_client . write_cell ( cell) . await . unwrap ( ) . unwrap ( ) ;
1407+
1408+ println ! ( " ✓ {} written" , name) ;
1409+ scientist_ids. push ( ( id, name) ) ;
1410+ }
1411+
1412+ tokio:: time:: sleep ( tokio:: time:: Duration :: from_millis ( 1000 ) ) . await ;
1413+
1414+ println ! ( "\n === Testing Compound Index Search ===\n " ) ;
1415+
1416+ let query1 = "physicist relativity" ;
1417+ println ! ( "Query: \" {}\" " , query1) ;
1418+ let results1 = embedding_client
1419+ . search ( SCIENTIST_SCHEMA_ID , compound_field_id, query1, 3 )
1420+ . await
1421+ . unwrap ( ) ;
1422+
1423+ println ! ( "Results:" ) ;
1424+ for ( i, hit) in results1. iter ( ) . enumerate ( ) {
1425+ let name = scientist_ids
1426+ . iter ( )
1427+ . find ( |( id, _) | * id == hit. id )
1428+ . map ( |( _, n) | * n)
1429+ . unwrap_or ( "unknown" ) ;
1430+ println ! ( " {}. {} (score: {:.4})" , i + 1 , name, hit. score) ;
1431+ }
1432+
1433+ let einstein_id = scientist_ids
1434+ . iter ( )
1435+ . find ( |( _, n) | n. contains ( "Einstein" ) )
1436+ . unwrap ( )
1437+ . 0 ;
1438+ assert_eq ! (
1439+ results1[ 0 ] . id, einstein_id,
1440+ "Einstein should be top for relativity query"
1441+ ) ;
1442+ println ! ( "✓ Correct: Einstein is top result\n " ) ;
1443+
1444+ let query2 = "mathematician gravity" ;
1445+ println ! ( "Query: \" {}\" " , query2) ;
1446+ let results2 = embedding_client
1447+ . search ( SCIENTIST_SCHEMA_ID , compound_field_id, query2, 3 )
1448+ . await
1449+ . unwrap ( ) ;
1450+
1451+ println ! ( "Results:" ) ;
1452+ for ( i, hit) in results2. iter ( ) . enumerate ( ) {
1453+ let name = scientist_ids
1454+ . iter ( )
1455+ . find ( |( id, _) | * id == hit. id )
1456+ . map ( |( _, n) | * n)
1457+ . unwrap_or ( "unknown" ) ;
1458+ println ! ( " {}. {} (score: {:.4})" , i + 1 , name, hit. score) ;
1459+ }
1460+
1461+ let newton_id = scientist_ids
1462+ . iter ( )
1463+ . find ( |( _, n) | n. contains ( "Newton" ) )
1464+ . unwrap ( )
1465+ . 0 ;
1466+ assert_eq ! (
1467+ results2[ 0 ] . id, newton_id,
1468+ "Newton should be top for gravity query"
1469+ ) ;
1470+ println ! ( "✓ Correct: Newton is top result\n " ) ;
1471+
1472+ let query3 = "chemist radioactivity" ;
1473+ println ! ( "Query: \" {}\" " , query3) ;
1474+ let results3 = embedding_client
1475+ . search ( SCIENTIST_SCHEMA_ID , compound_field_id, query3, 3 )
1476+ . await
1477+ . unwrap ( ) ;
1478+
1479+ println ! ( "Results:" ) ;
1480+ for ( i, hit) in results3. iter ( ) . enumerate ( ) {
1481+ let name = scientist_ids
1482+ . iter ( )
1483+ . find ( |( id, _) | * id == hit. id )
1484+ . map ( |( _, n) | * n)
1485+ . unwrap_or ( "unknown" ) ;
1486+ println ! ( " {}. {} (score: {:.4})" , i + 1 , name, hit. score) ;
1487+ }
1488+
1489+ let curie_id = scientist_ids
1490+ . iter ( )
1491+ . find ( |( _, n) | n. contains ( "Curie" ) )
1492+ . unwrap ( )
1493+ . 0 ;
1494+ assert_eq ! (
1495+ results3[ 0 ] . id, curie_id,
1496+ "Curie should be top for radioactivity query"
1497+ ) ;
1498+ println ! ( "✓ Correct: Curie is top result\n " ) ;
1499+
1500+ println ! ( "=== Testing Alias Contribution ===" ) ;
1501+ let query_alias = "Skłodowska" ;
1502+ println ! ( "Query (alias only): \" {}\" " , query_alias) ;
1503+ let results_alias = embedding_client
1504+ . search ( SCIENTIST_SCHEMA_ID , compound_field_id, query_alias, 3 )
1505+ . await
1506+ . unwrap ( ) ;
1507+
1508+ println ! ( "Results:" ) ;
1509+ for ( i, hit) in results_alias. iter ( ) . enumerate ( ) {
1510+ let name = scientist_ids
1511+ . iter ( )
1512+ . find ( |( id, _) | * id == hit. id )
1513+ . map ( |( _, n) | * n)
1514+ . unwrap_or ( "unknown" ) ;
1515+ println ! ( " {}. {} (score: {:.4})" , i + 1 , name, hit. score) ;
1516+ }
1517+ assert_eq ! (
1518+ results_alias[ 0 ] . id, curie_id,
1519+ "Curie should match her maiden name Skłodowska"
1520+ ) ;
1521+ println ! ( "✓ Correct: Alias contributes to matching\n " ) ;
1522+
1523+ println ! ( "=== Compound Embedding Index Test PASSED ===" ) ;
1524+ }
1525+
12981526 /// Test embedding on array fields (like the aliases field in Wikidata)
12991527 #[ tokio:: test]
13001528 #[ ignore]
0 commit comments