Skip to content

Commit 00d8b25

Browse files
committed
Enhance logging and error handling in EmbeddingIndexer
- Added detailed logging for cache hits and misses in `get_index_info` to improve traceability during index retrieval.
- Implemented error logging for failed attempts to read index metadata and handled missing indices more gracefully.
- Introduced model variation attempts in the embedding search process to enhance robustness against configuration discrepancies.
- Updated `Cargo.toml` to include a new `strip` setting for build configuration.
1 parent a4ff9f6 commit 00d8b25

3 files changed

Lines changed: 82 additions & 13 deletions

File tree

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,5 @@ debug = 2 # Full debug symbols
103103
opt-level = 3
104104
debug = true
105105
lto = true
106-
panic = 'unwind'
106+
panic = 'unwind'
107+
strip = "none"

src/apps/embedding/mod.rs

Lines changed: 79 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -400,18 +400,34 @@ impl EmbeddingIndexer {
400400

401401
// Check cache first
402402
if let Some(ptr_ref) = self.index_cache.as_ref().get(&key) {
403+
eprintln!("[get_index_info] Cache hit for schema={}, field={}, model={}", schema_id, field_id, model);
403404
return Ok(ptr_ref.clone());
404405
}
405406

406407
// Load from storage
407408
let index_id = embedding_index_id(schema_id, field_id, model);
409+
eprintln!(
410+
"[get_index_info] Cache miss, loading from storage: schema={}, field={}, model={}, index_id={:?}",
411+
schema_id, field_id, model, index_id
412+
);
413+
408414
let index_cell = self
409415
.morph
410416
.neb_client
411417
.read_cell(index_id)
412418
.await
413-
.map_err(|e| format!("Failed to read index metadata: {:?}", e))?
414-
.map_err(|e| format!("Index not found: {:?}", e))?;
419+
.map_err(|e| {
420+
let err = format!("Failed to read index metadata: {:?}", e);
421+
eprintln!("[get_index_info] ERROR: {}", err);
422+
err
423+
})?
424+
.map_err(|e| {
425+
let err = format!("Index not found: {:?}", e);
426+
eprintln!("[get_index_info] ERROR: {}", err);
427+
err
428+
})?;
429+
430+
eprintln!("[get_index_info] Successfully loaded index metadata from storage");
415431

416432
let emb_schema_id = index_cell[EMB_SCHEMA_ID]
417433
.u32()
@@ -674,15 +690,23 @@ impl EmbeddingIndexerCore for EmbeddingIndexer {
674690
) -> BoxFuture<'_, Result<Vec<EmbeddingHit>, IndexError>> {
675691
let query = query.to_string();
676692
async move {
693+
eprintln!(
694+
"[Embedding search] schema={}, field={}, default_provider={}, default_model={}",
695+
schema_id, field_id, self.default_provider, self.default_model
696+
);
697+
677698
// Find the index for this (schema, field)
678699
// If multiple models exist, we need to pick one
679700
// For now, use the first one found or default
680701
let (model, emb_schema_id) = {
681702
let models_key = (schema_id, field_id);
682703
let model_name =
683704
if let Some(models_list) = self.index_models.as_ref().get(&models_key) {
684-
models_list.read().first().cloned()
705+
let m = models_list.read().first().cloned();
706+
eprintln!("[Embedding search] Found in index_models: {:?}", m);
707+
m
685708
} else {
709+
eprintln!("[Embedding search] NOT in index_models, will try default");
686710
None
687711
};
688712

@@ -692,17 +716,61 @@ impl EmbeddingIndexerCore for EmbeddingIndexer {
692716
if let Some(ptr_ref) = self.index_cache.as_ref().get(&key) {
693717
(model, ptr_ref.clone().0)
694718
} else {
695-
return Err(IndexError::Other(format!(
696-
"No embedding index found for schema {} field {}",
697-
schema_id, field_id
698-
)));
719+
// Model in index_models but not in cache - lazy load from storage
720+
match self.get_index_info(schema_id, field_id, &model).await {
721+
Ok((emb_schema_id, _)) => (model, emb_schema_id),
722+
Err(_) => {
723+
return Err(IndexError::Other(format!(
724+
"No embedding index found for schema {} field {} (model: {})",
725+
schema_id, field_id, model
726+
)));
727+
}
728+
}
699729
}
700730
}
701731
None => {
702-
return Err(IndexError::Other(format!(
703-
"No embedding index found for schema {} field {}",
704-
schema_id, field_id
705-
)));
732+
// Cache miss - try to lazy-load from storage
733+
// Try multiple common model variations to handle recovery when
734+
// the server was started with different embedding config than during import
735+
let (provider_name, model_name) = (self.default_provider.clone(), self.default_model.clone());
736+
let default_full = Self::full_model_name(&provider_name, &model_name);
737+
738+
// Common model variations to try (order matters - try default first)
739+
let model_variations = vec![
740+
default_full.clone(), // Current default (e.g., "gguf:nomic-embed-text-v1.5")
741+
"ollama:nomic-embed-text".to_string(), // Ollama variant
742+
"gguf:multilingual-e5-base".to_string(), // Old default GGUF
743+
"e5:multilingual-e5-base".to_string(), // E5 variant
744+
];
745+
746+
eprintln!("[Embedding search] Trying model variations: {:?}", model_variations);
747+
748+
let mut found_model = None;
749+
for model_to_try in &model_variations {
750+
eprintln!("[Embedding search] Attempting: {}", model_to_try);
751+
match self.get_index_info(schema_id, field_id, model_to_try).await {
752+
Ok((emb_schema_id, _)) => {
753+
eprintln!("[Embedding search] SUCCESS with model: {}", model_to_try);
754+
found_model = Some((model_to_try.clone(), emb_schema_id));
755+
break;
756+
}
757+
Err(e) => {
758+
eprintln!("[Embedding search] Failed with {}: {}", model_to_try, e);
759+
continue;
760+
}
761+
}
762+
}
763+
764+
// Return result or error
765+
match found_model {
766+
Some((model, emb_schema_id)) => (model, emb_schema_id),
767+
None => {
768+
return Err(IndexError::Other(format!(
769+
"No embedding index found for schema {} field {} (tried: {:?})",
770+
schema_id, field_id, model_variations
771+
)));
772+
}
773+
}
706774
}
707775
}
708776
};

src/apps/wikidata/schema.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ pub fn create_entity_metadata_schema() -> Schema {
9393
Type::String,
9494
vec![IndexType::Fulltext, IndexType::Embedding(embedding_model)],
9595
),
96-
// Field::new_indexed("description", Type::String, vec![IndexType::Fulltext]),
96+
//Field::new_indexed("description", Type::String, vec![IndexType::Fulltext]),
9797
Field::new_indexed_array("aliases", Type::String, vec![IndexType::Fulltext]),
9898
Field::new_unindexed_nullable("sitelinks", Type::String), // Optional: JSON string
9999
Field::new_unindexed_nullable("other_data", Type::String), // Optional: JSON string

0 commit comments

Comments
 (0)