@@ -57,6 +57,7 @@ use schema::*;
5757// Re-exports
5858pub use config:: EmbeddingConfig ;
5959pub use provider:: batch_coordinator:: EmbeddingBatchCoordinator ;
60+ #[ cfg( feature = "gguf" ) ]
6061pub use provider:: gguf:: { GgufEmbedding , GgufModelConfig } ;
6162pub use provider:: ollama:: OllamaEmbedding ;
6263pub use provider:: { EmbeddingProvider , EmbeddingResult } ;
@@ -115,9 +116,14 @@ impl EmbeddingIndexer {
115116 let indexer = Self {
116117 providers : Arc :: new ( PtrHashMap :: with_capacity ( 16 ) ) ,
117118 provider_names : Arc :: new ( RwLock :: new ( Vec :: new ( ) ) ) ,
118- // Default to GGUF provider with all-MiniLM-L6-v2 model
119+ # [ cfg ( feature = "gguf" ) ]
119120 default_provider : "gguf" . to_string ( ) ,
121+ #[ cfg( feature = "gguf" ) ]
120122 default_model : provider:: gguf:: DEFAULT_MODEL_NAME . to_string ( ) ,
123+ #[ cfg( not( feature = "gguf" ) ) ]
124+ default_provider : "ollama" . to_string ( ) ,
125+ #[ cfg( not( feature = "gguf" ) ) ]
126+ default_model : "nomic-embed-text" . to_string ( ) ,
121127 index_cache : Arc :: new ( PtrHashMap :: with_capacity ( 64 ) ) ,
122128 index_models : Arc :: new ( PtrHashMap :: with_capacity ( 64 ) ) ,
123129 hnsw_coordinator,
@@ -757,22 +763,23 @@ pub async fn initialize_embedding_service_for_runtime(
757763) -> Result < ( ) , String > {
758764 let mut indexer = EmbeddingIndexer :: new ( runtime) . await ?;
759765
760- // Use GgufEmbedding (auto-downloads from HuggingFace)
761- // - GGUF format via embellama
762- // - Auto-detects embedding dimensions
763- // - Configured for high-end GPU (larger batches)
764- let model_dir = GgufEmbedding :: default_model_dir ( ) ;
765- let config = provider:: gguf:: GgufModelConfig :: default ( ) . high_end_gpu ( ) ;
766- let gguf = GgufEmbedding :: with_config ( & model_dir, config) ?;
767- info ! (
768- "GGUF embedding provider initialized (model: {}, dims: {})" ,
769- gguf. model_name( ) ,
770- gguf. embedding_dim( )
771- ) ;
772- indexer. register_provider ( Arc :: new ( gguf) ) ;
773-
774- // Set default provider and model
775- indexer. set_default ( "gguf" , provider:: gguf:: DEFAULT_MODEL_NAME ) ;
766+ #[ cfg( feature = "gguf" ) ]
767+ {
768+ // Use GgufEmbedding (auto-downloads from HuggingFace)
769+ // - GGUF format via embellama
770+ // - Auto-detects embedding dimensions
771+ // - Configured for high-end GPU (larger batches)
772+ let model_dir = GgufEmbedding :: default_model_dir ( ) ;
773+ let config = provider:: gguf:: GgufModelConfig :: default ( ) . high_end_gpu ( ) ;
774+ let gguf = GgufEmbedding :: with_config ( & model_dir, config) ?;
775+ info ! (
776+ "GGUF embedding provider initialized (model: {}, dims: {})" ,
777+ gguf. model_name( ) ,
778+ gguf. embedding_dim( )
779+ ) ;
780+ indexer. register_provider ( Arc :: new ( gguf) ) ;
781+ indexer. set_default ( "gguf" , provider:: gguf:: DEFAULT_MODEL_NAME ) ;
782+ }
776783
777784 // Set on Neb server's embedding client
778785 if let Some ( index_builder) = runtime. database_runtime ( ) . indexer ( ) {
@@ -802,17 +809,69 @@ pub async fn initialize_embedding_service_for_runtime_with_config(
802809) -> Result < ( ) , String > {
803810 let mut indexer = EmbeddingIndexer :: new ( runtime) . await ?;
804811
805- // Register Ollama provider with configured URL and dimensions
806- let ollama =
807- OllamaEmbedding :: new_with_dimensions ( & config. url , & config. model , config. dimensions ) ;
808- info ! (
809- "Ollama embedding provider initialized (URL: {}, model: {}, dims: {})" ,
810- config. url, config. model, config. dimensions
811- ) ;
812- indexer. register_provider ( Arc :: new ( ollama) ) ;
813-
814- // Set default provider and model
815- indexer. set_default ( "ollama" , & config. model ) ;
812+ match config {
813+ EmbeddingConfig :: Ollama ( ollama_cfg) => {
814+ let ollama = OllamaEmbedding :: new ( & ollama_cfg. url ) ;
815+ let dimensions = match ollama_cfg. dimensions {
816+ Some ( d) => d,
817+ None => {
818+ // Auto-detect by probing the model with a single embedding
819+ let probe = ollama
820+ . embed_document ( & ollama_cfg. model , "probe" )
821+ . await
822+ . map_err ( |e| {
823+ format ! (
824+ "Failed to auto-detect dimensions for Ollama model '{}': {}" ,
825+ ollama_cfg. model, e
826+ )
827+ } ) ?;
828+ probe. dimensions
829+ }
830+ } ;
831+ let ollama = OllamaEmbedding :: new_with_dimensions (
832+ & ollama_cfg. url ,
833+ & ollama_cfg. model ,
834+ dimensions,
835+ ) ;
836+ info ! (
837+ "Ollama embedding provider initialized (URL: {}, model: {}, dims: {})" ,
838+ ollama_cfg. url, ollama_cfg. model, dimensions
839+ ) ;
840+ indexer. register_provider ( Arc :: new ( ollama) ) ;
841+ indexer. set_default ( "ollama" , & ollama_cfg. model ) ;
842+ }
843+ #[ cfg( feature = "gguf" ) ]
844+ EmbeddingConfig :: Gguf ( gguf_cfg) => {
845+ let model_dir = gguf_cfg
846+ . model_dir
847+ . clone ( )
848+ . unwrap_or_else ( GgufEmbedding :: default_model_dir) ;
849+ let mut gguf_model_config = provider:: gguf:: GgufModelConfig :: default ( ) ;
850+ if let Some ( hf_model_id) = & gguf_cfg. hf_model_id {
851+ gguf_model_config. hf_model_id = hf_model_id. clone ( ) ;
852+ }
853+ if let Some ( gguf_file) = & gguf_cfg. gguf_file {
854+ gguf_model_config. gguf_file = gguf_file. clone ( ) ;
855+ }
856+ if let Some ( model_name) = & gguf_cfg. model_name {
857+ gguf_model_config. model_name = model_name. clone ( ) ;
858+ } else if gguf_cfg. gguf_file . is_some ( ) {
859+ // Derive model_name from the gguf filename stem when not explicitly set
860+ gguf_model_config. model_name = gguf_model_config
861+ . gguf_file
862+ . trim_end_matches ( ".gguf" )
863+ . to_string ( ) ;
864+ }
865+ let gguf = GgufEmbedding :: with_config ( & model_dir, gguf_model_config) ?;
866+ info ! (
867+ "GGUF embedding provider initialized (model: {}, dims: {})" ,
868+ gguf. model_name( ) ,
869+ gguf. embedding_dim( )
870+ ) ;
871+ indexer. register_provider ( Arc :: new ( gguf) ) ;
872+ indexer. set_default ( "gguf" , provider:: gguf:: DEFAULT_MODEL_NAME ) ;
873+ }
874+ }
816875
817876 // Set on Neb server's embedding client
818877 if let Some ( index_builder) = runtime. database_runtime ( ) . indexer ( ) {
0 commit comments