ShisoftResearch
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 11 additions & 9 deletions
diff --git a/config/log4rs.yaml b/config/log4rs.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/config/wikidata_server.yaml b/config/wikidata_server.yaml
Lines changed: 19 additions & 0 deletions
diff --git a/src/apps/embedding/config.rs b/src/apps/embedding/config.rs
Lines changed: 75 additions & 29 deletions
diff --git a/src/apps/embedding/mod.rs b/src/apps/embedding/mod.rs
Lines changed: 87 additions & 28 deletions
diff --git a/src/apps/embedding/provider/mod.rs b/src/apps/embedding/provider/mod.rs
Lines changed: 1 addition & 0 deletions
diff --git a/src/apps/embedding/tests.rs b/src/apps/embedding/tests.rs
Lines changed: 2 additions & 0 deletions
@@ -39,7 +39,7 @@ async-std = "1"
 ahash = "0.8.11"
 once_cell = "1.21.3"
 itertools = "0.14.0"
-moka = "0.12"
+moka = { version = "0.12", features = ["future", "sync"] }
 dashmap = "6.1"
 ndarray = "0.16.1"
 serde_json = "1"
@@ -57,21 +57,23 @@ tower-http = { version = "0.5", features = ["cors"] }
 bs58 = "0.5"
 
 # Embedding dependencies
-embellama = "0.8.0"
+embellama = { version = "0.8.0", optional = true }
 async-trait = "0.1"
 reqwest = { version = "0.12", features = ["json"] }
-hf-hub = "0.4"
+hf-hub = { version = "0.4", optional = true }
 
 [features]
-default = ["cpu_optimized"]
+default = []
+# Local GGUF embedding via llama.cpp (compiles llama.cpp C++ — slow first build)
+gguf = ["embellama", "hf-hub"]
+# GGUF + CPU-optimized llama.cpp (recommended when using GGUF without a GPU)
+cpu_optimized = ["gguf", "embellama/cpu-optimized"]
 # Enable CUDA GPU acceleration for embedding (requires NVIDIA GPU and CUDA toolkit)
-cuda = ["embellama/cuda"]
+cuda = ["gguf", "embellama/cuda"]
 # Enable Metal GPU acceleration for embedding (macOS only)
-metal = ["embellama/metal"]
+metal = ["gguf", "embellama/metal"]
 # Enable Vulkan GPU acceleration for embedding
-vulkan = ["embellama/vulkan"]
-
-cpu_optimized = ["embellama/cpu-optimized"]
+vulkan = ["gguf", "embellama/vulkan"]
 
 # Enable detailed logging in hot paths (Trace + Debug + verbose Info)
 verbose-logging = []
 
@@ -8,7 +8,7 @@ appenders:
     encoder:
       pattern: "{d} - {m}{n}"
 root:
-  level: Debug
+  level: info
   appenders:
     - stdout
 loggers:
 
@@ -1,4 +1,23 @@
 server_addr: 127.0.0.1:5401
+http_addr: 0.0.0.0:8080  # HTTP gateway bind address (overrides MORPHEUS_HTTP_ADDR env var)
+
+# Embedding provider (optional — omit to disable embedding service)
+# Ollama (no extra compilation needed):
+#   `dimensions` is optional — auto-detected by probing the model at startup if omitted.
+embedding:
+  provider: ollama
+  url: http://192.168.10.238:11434/
+  model: nomic-embed-text-v2-moe
+  # dimensions: 512   # optional override
+# Local GGUF via llama.cpp (build with --features gguf or cpu_optimized):
+#   All fields except `provider` are optional; omitting hf_model_id uses the
+#   default nomic-embed-text-v1.5 model (auto-downloaded from HuggingFace Hub).
+# embedding:
+#   provider: gguf
+#   hf_model_id: CompendiumLabs/bge-base-en-v1.5-gguf
+#   gguf_file: bge-base-en-v1.5-q8_0.gguf
+#   model_name: bge-base-en-v1.5        # optional, derived from gguf_file if omitted
+#   model_dir: /opt/models              # optional, defaults to ~/.cache/morpheus/models
 group_name: WikidataMorpheus
 meta_members:
   - 127.0.0.1:5401
 
@@ -1,57 +1,103 @@
 //! Embedding Service Configuration
-//!
-//! Simple configuration for Ollama embedding provider.
 
 use serde::{Deserialize, Serialize};
 
-/// Embedding service configuration
+/// Ollama embedding provider configuration
 #[derive(Debug, Clone, Deserialize, Serialize)]
-pub struct EmbeddingConfig {
+pub struct OllamaConfig {
     /// Ollama server URL (default: http://localhost:11434)
-    #[serde(default = "default_url")]
+    #[serde(default = "default_ollama_url")]
     pub url: String,
 
     /// Model name (default: nomic-embed-text)
-    #[serde(default = "default_model")]
+    #[serde(default = "default_ollama_model")]
     pub model: String,
 
-    /// Vector dimensions (default: 768 for nomic-embed-text)
-    #[serde(default = "default_dimensions")]
-    pub dimensions: usize,
+    /// Vector dimensions. If omitted, auto-detected by probing the model at startup.
+    #[serde(default)]
+    pub dimensions: Option<usize>,
 }
 
-fn default_url() -> String {
+fn default_ollama_url() -> String {
     "http://localhost:11434".to_string()
 }
 
-fn default_model() -> String {
+fn default_ollama_model() -> String {
     "nomic-embed-text".to_string()
 }
 
-fn default_dimensions() -> usize {
-    768
-}
-
-impl Default for EmbeddingConfig {
+impl Default for OllamaConfig {
     fn default() -> Self {
         Self {
-            url: default_url(),
-            model: default_model(),
-            dimensions: default_dimensions(),
+            url: default_ollama_url(),
+            model: default_ollama_model(),
+            dimensions: None,
         }
     }
 }
 
-impl EmbeddingConfig {
-    /// Load from YAML string
-    pub fn from_yaml(yaml: &str) -> Result<Self, String> {
-        serde_yaml::from_str(yaml).map_err(|e| format!("Failed to parse embedding config: {}", e))
-    }
+/// GGUF (local llama.cpp) embedding provider configuration.
+/// Requires the `gguf` feature to be enabled.
+///
+/// The model is auto-downloaded from HuggingFace Hub if not already cached.
+///
+/// ```yaml
+/// embedding:
+///   provider: gguf
+///   hf_model_id: nomic-ai/nomic-embed-text-v1.5-GGUF
+///   gguf_file: nomic-embed-text-v1.5.Q8_0.gguf
+///   model_name: nomic-embed-text-v1.5
+///   model_dir: /opt/models   # optional cache dir, defaults to ~/.cache/morpheus/models
+/// ```
+#[cfg(feature = "gguf")]
+#[derive(Debug, Clone, Deserialize, Serialize, Default)]
+pub struct GgufConfig {
+    /// HuggingFace repository ID (e.g. "nomic-ai/nomic-embed-text-v1.5-GGUF").
+    /// Defaults to the built-in nomic-embed-text-v1.5 model.
+    #[serde(default)]
+    pub hf_model_id: Option<String>,
 
-    /// Load from YAML file
-    pub fn from_file(path: &str) -> Result<Self, String> {
-        let content = std::fs::read_to_string(path)
-            .map_err(|e| format!("Failed to read config file: {}", e))?;
-        Self::from_yaml(&content)
+    /// GGUF filename within the HuggingFace repository (e.g. "nomic-embed-text-v1.5.Q8_0.gguf").
+    /// Must be specified when `hf_model_id` is set.
+    #[serde(default)]
+    pub gguf_file: Option<String>,
+
+    /// Internal model name used for lookups (e.g. "nomic-embed-text-v1.5").
+    /// Defaults to the filename stem when not specified.
+    #[serde(default)]
+    pub model_name: Option<String>,
+
+    /// Local directory to cache downloaded models.
+    /// Defaults to ~/.cache/morpheus/models
+    #[serde(default)]
+    pub model_dir: Option<String>,
+}
+
+/// Embedding provider configuration.
+///
+/// ```yaml
+/// # Ollama (default, no extra compilation needed)
+/// embedding:
+///   provider: ollama
+///   url: http://localhost:11434
+///   model: nomic-embed-text
+///   dimensions: 768
+///
+/// # Local GGUF via llama.cpp (requires `gguf` feature)
+/// embedding:
+///   provider: gguf
+///   model_dir: /opt/models   # optional
+/// ```
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(tag = "provider", rename_all = "lowercase")]
+pub enum EmbeddingConfig {
+    Ollama(#[serde(default)] OllamaConfig),
+    #[cfg(feature = "gguf")]
+    Gguf(#[serde(default)] GgufConfig),
+}
+
+impl Default for EmbeddingConfig {
+    fn default() -> Self {
+        EmbeddingConfig::Ollama(OllamaConfig::default())
     }
 }
@@ -57,6 +57,7 @@ use schema::*;
 // Re-exports
 pub use config::EmbeddingConfig;
 pub use provider::batch_coordinator::EmbeddingBatchCoordinator;
+#[cfg(feature = "gguf")]
 pub use provider::gguf::{GgufEmbedding, GgufModelConfig};
 pub use provider::ollama::OllamaEmbedding;
 pub use provider::{EmbeddingProvider, EmbeddingResult};
@@ -115,9 +116,14 @@ impl EmbeddingIndexer {
         let indexer = Self {
             providers: Arc::new(PtrHashMap::with_capacity(16)),
             provider_names: Arc::new(RwLock::new(Vec::new())),
-            // Default to GGUF provider with all-MiniLM-L6-v2 model
+            #[cfg(feature = "gguf")]
             default_provider: "gguf".to_string(),
+            #[cfg(feature = "gguf")]
             default_model: provider::gguf::DEFAULT_MODEL_NAME.to_string(),
+            #[cfg(not(feature = "gguf"))]
+            default_provider: "ollama".to_string(),
+            #[cfg(not(feature = "gguf"))]
+            default_model: "nomic-embed-text".to_string(),
             index_cache: Arc::new(PtrHashMap::with_capacity(64)),
             index_models: Arc::new(PtrHashMap::with_capacity(64)),
             hnsw_coordinator,
@@ -757,22 +763,23 @@ pub async fn initialize_embedding_service_for_runtime(
 ) -> Result<(), String> {
     let mut indexer = EmbeddingIndexer::new(runtime).await?;
 
-    // Use GgufEmbedding (auto-downloads from HuggingFace)
-    // - GGUF format via embellama
-    // - Auto-detects embedding dimensions
-    // - Configured for high-end GPU (larger batches)
-    let model_dir = GgufEmbedding::default_model_dir();
-    let config = provider::gguf::GgufModelConfig::default().high_end_gpu();
-    let gguf = GgufEmbedding::with_config(&model_dir, config)?;
-    info!(
-        "GGUF embedding provider initialized (model: {}, dims: {})",
-        gguf.model_name(),
-        gguf.embedding_dim()
-    );
-    indexer.register_provider(Arc::new(gguf));
-
-    // Set default provider and model
-    indexer.set_default("gguf", provider::gguf::DEFAULT_MODEL_NAME);
+    #[cfg(feature = "gguf")]
+    {
+        // Use GgufEmbedding (auto-downloads from HuggingFace)
+        // - GGUF format via embellama
+        // - Auto-detects embedding dimensions
+        // - Configured for high-end GPU (larger batches)
+        let model_dir = GgufEmbedding::default_model_dir();
+        let config = provider::gguf::GgufModelConfig::default().high_end_gpu();
+        let gguf = GgufEmbedding::with_config(&model_dir, config)?;
+        info!(
+            "GGUF embedding provider initialized (model: {}, dims: {})",
+            gguf.model_name(),
+            gguf.embedding_dim()
+        );
+        indexer.register_provider(Arc::new(gguf));
+        indexer.set_default("gguf", provider::gguf::DEFAULT_MODEL_NAME);
+    }
 
     // Set on Neb server's embedding client
     if let Some(index_builder) = runtime.database_runtime().indexer() {
@@ -802,17 +809,69 @@ pub async fn initialize_embedding_service_for_runtime_with_config(
 ) -> Result<(), String> {
     let mut indexer = EmbeddingIndexer::new(runtime).await?;
 
-    // Register Ollama provider with configured URL and dimensions
-    let ollama =
-        OllamaEmbedding::new_with_dimensions(&config.url, &config.model, config.dimensions);
-    info!(
-        "Ollama embedding provider initialized (URL: {}, model: {}, dims: {})",
-        config.url, config.model, config.dimensions
-    );
-    indexer.register_provider(Arc::new(ollama));
-
-    // Set default provider and model
-    indexer.set_default("ollama", &config.model);
+    match config {
+        EmbeddingConfig::Ollama(ollama_cfg) => {
+            let ollama = OllamaEmbedding::new(&ollama_cfg.url);
+            let dimensions = match ollama_cfg.dimensions {
+                Some(d) => d,
+                None => {
+                    // Auto-detect by probing the model with a single embedding
+                    let probe = ollama
+                        .embed_document(&ollama_cfg.model, "probe")
+                        .await
+                        .map_err(|e| {
+                            format!(
+                                "Failed to auto-detect dimensions for Ollama model '{}': {}",
+                                ollama_cfg.model, e
+                            )
+                        })?;
+                    probe.dimensions
+                }
+            };
+            let ollama = OllamaEmbedding::new_with_dimensions(
+                &ollama_cfg.url,
+                &ollama_cfg.model,
+                dimensions,
+            );
+            info!(
+                "Ollama embedding provider initialized (URL: {}, model: {}, dims: {})",
+                ollama_cfg.url, ollama_cfg.model, dimensions
+            );
+            indexer.register_provider(Arc::new(ollama));
+            indexer.set_default("ollama", &ollama_cfg.model);
+        }
+        #[cfg(feature = "gguf")]
+        EmbeddingConfig::Gguf(gguf_cfg) => {
+            // Explicit cache directory from the config, else the provider's default location.
+            let model_dir = gguf_cfg
+                .model_dir
+                .clone()
+                .unwrap_or_else(GgufEmbedding::default_model_dir);
+            // Start from the built-in model config and override only what the user set.
+            let mut gguf_model_config = provider::gguf::GgufModelConfig::default();
+            if let Some(hf_model_id) = &gguf_cfg.hf_model_id {
+                gguf_model_config.hf_model_id = hf_model_id.clone();
+            }
+            if let Some(gguf_file) = &gguf_cfg.gguf_file {
+                gguf_model_config.gguf_file = gguf_file.clone();
+            }
+            if let Some(model_name) = &gguf_cfg.model_name {
+                gguf_model_config.model_name = model_name.clone();
+            } else if gguf_cfg.gguf_file.is_some() {
+                // Derive model_name from the gguf filename stem when not explicitly set
+                gguf_model_config.model_name = gguf_model_config
+                    .gguf_file
+                    .trim_end_matches(".gguf")
+                    .to_string();
+            }
+            let gguf = GgufEmbedding::with_config(&model_dir, gguf_model_config)?;
+            info!(
+                "GGUF embedding provider initialized (model: {}, dims: {})",
+                gguf.model_name(),
+                gguf.embedding_dim()
+            );
+            // Use the name of the model that was actually loaded as the default, mirroring
+            // the Ollama arm. Hard-coding DEFAULT_MODEL_NAME would leave the default pointing
+            // at the built-in model name even when the config selects a different model.
+            // The name is captured first because `gguf` is moved into the Arc below.
+            let default_model = gguf.model_name().to_string();
+            indexer.register_provider(Arc::new(gguf));
+            indexer.set_default("gguf", &default_model);
+        }
+    }
 
     // Set on Neb server's embedding client
     if let Some(index_builder) = runtime.database_runtime().indexer() {
 
@@ -9,6 +9,7 @@
 //! which is significantly more efficient than processing texts one at a time.
 
 pub mod batch_coordinator;
+#[cfg(feature = "gguf")]
 pub mod gguf;
 pub mod ollama;
 
 
@@ -114,6 +114,7 @@ mod provider_tests {
     }
 
     /// Compare latency between GGUF (local embellama) and Ollama (remote GPU)
+    #[cfg(feature = "gguf")]
     #[tokio::test]
     #[ignore] // Run with: cargo test test_latency_comparison -- --ignored --nocapture
     async fn test_latency_comparison() {
@@ -240,6 +241,7 @@ mod provider_tests {
 
     /// Test GGUF embedding provider
     /// The model will be auto-downloaded from HuggingFace Hub if not present
+    #[cfg(feature = "gguf")]
     #[tokio::test]
     #[ignore] // Run with: cargo test -- --ignored
     async fn test_gguf_provider() {