Skip to content

Commit f157b21

Browse files
committed
Checkpoint current local changes
1 parent 3ef1f15 commit f157b21

13 files changed

Lines changed: 281 additions & 147 deletions

File tree

Cargo.toml

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ async-std = "1"
3939
ahash = "0.8.11"
4040
once_cell = "1.21.3"
4141
itertools = "0.14.0"
42-
moka = "0.12"
42+
moka = { version = "0.12", features = ["future", "sync"] }
4343
dashmap = "6.1"
4444
ndarray = "0.16.1"
4545
serde_json = "1"
@@ -57,21 +57,23 @@ tower-http = { version = "0.5", features = ["cors"] }
5757
bs58 = "0.5"
5858

5959
# Embedding dependencies
60-
embellama = "0.8.0"
60+
embellama = { version = "0.8.0", optional = true }
6161
async-trait = "0.1"
6262
reqwest = { version = "0.12", features = ["json"] }
63-
hf-hub = "0.4"
63+
hf-hub = { version = "0.4", optional = true }
6464

6565
[features]
66-
default = ["cpu_optimized"]
66+
default = []
67+
# Local GGUF embedding via llama.cpp (compiles llama.cpp C++ — slow first build)
68+
gguf = ["embellama", "hf-hub"]
69+
# GGUF + CPU-optimized llama.cpp (recommended when using GGUF without a GPU)
70+
cpu_optimized = ["gguf", "embellama/cpu-optimized"]
6771
# Enable CUDA GPU acceleration for embedding (requires NVIDIA GPU and CUDA toolkit)
68-
cuda = ["embellama/cuda"]
72+
cuda = ["gguf", "embellama/cuda"]
6973
# Enable Metal GPU acceleration for embedding (macOS only)
70-
metal = ["embellama/metal"]
74+
metal = ["gguf", "embellama/metal"]
7175
# Enable Vulkan GPU acceleration for embedding
72-
vulkan = ["embellama/vulkan"]
73-
74-
cpu_optimized = ["embellama/cpu-optimized"]
76+
vulkan = ["gguf", "embellama/vulkan"]
7577

7678
# Enable detailed logging in hot paths (Trace + Debug + verbose Info)
7779
verbose-logging = []

config/log4rs.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ appenders:
88
encoder:
99
pattern: "{d} - {m}{n}"
1010
root:
11-
level: Debug
11+
level: info
1212
appenders:
1313
- stdout
1414
loggers:

config/wikidata_server.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,23 @@
11
server_addr: 127.0.0.1:5401
2+
http_addr: 0.0.0.0:8080 # HTTP gateway bind address (overrides MORPHEUS_HTTP_ADDR env var)
3+
4+
# Embedding provider (optional — omit to disable embedding service)
5+
# Ollama (no extra compilation needed):
6+
# `dimensions` is optional — auto-detected by probing the model at startup if omitted.
7+
embedding:
8+
provider: ollama
9+
url: http://192.168.10.238:11434/
10+
model: nomic-embed-text-v2-moe
11+
# dimensions: 512 # optional override
12+
# Local GGUF via llama.cpp (build with `--features gguf` or `--features cpu_optimized`):
13+
# All fields except `provider` are optional; omitting hf_model_id uses the
14+
# default nomic-embed-text-v1.5 model (auto-downloaded from HuggingFace Hub).
15+
# embedding:
16+
# provider: gguf
17+
# hf_model_id: CompendiumLabs/bge-base-en-v1.5-gguf
18+
# gguf_file: bge-base-en-v1.5-q8_0.gguf
19+
# model_name: bge-base-en-v1.5 # optional, derived from gguf_file if omitted
20+
# model_dir: /opt/models # optional, defaults to ~/.cache/morpheus/models
221
group_name: WikidataMorpheus
322
meta_members:
423
- 127.0.0.1:5401

src/apps/embedding/config.rs

Lines changed: 75 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,103 @@
11
//! Embedding Service Configuration
2-
//!
3-
//! Simple configuration for Ollama embedding provider.
42
53
use serde::{Deserialize, Serialize};
64

7-
/// Embedding service configuration
5+
/// Ollama embedding provider configuration
86
#[derive(Debug, Clone, Deserialize, Serialize)]
9-
pub struct EmbeddingConfig {
7+
pub struct OllamaConfig {
108
/// Ollama server URL (default: http://localhost:11434)
11-
#[serde(default = "default_url")]
9+
#[serde(default = "default_ollama_url")]
1210
pub url: String,
1311

1412
/// Model name (default: nomic-embed-text)
15-
#[serde(default = "default_model")]
13+
#[serde(default = "default_ollama_model")]
1614
pub model: String,
1715

18-
/// Vector dimensions (default: 768 for nomic-embed-text)
19-
#[serde(default = "default_dimensions")]
20-
pub dimensions: usize,
16+
/// Vector dimensions. If omitted, auto-detected by probing the model at startup.
17+
#[serde(default)]
18+
pub dimensions: Option<usize>,
2119
}
2220

23-
fn default_url() -> String {
21+
fn default_ollama_url() -> String {
2422
"http://localhost:11434".to_string()
2523
}
2624

27-
fn default_model() -> String {
25+
fn default_ollama_model() -> String {
2826
"nomic-embed-text".to_string()
2927
}
3028

31-
fn default_dimensions() -> usize {
32-
768
33-
}
34-
35-
impl Default for EmbeddingConfig {
29+
impl Default for OllamaConfig {
3630
fn default() -> Self {
3731
Self {
38-
url: default_url(),
39-
model: default_model(),
40-
dimensions: default_dimensions(),
32+
url: default_ollama_url(),
33+
model: default_ollama_model(),
34+
dimensions: None,
4135
}
4236
}
4337
}
4438

45-
impl EmbeddingConfig {
46-
/// Load from YAML string
47-
pub fn from_yaml(yaml: &str) -> Result<Self, String> {
48-
serde_yaml::from_str(yaml).map_err(|e| format!("Failed to parse embedding config: {}", e))
49-
}
39+
/// GGUF (local llama.cpp) embedding provider configuration.
40+
/// Requires the `gguf` feature to be enabled.
41+
///
42+
/// The model is auto-downloaded from HuggingFace Hub if not already cached.
43+
///
44+
/// ```yaml
45+
/// embedding:
46+
/// provider: gguf
47+
/// hf_model_id: nomic-ai/nomic-embed-text-v1.5-GGUF
48+
/// gguf_file: nomic-embed-text-v1.5.Q8_0.gguf
49+
/// model_name: nomic-embed-text-v1.5
50+
/// model_dir: /opt/models # optional cache dir, defaults to ~/.cache/morpheus/models
51+
/// ```
52+
#[cfg(feature = "gguf")]
53+
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
54+
pub struct GgufConfig {
55+
/// HuggingFace repository ID (e.g. "nomic-ai/nomic-embed-text-v1.5-GGUF").
56+
/// Defaults to the built-in nomic-embed-text-v1.5 model.
57+
#[serde(default)]
58+
pub hf_model_id: Option<String>,
5059

51-
/// Load from YAML file
52-
pub fn from_file(path: &str) -> Result<Self, String> {
53-
let content = std::fs::read_to_string(path)
54-
.map_err(|e| format!("Failed to read config file: {}", e))?;
55-
Self::from_yaml(&content)
60+
/// GGUF filename within the HuggingFace repository (e.g. "nomic-embed-text-v1.5.Q8_0.gguf").
61+
/// Must be specified when `hf_model_id` is set.
62+
#[serde(default)]
63+
pub gguf_file: Option<String>,
64+
65+
/// Internal model name used for lookups (e.g. "nomic-embed-text-v1.5").
66+
/// Defaults to `gguf_file` with its `.gguf` suffix removed; if `gguf_file` is also omitted, the built-in default model name is kept.
67+
#[serde(default)]
68+
pub model_name: Option<String>,
69+
70+
/// Local directory to cache downloaded models.
71+
/// Defaults to ~/.cache/morpheus/models
72+
#[serde(default)]
73+
pub model_dir: Option<String>,
74+
}
75+
76+
/// Embedding provider configuration.
77+
///
78+
/// ```yaml
79+
/// # Ollama (default, no extra compilation needed)
80+
/// embedding:
81+
/// provider: ollama
82+
/// url: http://localhost:11434
83+
/// model: nomic-embed-text
84+
/// dimensions: 768
85+
///
86+
/// # Local GGUF via llama.cpp (requires `gguf` feature)
87+
/// embedding:
88+
/// provider: gguf
89+
/// model_dir: /opt/models # optional
90+
/// ```
91+
#[derive(Debug, Clone, Deserialize, Serialize)]
92+
#[serde(tag = "provider", rename_all = "lowercase")]
93+
pub enum EmbeddingConfig {
94+
Ollama(#[serde(default)] OllamaConfig),
95+
#[cfg(feature = "gguf")]
96+
Gguf(#[serde(default)] GgufConfig),
97+
}
98+
99+
impl Default for EmbeddingConfig {
100+
fn default() -> Self {
101+
EmbeddingConfig::Ollama(OllamaConfig::default())
56102
}
57103
}

src/apps/embedding/mod.rs

Lines changed: 87 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ use schema::*;
5757
// Re-exports
5858
pub use config::EmbeddingConfig;
5959
pub use provider::batch_coordinator::EmbeddingBatchCoordinator;
60+
#[cfg(feature = "gguf")]
6061
pub use provider::gguf::{GgufEmbedding, GgufModelConfig};
6162
pub use provider::ollama::OllamaEmbedding;
6263
pub use provider::{EmbeddingProvider, EmbeddingResult};
@@ -115,9 +116,14 @@ impl EmbeddingIndexer {
115116
let indexer = Self {
116117
providers: Arc::new(PtrHashMap::with_capacity(16)),
117118
provider_names: Arc::new(RwLock::new(Vec::new())),
118-
// Default to GGUF provider with all-MiniLM-L6-v2 model
119+
#[cfg(feature = "gguf")]
119120
default_provider: "gguf".to_string(),
121+
#[cfg(feature = "gguf")]
120122
default_model: provider::gguf::DEFAULT_MODEL_NAME.to_string(),
123+
#[cfg(not(feature = "gguf"))]
124+
default_provider: "ollama".to_string(),
125+
#[cfg(not(feature = "gguf"))]
126+
default_model: "nomic-embed-text".to_string(),
121127
index_cache: Arc::new(PtrHashMap::with_capacity(64)),
122128
index_models: Arc::new(PtrHashMap::with_capacity(64)),
123129
hnsw_coordinator,
@@ -757,22 +763,23 @@ pub async fn initialize_embedding_service_for_runtime(
757763
) -> Result<(), String> {
758764
let mut indexer = EmbeddingIndexer::new(runtime).await?;
759765

760-
// Use GgufEmbedding (auto-downloads from HuggingFace)
761-
// - GGUF format via embellama
762-
// - Auto-detects embedding dimensions
763-
// - Configured for high-end GPU (larger batches)
764-
let model_dir = GgufEmbedding::default_model_dir();
765-
let config = provider::gguf::GgufModelConfig::default().high_end_gpu();
766-
let gguf = GgufEmbedding::with_config(&model_dir, config)?;
767-
info!(
768-
"GGUF embedding provider initialized (model: {}, dims: {})",
769-
gguf.model_name(),
770-
gguf.embedding_dim()
771-
);
772-
indexer.register_provider(Arc::new(gguf));
773-
774-
// Set default provider and model
775-
indexer.set_default("gguf", provider::gguf::DEFAULT_MODEL_NAME);
766+
#[cfg(feature = "gguf")]
767+
{
768+
// Use GgufEmbedding (auto-downloads from HuggingFace)
769+
// - GGUF format via embellama
770+
// - Auto-detects embedding dimensions
771+
// - Configured for high-end GPU (larger batches)
772+
let model_dir = GgufEmbedding::default_model_dir();
773+
let config = provider::gguf::GgufModelConfig::default().high_end_gpu();
774+
let gguf = GgufEmbedding::with_config(&model_dir, config)?;
775+
info!(
776+
"GGUF embedding provider initialized (model: {}, dims: {})",
777+
gguf.model_name(),
778+
gguf.embedding_dim()
779+
);
780+
indexer.register_provider(Arc::new(gguf));
781+
indexer.set_default("gguf", provider::gguf::DEFAULT_MODEL_NAME);
782+
}
776783

777784
// Set on Neb server's embedding client
778785
if let Some(index_builder) = runtime.database_runtime().indexer() {
@@ -802,17 +809,69 @@ pub async fn initialize_embedding_service_for_runtime_with_config(
802809
) -> Result<(), String> {
803810
let mut indexer = EmbeddingIndexer::new(runtime).await?;
804811

805-
// Register Ollama provider with configured URL and dimensions
806-
let ollama =
807-
OllamaEmbedding::new_with_dimensions(&config.url, &config.model, config.dimensions);
808-
info!(
809-
"Ollama embedding provider initialized (URL: {}, model: {}, dims: {})",
810-
config.url, config.model, config.dimensions
811-
);
812-
indexer.register_provider(Arc::new(ollama));
813-
814-
// Set default provider and model
815-
indexer.set_default("ollama", &config.model);
812+
match config {
813+
EmbeddingConfig::Ollama(ollama_cfg) => {
814+
let ollama = OllamaEmbedding::new(&ollama_cfg.url);
815+
let dimensions = match ollama_cfg.dimensions {
816+
Some(d) => d,
817+
None => {
818+
// Auto-detect by probing the model with a single embedding
819+
let probe = ollama
820+
.embed_document(&ollama_cfg.model, "probe")
821+
.await
822+
.map_err(|e| {
823+
format!(
824+
"Failed to auto-detect dimensions for Ollama model '{}': {}",
825+
ollama_cfg.model, e
826+
)
827+
})?;
828+
probe.dimensions
829+
}
830+
};
831+
let ollama = OllamaEmbedding::new_with_dimensions(
832+
&ollama_cfg.url,
833+
&ollama_cfg.model,
834+
dimensions,
835+
);
836+
info!(
837+
"Ollama embedding provider initialized (URL: {}, model: {}, dims: {})",
838+
ollama_cfg.url, ollama_cfg.model, dimensions
839+
);
840+
indexer.register_provider(Arc::new(ollama));
841+
indexer.set_default("ollama", &ollama_cfg.model);
842+
}
843+
#[cfg(feature = "gguf")]
844+
EmbeddingConfig::Gguf(gguf_cfg) => {
845+
let model_dir = gguf_cfg
846+
.model_dir
847+
.clone()
848+
.unwrap_or_else(GgufEmbedding::default_model_dir);
849+
let mut gguf_model_config = provider::gguf::GgufModelConfig::default();
850+
if let Some(hf_model_id) = &gguf_cfg.hf_model_id {
851+
gguf_model_config.hf_model_id = hf_model_id.clone();
852+
}
853+
if let Some(gguf_file) = &gguf_cfg.gguf_file {
854+
gguf_model_config.gguf_file = gguf_file.clone();
855+
}
856+
if let Some(model_name) = &gguf_cfg.model_name {
857+
gguf_model_config.model_name = model_name.clone();
858+
} else if gguf_cfg.gguf_file.is_some() {
859+
// Derive model_name from the gguf filename stem when not explicitly set
860+
gguf_model_config.model_name = gguf_model_config
861+
.gguf_file
862+
.trim_end_matches(".gguf")
863+
.to_string();
864+
}
865+
let gguf = GgufEmbedding::with_config(&model_dir, gguf_model_config)?;
866+
info!(
867+
"GGUF embedding provider initialized (model: {}, dims: {})",
868+
gguf.model_name(),
869+
gguf.embedding_dim()
870+
);
871+
indexer.register_provider(Arc::new(gguf));
872+
indexer.set_default("gguf", provider::gguf::DEFAULT_MODEL_NAME);
873+
}
874+
}
816875

817876
// Set on Neb server's embedding client
818877
if let Some(index_builder) = runtime.database_runtime().indexer() {

src/apps/embedding/provider/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//! which is significantly more efficient than processing texts one at a time.
1010
1111
pub mod batch_coordinator;
12+
#[cfg(feature = "gguf")]
1213
pub mod gguf;
1314
pub mod ollama;
1415

src/apps/embedding/tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ mod provider_tests {
114114
}
115115

116116
/// Compare latency between GGUF (local embellama) and Ollama (remote GPU)
117+
#[cfg(feature = "gguf")]
117118
#[tokio::test]
118119
#[ignore] // Run with: cargo test test_latency_comparison -- --ignored --nocapture
119120
async fn test_latency_comparison() {
@@ -240,6 +241,7 @@ mod provider_tests {
240241

241242
/// Test GGUF embedding provider
242243
/// The model will be auto-downloaded from HuggingFace Hub if not present
244+
#[cfg(feature = "gguf")]
243245
#[tokio::test]
244246
#[ignore] // Run with: cargo test -- --ignored
245247
async fn test_gguf_provider() {

0 commit comments

Comments
 (0)