Skip to content

Commit 7d7d89d

Browse files
committed
feat(ollama): add health check, timeout, and model verification
Harden Ollama provider with configurable timeout, temperature, and num_predict. Add health_check() via /api/tags and verify_model() to differentiate connection failures from missing models. Detect connection and timeout errors with dedicated error variants.
1 parent 4315561 commit 7d7d89d

8 files changed

Lines changed: 736 additions & 135 deletions

File tree

Cargo.lock

Lines changed: 591 additions & 131 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ license = "GPL-3.0-only"
1313
[dependencies]
1414
# CLI
1515
clap = { version = "4.5", features = ["derive", "env"] }
16+
clap_complete = "4.5"
1617

1718
# Async runtime
1819
tokio = { version = "1.43", features = ["rt-multi-thread", "macros", "signal", "sync"] }
@@ -43,12 +44,17 @@ tree-sitter-javascript = "0.25"
4344

4445
# Error handling
4546
thiserror = "2.0"
47+
miette = { version = "7.6", features = ["fancy"] }
4648

4749
# Terminal UI
4850
dialoguer = "0.12"
4951
console = "0.16"
5052
indicatif = "0.18"
5153

54+
# Logging
55+
tracing = "0.1"
56+
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
57+
5258
# Utilities
5359
regex = "1.12"
5460

src/app.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ impl App {
135135
));
136136

137137
let provider = llm::create_provider(&self.config)?;
138+
provider.verify().await?;
138139

139140
// Setup streaming output
140141
let (tx, mut rx) = mpsc::channel::<String>(64);

src/config.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,18 @@ pub struct Config {
8484
#[serde(default = "default_max_context_chars")]
8585
pub max_context_chars: usize,
8686

87+
/// Request timeout in seconds (default 300)
88+
#[serde(default = "default_timeout_secs")]
89+
pub timeout_secs: u64,
90+
91+
/// LLM temperature (0.0-2.0, default 0.3)
92+
#[serde(default = "default_temperature")]
93+
pub temperature: f32,
94+
95+
/// Maximum tokens to generate (default 256)
96+
#[serde(default = "default_num_predict")]
97+
pub num_predict: u32,
98+
8799
/// Commit message format options
88100
#[serde(default)]
89101
pub format: CommitFormat,
@@ -105,6 +117,15 @@ fn default_max_diff_lines() -> usize {
105117
fn default_max_file_lines() -> usize {
106118
100
107119
}
120+
fn default_timeout_secs() -> u64 {
121+
300
122+
}
123+
fn default_temperature() -> f32 {
124+
0.3
125+
}
126+
fn default_num_predict() -> u32 {
127+
256
128+
}
108129

109130
impl Default for Config {
110131
fn default() -> Self {
@@ -116,6 +137,9 @@ impl Default for Config {
116137
max_diff_lines: default_max_diff_lines(),
117138
max_file_lines: default_max_file_lines(),
118139
max_context_chars: default_max_context_chars(),
140+
timeout_secs: default_timeout_secs(),
141+
temperature: default_temperature(),
142+
num_predict: default_num_predict(),
119143
format: CommitFormat::default(),
120144
}
121145
}

src/error.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,15 @@ pub enum Error {
2424
#[error("Potential secrets detected: {patterns:?}. Use --allow-secrets to proceed.")]
2525
SecretsDetected { patterns: Vec<String> },
2626

27+
#[error("Cannot connect to Ollama at {host}. Is it running?")]
28+
OllamaNotRunning { host: String },
29+
30+
#[error("Model '{model}' not found. Available: {}", available.join(", "))]
31+
ModelNotFound {
32+
model: String,
33+
available: Vec<String>,
34+
},
35+
2736
#[error("Provider '{provider}' error: {message}")]
2837
Provider { provider: String, message: String },
2938

src/services/llm/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,13 @@ impl LlmBackend {
3333
Self::Ollama(p) => p.name(),
3434
}
3535
}
36+
37+
/// Verify provider connectivity and model availability
38+
pub async fn verify(&self) -> Result<()> {
39+
match self {
40+
Self::Ollama(p) => p.verify_model().await,
41+
}
42+
}
3643
}
3744

3845
pub fn create_provider(config: &Config) -> Result<LlmBackend> {

src/services/llm/ollama.rs

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
//
33
// SPDX-License-Identifier: GPL-3.0-only
44

5+
use std::time::Duration;
6+
57
use reqwest::Client;
68
use serde::{Deserialize, Serialize};
79
use tokio::sync::mpsc;
@@ -15,6 +17,8 @@ pub struct OllamaProvider {
1517
client: Client,
1618
host: String,
1719
model: String,
20+
temperature: f32,
21+
num_predict: u32,
1822
}
1923

2024
#[derive(Serialize)]
@@ -23,6 +27,13 @@ struct GenerateRequest {
2327
prompt: String,
2428
system: String,
2529
stream: bool,
30+
options: OllamaOptions,
31+
}
32+
33+
#[derive(Serialize)]
34+
struct OllamaOptions {
35+
temperature: f32,
36+
num_predict: u32,
2637
}
2738

2839
const SYSTEM_PROMPT: &str = r#"You are a commit message generator. Analyze git diffs and output JSON commit messages.
@@ -42,16 +53,79 @@ struct GenerateResponse {
4253
done: bool,
4354
}
4455

56+
#[derive(Deserialize)]
57+
struct TagsResponse {
58+
models: Vec<ModelInfo>,
59+
}
60+
61+
#[derive(Deserialize)]
62+
struct ModelInfo {
63+
name: String,
64+
}
65+
4566
impl OllamaProvider {
4667
pub fn new(config: &Config) -> Self {
68+
let client = Client::builder()
69+
.timeout(Duration::from_secs(config.timeout_secs))
70+
.build()
71+
.unwrap_or_default();
72+
4773
Self {
48-
client: Client::new(),
74+
client,
4975
// Sanitize: remove trailing slashes to avoid //api/generate
5076
host: config.ollama_host.trim_end_matches('/').to_string(),
5177
model: config.model.clone(),
78+
temperature: config.temperature,
79+
num_predict: config.num_predict,
5280
}
5381
}
5482

83+
/// Check Ollama connectivity and return available model names
84+
pub async fn health_check(&self) -> Result<Vec<String>> {
85+
let url = format!("{}/api/tags", self.host);
86+
87+
let response = self.client.get(&url).send().await.map_err(|e| {
88+
if e.is_connect() {
89+
Error::OllamaNotRunning {
90+
host: self.host.clone(),
91+
}
92+
} else {
93+
Error::Provider {
94+
provider: "ollama".into(),
95+
message: e.to_string(),
96+
}
97+
}
98+
})?;
99+
100+
let tags: TagsResponse = response.json().await.map_err(|e| Error::Provider {
101+
provider: "ollama".into(),
102+
message: format!("failed to parse /api/tags response: {e}"),
103+
})?;
104+
105+
Ok(tags.models.into_iter().map(|m| m.name).collect())
106+
}
107+
108+
/// Verify that the configured model is available
109+
pub async fn verify_model(&self) -> Result<()> {
110+
let available = self.health_check().await?;
111+
112+
// Ollama model names may include `:latest` tag
113+
let model_matches = available.iter().any(|name| {
114+
name == &self.model
115+
|| name == &format!("{}:latest", self.model)
116+
|| name.strip_suffix(":latest") == Some(&self.model)
117+
});
118+
119+
if !model_matches {
120+
return Err(Error::ModelNotFound {
121+
model: self.model.clone(),
122+
available,
123+
});
124+
}
125+
126+
Ok(())
127+
}
128+
55129
pub async fn generate(
56130
&self,
57131
prompt: &str,
@@ -68,12 +142,29 @@ impl OllamaProvider {
68142
prompt: prompt.to_string(),
69143
system: SYSTEM_PROMPT.to_string(),
70144
stream: true,
145+
options: OllamaOptions {
146+
temperature: self.temperature,
147+
num_predict: self.num_predict,
148+
},
71149
})
72150
.send()
73151
.await
74-
.map_err(|e| Error::Provider {
75-
provider: "ollama".into(),
76-
message: e.to_string(),
152+
.map_err(|e| {
153+
if e.is_connect() {
154+
Error::OllamaNotRunning {
155+
host: self.host.clone(),
156+
}
157+
} else if e.is_timeout() {
158+
Error::Provider {
159+
provider: "ollama".into(),
160+
message: "request timed out".into(),
161+
}
162+
} else {
163+
Error::Provider {
164+
provider: "ollama".into(),
165+
message: e.to_string(),
166+
}
167+
}
77168
})?;
78169

79170
if !response.status().is_success() {

tests/config.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ fn default_config_values() {
1616
assert_eq!(config.max_diff_lines, 500);
1717
assert_eq!(config.max_file_lines, 100);
1818
assert_eq!(config.max_context_chars, 24000);
19+
assert_eq!(config.timeout_secs, 300);
20+
assert!((config.temperature - 0.3).abs() < f32::EPSILON);
21+
assert_eq!(config.num_predict, 256);
1922
assert!(config.format.include_body);
2023
assert!(config.format.include_scope);
2124
assert!(config.format.lowercase_subject);

0 commit comments

Comments (0)