Skip to content

Commit 9184188

Browse files
committed
refactor: use Arc<String> for diffs, rayon parsing, concurrent git fetching
- FileChange.diff uses Arc<String> to avoid expensive cloning in split flow
- AnalyzerService.extract_symbols uses rayon par_iter for parallel tree-sitter parsing (Parser created per-file, not Send/Sync)
- GitService.fetch_file_contents fetches staged+HEAD content concurrently via tokio JoinSet instead of sequential per-file calls
- Apply let-chain idioms in config.rs, anthropic.rs, ollama.rs, git.rs
1 parent 0f6af25 commit 9184188

6 files changed

Lines changed: 176 additions & 90 deletions

File tree

src/config.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,10 +171,10 @@ impl Config {
171171
}
172172

173173
// User-level config
174-
if let Some(path) = Self::config_path() {
175-
if path.exists() {
176-
figment = figment.merge(Toml::file(&path));
177-
}
174+
if let Some(path) = Self::config_path()
175+
&& path.exists()
176+
{
177+
figment = figment.merge(Toml::file(&path));
178178
}
179179

180180
// Environment variables (COMMITBEE_MODEL, COMMITBEE_PROVIDER, etc.)

src/domain/change.rs

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
// SPDX-License-Identifier: PolyForm-Noncommercial-1.0.0
44

55
use std::path::PathBuf;
6+
use std::sync::Arc;
67

78
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
89
pub enum ChangeStatus {
@@ -49,11 +50,43 @@ impl FileCategory {
4950
// Build/CI detection
5051
if path.starts_with(".github/")
5152
|| path_str.contains("/.github/")
53+
|| path.starts_with(".gitlab-ci")
54+
|| path.starts_with(".circleci/")
55+
|| path_str.contains("/.circleci/")
5256
|| matches!(
5357
name,
54-
"Dockerfile" | "docker-compose.yml" | "Makefile" | "justfile" | ".dockerignore"
58+
"Dockerfile"
59+
| "Containerfile"
60+
| "docker-compose.yml"
61+
| "docker-compose.yaml"
62+
| "podman-compose.yml"
63+
| "podman-compose.yaml"
64+
| "compose.yml"
65+
| "compose.yaml"
66+
| "Makefile"
67+
| "justfile"
68+
| ".dockerignore"
69+
| ".containerignore"
70+
| "Jenkinsfile"
71+
| "Procfile"
72+
| "CMakeLists.txt"
73+
| "Makefile.am"
74+
| "configure.ac"
75+
| ".travis.yml"
76+
| "azure-pipelines.yml"
77+
| "netlify.toml"
78+
| "vercel.json"
79+
| "fly.toml"
80+
| "render.yaml"
81+
| "railway.toml"
82+
| "Earthfile"
83+
| "Tiltfile"
84+
| "skaffold.yaml"
85+
| "helmfile.yaml"
86+
| "Vagrantfile"
5587
)
5688
|| ext == "dockerfile"
89+
|| ext == "containerfile"
5790
{
5891
return Self::Build;
5992
}
@@ -72,14 +105,56 @@ impl FileCategory {
72105
| "go.mod"
73106
| "go.sum"
74107
| "bun.lockb"
108+
| "biome.json"
109+
| "biome.jsonc"
110+
| "deno.json"
111+
| "deno.jsonc"
112+
| ".eslintrc"
113+
| ".eslintrc.json"
114+
| ".eslintrc.js"
115+
| ".prettierrc"
116+
| ".prettierrc.json"
117+
| "ruff.toml"
118+
| ".ruff.toml"
119+
| "setup.py"
120+
| "setup.cfg"
121+
| "tox.ini"
122+
| "Pipfile"
123+
| "Pipfile.lock"
124+
| "uv.lock"
125+
| "Gemfile"
126+
| "Gemfile.lock"
127+
| "Rakefile"
128+
| "pom.xml"
129+
| "build.gradle"
130+
| "build.gradle.kts"
131+
| "settings.gradle"
132+
| "settings.gradle.kts"
133+
| "mix.exs"
134+
| "pubspec.yaml"
135+
| "pubspec.lock"
136+
| "REUSE.toml"
137+
| ".editorconfig"
138+
| "flake.nix"
139+
| "flake.lock"
140+
| "renovate.json"
141+
| "dependabot.yml"
75142
) {
76143
return Self::Config;
77144
}
78145

146+
// Dotfiles with config extensions
147+
if name.starts_with('.') && matches!(ext, "json" | "yaml" | "yml" | "toml" | "ini" | "cfg")
148+
{
149+
return Self::Config;
150+
}
151+
79152
// By extension - source code
80153
match ext {
81154
"rs" | "ts" | "js" | "py" | "go" | "tsx" | "jsx" | "java" | "kt" | "c" | "cpp"
82-
| "h" | "hpp" => Self::Source,
155+
| "h" | "hpp" | "cs" | "rb" | "swift" | "scala" | "ex" | "exs" | "php" | "r"
156+
| "lua" | "zig" | "nim" | "dart" | "vue" | "svelte" | "ml" | "mli" | "hs" | "clj"
157+
| "cljs" | "erl" | "hrl" | "pl" | "pm" | "sh" | "bash" | "zsh" => Self::Source,
83158
_ => Self::Other,
84159
}
85160
}
@@ -100,7 +175,7 @@ impl FileCategory {
100175
pub struct FileChange {
101176
pub path: PathBuf,
102177
pub status: ChangeStatus,
103-
pub diff: String,
178+
pub diff: Arc<String>,
104179
pub additions: usize,
105180
pub deletions: usize,
106181
pub category: FileCategory,

src/services/analyzer.rs

Lines changed: 42 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
//
33
// SPDX-License-Identifier: PolyForm-Noncommercial-1.0.0
44

5+
use std::collections::HashMap;
6+
use std::path::{Path, PathBuf};
57
use std::sync::LazyLock;
68

9+
use rayon::prelude::*;
710
use regex::Regex;
8-
use std::path::Path;
911
use tree_sitter::{Language, Parser};
1012

1113
use crate::domain::{CodeSymbol, FileChange, SymbolKind};
@@ -81,59 +83,49 @@ impl AnalyzerService {
8183
Ok(Self)
8284
}
8385

84-
/// Extract symbols from file changes using full file content + hunk mapping
86+
/// Extract symbols from file changes using full file content + hunk mapping.
87+
/// Uses rayon to parse files in parallel across CPU cores.
8588
pub fn extract_symbols(
86-
&mut self,
89+
&self,
8790
changes: &[FileChange],
88-
staged_content: &dyn Fn(&Path) -> Option<String>,
89-
head_content: &dyn Fn(&Path) -> Option<String>,
91+
staged_content: &HashMap<PathBuf, String>,
92+
head_content: &HashMap<PathBuf, String>,
9093
) -> Vec<CodeSymbol> {
91-
let mut symbols = Vec::new();
92-
93-
for change in changes {
94-
if change.is_binary {
95-
continue;
96-
}
97-
98-
let ext = change
99-
.path
100-
.extension()
101-
.and_then(|e| e.to_str())
102-
.unwrap_or("");
103-
104-
let hunks = DiffHunk::parse_from_diff(&change.diff);
105-
106-
// Get the appropriate language for parsing
107-
let language: Option<Language> = match ext {
108-
"rs" => Some(tree_sitter_rust::LANGUAGE.into()),
109-
"ts" | "tsx" => Some(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
110-
"py" => Some(tree_sitter_python::LANGUAGE.into()),
111-
"go" => Some(tree_sitter_go::LANGUAGE.into()),
112-
"js" | "jsx" => Some(tree_sitter_javascript::LANGUAGE.into()),
113-
_ => None,
114-
};
115-
116-
if let Some(lang) = language {
117-
let file_symbols = Self::extract_for_file_static(
118-
lang,
119-
change,
120-
&hunks,
121-
staged_content,
122-
head_content,
123-
);
124-
symbols.extend(file_symbols);
125-
}
126-
}
127-
128-
symbols
94+
changes
95+
.par_iter()
96+
.filter(|change| !change.is_binary)
97+
.flat_map(|change| {
98+
let ext = change
99+
.path
100+
.extension()
101+
.and_then(|e| e.to_str())
102+
.unwrap_or("");
103+
104+
let language: Option<Language> = match ext {
105+
"rs" => Some(tree_sitter_rust::LANGUAGE.into()),
106+
"ts" | "tsx" => Some(tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into()),
107+
"py" => Some(tree_sitter_python::LANGUAGE.into()),
108+
"go" => Some(tree_sitter_go::LANGUAGE.into()),
109+
"js" | "jsx" => Some(tree_sitter_javascript::LANGUAGE.into()),
110+
_ => None,
111+
};
112+
113+
language
114+
.map(|lang| {
115+
let hunks = DiffHunk::parse_from_diff(&change.diff);
116+
Self::extract_for_file(lang, change, &hunks, staged_content, head_content)
117+
})
118+
.unwrap_or_default()
119+
})
120+
.collect()
129121
}
130122

131-
fn extract_for_file_static(
123+
fn extract_for_file(
132124
language: Language,
133125
change: &FileChange,
134126
hunks: &[DiffHunk],
135-
staged_content: &dyn Fn(&Path) -> Option<String>,
136-
head_content: &dyn Fn(&Path) -> Option<String>,
127+
staged_content: &HashMap<PathBuf, String>,
128+
head_content: &HashMap<PathBuf, String>,
137129
) -> Vec<CodeSymbol> {
138130
let mut parser = Parser::new();
139131
if parser.set_language(&language).is_err() {
@@ -143,23 +135,23 @@ impl AnalyzerService {
143135
let mut symbols = Vec::new();
144136

145137
// Parse staged (new) file content
146-
if let Some(content) = staged_content(&change.path) {
138+
if let Some(content) = staged_content.get(&change.path) {
147139
let changed = Self::extract_changed_symbols_static(
148140
&mut parser,
149141
&change.path,
150-
&content,
142+
content,
151143
hunks,
152144
true,
153145
);
154146
symbols.extend(changed);
155147
}
156148

157149
// Parse HEAD (old) file content
158-
if let Some(content) = head_content(&change.path) {
150+
if let Some(content) = head_content.get(&change.path) {
159151
let changed = Self::extract_changed_symbols_static(
160152
&mut parser,
161153
&change.path,
162-
&content,
154+
content,
163155
hunks,
164156
false,
165157
);

src/services/git.rs

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use std::collections::HashMap;
66
use std::path::{Path, PathBuf};
7+
use std::sync::Arc;
78

89
use tokio::process::Command;
910

@@ -109,7 +110,7 @@ impl GitService {
109110
files.push(FileChange {
110111
path: file_path,
111112
status,
112-
diff,
113+
diff: Arc::new(diff),
113114
additions,
114115
deletions,
115116
category,
@@ -148,14 +149,12 @@ impl GitService {
148149
current_path = Some(path.to_string());
149150
}
150151
// For deleted files, +++ is /dev/null — use --- header instead
151-
if line == "+++ /dev/null" {
152-
if let Some(last_minus) =
152+
if line == "+++ /dev/null"
153+
&& let Some(last_minus) =
153154
current_lines.iter().rev().find(|l| l.starts_with("--- a/"))
154-
{
155-
if let Some(path) = last_minus.strip_prefix("--- a/") {
156-
current_path = Some(path.to_string());
157-
}
158-
}
155+
&& let Some(path) = last_minus.strip_prefix("--- a/")
156+
{
157+
current_path = Some(path.to_string());
159158
}
160159

161160
current_lines.push(line);
@@ -176,27 +175,47 @@ impl GitService {
176175

177176
// ─── File Content ───
178177

179-
/// Get staged file content (from index)
180-
pub async fn get_staged_content(&self, path: &Path) -> Option<String> {
181-
let output: std::process::Output = Command::new("git")
182-
.args(["show", &format!(":0:{}", path.display())])
183-
.current_dir(&self.work_dir)
184-
.output()
185-
.await
186-
.ok()?;
178+
/// Fetch staged and HEAD content for multiple files concurrently.
179+
/// Spawns all git-show processes in parallel instead of sequentially.
180+
pub async fn fetch_file_contents(
181+
&self,
182+
paths: &[PathBuf],
183+
) -> (HashMap<PathBuf, String>, HashMap<PathBuf, String>) {
184+
let mut set = tokio::task::JoinSet::new();
185+
186+
for path in paths {
187+
let work_dir = self.work_dir.clone();
188+
let path = path.clone();
189+
set.spawn(async move {
190+
let staged =
191+
Self::fetch_git_show(&work_dir, &format!(":0:{}", path.display())).await;
192+
let head =
193+
Self::fetch_git_show(&work_dir, &format!("HEAD:{}", path.display())).await;
194+
(path, staged, head)
195+
});
196+
}
187197

188-
if output.status.success() {
189-
String::from_utf8(output.stdout).ok()
190-
} else {
191-
None
198+
let mut staged_map = HashMap::new();
199+
let mut head_map = HashMap::new();
200+
201+
while let Some(result) = set.join_next().await {
202+
if let Ok((path, staged, head)) = result {
203+
if let Some(content) = staged {
204+
staged_map.insert(path.clone(), content);
205+
}
206+
if let Some(content) = head {
207+
head_map.insert(path, content);
208+
}
209+
}
192210
}
211+
212+
(staged_map, head_map)
193213
}
194214

195-
/// Get HEAD file content
196-
pub async fn get_head_content(&self, path: &Path) -> Option<String> {
215+
async fn fetch_git_show(work_dir: &Path, ref_path: &str) -> Option<String> {
197216
let output: std::process::Output = Command::new("git")
198-
.args(["show", &format!("HEAD:{}", path.display())])
199-
.current_dir(&self.work_dir)
217+
.args(["show", ref_path])
218+
.current_dir(work_dir)
200219
.output()
201220
.await
202221
.ok()?;

src/services/llm/anthropic.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -179,11 +179,11 @@ impl AnthropicProvider {
179179
if let Ok(event) = serde_json::from_str::<StreamEvent>(data) {
180180
match event.event_type.as_str() {
181181
"content_block_delta" => {
182-
if let Some(delta) = &event.delta {
183-
if let Some(text) = &delta.text {
184-
let _ = token_tx.send(text.clone()).await;
185-
full_response.push_str(text);
186-
}
182+
if let Some(delta) = &event.delta
183+
&& let Some(text) = &delta.text
184+
{
185+
let _ = token_tx.send(text.clone()).await;
186+
full_response.push_str(text);
187187
}
188188
}
189189
"message_stop" => {

0 commit comments

Comments
 (0)