Skip to content

Commit 4b9f9cd

Browse files
committed
feat(context): add doc-vs-code distinction and test-to-code ratio
Add SpanChangeKind enum (Unchanged, WhitespaceOnly, DocsOnly, Mixed, Semantic) for richer modified-symbol classification. Doc-only changes suggest the `docs` type. Modified symbols show a [docs only] / [docs + code] suffix. Also add a test-to-code ratio check: >80% test additions suggests the `test` type. Uses cross-multiplication to avoid integer truncation.
1 parent f2535bc commit 4b9f9cd

7 files changed

Lines changed: 328 additions & 5 deletions

File tree

src/domain/symbol.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,22 @@ pub enum SymbolKind {
1919
Type,
2020
}
2121

22+
/// Richer classification of what changed within a symbol's span.
///
/// Complements the boolean whitespace-only flag on a symbol by also
/// separating comment/doc edits from code edits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
// Marked non-exhaustive like the neighbouring change enum so new variants
// can be added without breaking downstream matches.
#[non_exhaustive]
// NOTE(review): presumably needed because `Unchanged` is not constructed
// anywhere visible in this change — confirm, and drop the allow once it is.
#[allow(dead_code)]
pub enum SpanChangeKind {
    /// No changes within the symbol span
    Unchanged,
    /// Only whitespace/indentation changed
    WhitespaceOnly,
    /// Only doc comments changed (code unchanged)
    DocsOnly,
    /// Both doc comments and code changed
    Mixed,
    /// Code changed (no doc changes, or docs not present)
    Semantic,
}
37+
2238
/// How a symbol was affected by the change.
2339
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
2440
#[non_exhaustive]
@@ -48,6 +64,9 @@ pub struct CodeSymbol {
4864
/// For symbols that exist in both HEAD and staged, indicates if only whitespace changed.
4965
/// None = symbol is purely added or removed, not a modification.
5066
pub is_whitespace_only: Option<bool>,
67+
/// Richer classification of what changed within this symbol's span.
68+
/// None for purely added or removed symbols.
69+
pub span_change_kind: Option<SpanChangeKind>,
5170
/// Full signature extracted from tree-sitter AST (everything before the body).
5271
/// e.g., "pub fn connect(host: &str, timeout: Duration) -> Result<Connection>"
5372
/// None for languages or constructs where signature extraction isn't supported.

src/eval.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,7 @@ impl EvalRunner {
516516
is_public: def.is_public,
517517
is_added: def.is_added,
518518
is_whitespace_only: def.is_whitespace_only,
519+
span_change_kind: None,
519520
signature: def.signature,
520521
parent_scope: def.parent_scope,
521522
})

src/services/analyzer.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ impl AnalyzerService {
346346
is_public,
347347
is_added,
348348
is_whitespace_only: None,
349+
span_change_kind: None,
349350
signature,
350351
parent_scope,
351352
});

src/services/context.rs

Lines changed: 151 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ use std::collections::HashSet;
66

77
use crate::config::Config;
88
use crate::domain::{
9-
ChangeStatus, CodeSymbol, CommitType, FileCategory, PromptContext, StagedChanges, SymbolKind,
9+
ChangeStatus, CodeSymbol, CommitType, FileCategory, PromptContext, SpanChangeKind,
10+
StagedChanges, SymbolKind,
1011
};
1112

1213
const SYSTEM_PROMPT_RESERVE: usize = 2_000;
@@ -114,6 +115,13 @@ impl ContextBuilder {
114115
old_start,
115116
old_end,
116117
);
118+
symbol.span_change_kind = Self::classify_span_change_rich(
119+
&file_change.diff,
120+
symbol.line,
121+
symbol.end_line,
122+
old_start,
123+
old_end,
124+
);
117125
}
118126
}
119127

@@ -135,7 +143,16 @@ impl ContextBuilder {
135143
&& modified_symbols
136144
.iter()
137145
.all(|s| s.is_whitespace_only == Some(true));
138-
let commit_type = Self::infer_commit_type(changes, &symbols_deduped, all_modified_ws);
146+
let all_modified_docs = !modified_symbols.is_empty()
147+
&& modified_symbols
148+
.iter()
149+
.all(|s| s.span_change_kind == Some(SpanChangeKind::DocsOnly));
150+
let commit_type = Self::infer_commit_type(
151+
changes,
152+
&symbols_deduped,
153+
all_modified_ws,
154+
all_modified_docs,
155+
);
139156
let scope = if config.format.include_scope {
140157
Self::infer_scope(changes)
141158
} else {
@@ -297,10 +314,114 @@ impl ContextBuilder {
297314
Some(old_text == new_text)
298315
}
299316

317+
/// Classify changes within a symbol span with doc-vs-code distinction.
318+
///
319+
/// Returns a richer `SpanChangeKind` that distinguishes whitespace-only,
320+
/// doc-only, mixed, and semantic changes. Returns `None` if no changes
321+
/// fall within the symbol span.
322+
pub(crate) fn classify_span_change_rich(
323+
diff: &str,
324+
new_start: usize,
325+
new_end: usize,
326+
old_start: usize,
327+
old_end: usize,
328+
) -> Option<SpanChangeKind> {
329+
use crate::services::analyzer::DiffHunk;
330+
331+
let hunks = DiffHunk::parse_from_diff(diff);
332+
let mut added_in_span: Vec<&str> = Vec::new();
333+
let mut removed_in_span: Vec<&str> = Vec::new();
334+
335+
let mut current_old_line: usize = 0;
336+
let mut current_new_line: usize = 0;
337+
let mut hunk_idx: usize = 0;
338+
let mut in_hunk = false;
339+
340+
for line in diff.lines() {
341+
if line.starts_with("@@") {
342+
if hunk_idx < hunks.len() {
343+
current_old_line = hunks[hunk_idx].old_start;
344+
current_new_line = hunks[hunk_idx].new_start;
345+
hunk_idx += 1;
346+
in_hunk = true;
347+
}
348+
continue;
349+
}
350+
351+
if !in_hunk || line.starts_with("+++") || line.starts_with("---") {
352+
continue;
353+
}
354+
355+
if let Some(content) = line.strip_prefix('+') {
356+
if current_new_line >= new_start && current_new_line <= new_end {
357+
added_in_span.push(content);
358+
}
359+
current_new_line += 1;
360+
} else if let Some(content) = line.strip_prefix('-') {
361+
if current_old_line >= old_start && current_old_line <= old_end {
362+
removed_in_span.push(content);
363+
}
364+
current_old_line += 1;
365+
} else {
366+
current_old_line += 1;
367+
current_new_line += 1;
368+
}
369+
}
370+
371+
if added_in_span.is_empty() && removed_in_span.is_empty() {
372+
return None;
373+
}
374+
375+
// Check whitespace-only first (same logic as classify_span_change)
376+
let old_text: String = removed_in_span
377+
.iter()
378+
.flat_map(|l| l.chars())
379+
.filter(|c| !c.is_whitespace())
380+
.collect();
381+
let new_text: String = added_in_span
382+
.iter()
383+
.flat_map(|l| l.chars())
384+
.filter(|c| !c.is_whitespace())
385+
.collect();
386+
if old_text == new_text {
387+
return Some(SpanChangeKind::WhitespaceOnly);
388+
}
389+
390+
// Classify each changed line as doc or code
391+
let has_doc = added_in_span
392+
.iter()
393+
.chain(removed_in_span.iter())
394+
.any(|l| Self::is_doc_comment(l));
395+
let has_code = added_in_span.iter().chain(removed_in_span.iter()).any(|l| {
396+
let trimmed = l.trim();
397+
!trimmed.is_empty() && !Self::is_doc_comment(l)
398+
});
399+
400+
match (has_doc, has_code) {
401+
(true, false) => Some(SpanChangeKind::DocsOnly),
402+
(true, true) => Some(SpanChangeKind::Mixed),
403+
(false, _) => Some(SpanChangeKind::Semantic),
404+
}
405+
}
406+
407+
/// Check if a line looks like a doc comment or regular comment.
///
/// The check is purely lexical: the line is trimmed and matched against
/// common comment openers (`//`, `///`, `//!`, `/*`, `/**`, block-comment
/// continuation `*`, closing `*/`, `#`, and Python `"""`).
///
/// Rust attributes such as `#[derive(..)]` or `#![allow(..)]` start with `#`
/// but are code, so they are explicitly NOT treated as comments.
fn is_doc_comment(line: &str) -> bool {
    let trimmed = line.trim();

    // Attributes change compilation/behavior; never count them as docs.
    if trimmed.starts_with("#[") || trimmed.starts_with("#![") {
        return false;
    }

    trimmed.starts_with("//") // covers `//`, `///` and `//!`
        || trimmed.starts_with("/*") // covers `/*` and `/**`
        || trimmed == "*" // blank continuation line inside a block comment
        || trimmed.starts_with("* ") // inside /** */ block
        || trimmed.starts_with("*/")
        || trimmed.starts_with('#') // Python/Ruby comments
        || trimmed.starts_with("\"\"\"") // Python docstrings
}
419+
300420
pub fn infer_commit_type(
301421
changes: &StagedChanges,
302422
symbols: &[CodeSymbol],
303423
all_modified_whitespace_only: bool,
424+
all_modified_docs_only: bool,
304425
) -> CommitType {
305426
let categories: Vec<_> = changes.files.iter().map(|f| f.category).collect();
306427

@@ -314,6 +435,20 @@ impl ContextBuilder {
314435
return CommitType::Test;
315436
}
316437

438+
// Predominantly test additions (>80%) → test type
439+
// Cross-multiply to avoid integer division truncation (F-009):
440+
// test_additions/total_additions > 80/100 ⟹ test_additions * 100 > total_additions * 80
441+
let test_additions: usize = changes
442+
.files
443+
.iter()
444+
.filter(|f| f.category == FileCategory::Test)
445+
.map(|f| f.additions)
446+
.sum();
447+
let total_additions: usize = changes.files.iter().map(|f| f.additions).sum();
448+
if total_additions > 0 && test_additions * 100 > total_additions * 80 {
449+
return CommitType::Test;
450+
}
451+
317452
// All config -> chore
318453
if categories.iter().all(|c| *c == FileCategory::Config) {
319454
return CommitType::Chore;
@@ -330,6 +465,12 @@ impl ContextBuilder {
330465
return CommitType::Style;
331466
}
332467

468+
// All modified symbols are docs-only and no added/removed symbols → docs
469+
// (catches doc comment edits inside existing functions/structs)
470+
if all_modified_docs_only && symbols.is_empty() {
471+
return CommitType::Docs;
472+
}
473+
333474
// Explicit bug evidence -> fix
334475
if Self::detect_bug_evidence(changes) {
335476
return CommitType::Fix;
@@ -629,6 +770,14 @@ impl ContextBuilder {
629770
}
630771
};
631772

773+
// Append doc-vs-code suffix when span_change_kind is informative
774+
let suffix = match new_sym.span_change_kind {
775+
Some(SpanChangeKind::DocsOnly) => " [docs only]",
776+
Some(SpanChangeKind::Mixed) => " [docs + code]",
777+
_ => "",
778+
};
779+
let line = format!("{}{}", line, suffix);
780+
632781
if output.len() + line.len() + 1 > char_budget {
633782
break;
634783
}

src/services/splitter.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ impl CommitSplitter {
125125

126126
// Whitespace and docs-only classification both require the full build();
127127
// pass false for both flags here since sub_symbols are not yet classified via classify_span_change.
128-
let commit_type = ContextBuilder::infer_commit_type(&sub_changes, &sub_symbols, false);
128+
let commit_type =
129+
ContextBuilder::infer_commit_type(&sub_changes, &sub_symbols, false, false);
129130
let scope = ContextBuilder::infer_scope(&sub_changes);
130131

131132
groups.push(CommitGroup {

0 commit comments

Comments
 (0)