Skip to content

Commit 1396b52

Browse files
committed
feat(eval): add 10 fixtures and integration test suite
Create evaluation fixtures covering feat, fix, refactor, chore, docs, style, test, AST signature extraction, cross-file connections, and MSRV breaking change detection. Each fixture includes metadata.toml with assertion sections (evidence, prompt, connections, breaking) and realistic unified diffs. AST fixtures include symbols.toml for injected CodeSymbol data. Add tests/eval.rs integration test with 7 test functions: all_fixtures, type_inference, evidence_flags, prompt_content, connections, breaking changes, and fixture count validation. All gated behind eval feature.
1 parent 916688f commit 1396b52

26 files changed

Lines changed: 775 additions & 2 deletions

File tree

src/eval.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,8 +461,7 @@ impl EvalRunner {
461461
fn load_metadata(&self, fixture_dir: &Path) -> Result<FixtureMetadata> {
462462
let content = std::fs::read_to_string(fixture_dir.join("metadata.toml"))
463463
.map_err(|e| Error::Config(format!("Cannot read metadata.toml: {}", e)))?;
464-
toml::from_str(&content)
465-
.map_err(|e| Error::Config(format!("Invalid metadata.toml: {}", e)))
464+
toml::from_str(&content).map_err(|e| Error::Config(format!("Invalid metadata.toml: {}", e)))
466465
}
467466

468467
fn load_config(&self, fixture_dir: &Path) -> Config {

tests/eval.rs

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
// SPDX-FileCopyrightText: 2026 Sephyi <me@sephy.io>
2+
//
3+
// SPDX-License-Identifier: PolyForm-Noncommercial-1.0.0
4+
5+
//! Integration tests for the evaluation harness.
6+
//!
7+
//! Runs all fixtures through the deterministic (no-LLM) pipeline and
8+
//! asserts type inference, evidence flags, prompt content, connections,
9+
//! and breaking change detection.
10+
11+
#![cfg(feature = "eval")]
12+
13+
use std::path::PathBuf;
14+
15+
use commitbee::eval::EvalRunner;
16+
17+
fn fixtures_dir() -> PathBuf {
18+
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/eval")
19+
}
20+
21+
/// Run all fixtures and assert every one passes.
///
/// Failing fixtures are gathered into one report and raised in a single panic,
/// so one run lists every broken fixture instead of stopping at the first.
#[test]
fn all_fixtures_pass() {
    let runner = EvalRunner::new(fixtures_dir(), None);
    let results = runner.run_sync().expect("eval runner should not error");

    assert!(!results.is_empty(), "should discover at least one fixture");

    // One multi-line report per failing fixture, in discovery order.
    let failures: Vec<String> = results
        .iter()
        .filter(|outcome| !outcome.passed())
        .map(|outcome| {
            let mut report = format!("FIXTURE FAILED: {}\n", outcome.fixture_name);
            if !outcome.type_passed {
                report += &format!(
                    " Type: expected={}, actual={}\n",
                    outcome.expected_type, outcome.actual_type
                );
            }
            if !outcome.scope_passed {
                report += &format!(
                    " Scope: expected={:?}, actual={:?}\n",
                    outcome.expected_scope, outcome.actual_scope
                );
            }
            if !outcome.prompt_assembled {
                report += " Prompt: failed to assemble\n";
            }
            for failure in &outcome.assertion_failures {
                report += &format!(" {}\n", failure);
            }
            if let Some(err) = &outcome.error {
                report += &format!(" Error: {}\n", err);
            }
            report
        })
        .collect();

    if failures.is_empty() {
        return;
    }

    panic!(
        "{} of {} fixtures failed:\n\n{}",
        failures.len(),
        results.len(),
        failures.join("\n")
    );
}
67+
68+
/// Every fixture must have `type_passed` set; the first one that does not
/// fails the test with its expected and actual type.
#[test]
fn type_inference_fixtures() {
    let runner = EvalRunner::new(fixtures_dir(), None);
    let results = runner.run_sync().expect("eval runner should not error");

    // Report the first mismatch in discovery order.
    if let Some(mismatch) = results.iter().find(|outcome| !outcome.type_passed) {
        panic!(
            "Type mismatch in {}: expected={}, actual={}",
            mismatch.fixture_name, mismatch.expected_type, mismatch.actual_type
        );
    }
}
82+
83+
/// No fixture may report an assertion failure in the "evidence" category.
#[test]
fn evidence_flag_fixtures() {
    let runner = EvalRunner::new(fixtures_dir(), None);
    let results = runner.run_sync().expect("eval runner should not error");

    for outcome in &results {
        // Messages of the failures that belong to this category only.
        let messages: Vec<_> = outcome
            .assertion_failures
            .iter()
            .filter(|failure| failure.category == "evidence")
            .map(|failure| &failure.message)
            .collect();

        assert!(
            messages.is_empty(),
            "Evidence failures in {}: {:?}",
            outcome.fixture_name,
            messages
        );
    }
}
106+
107+
/// Every fixture must assemble a prompt and report no assertion failure in the
/// "prompt" category.
#[test]
fn prompt_content_fixtures() {
    let runner = EvalRunner::new(fixtures_dir(), None);
    let results = runner.run_sync().expect("eval runner should not error");

    for outcome in &results {
        // Assembly is checked first, before any content assertion.
        assert!(
            outcome.prompt_assembled,
            "Prompt assembly failed for {}",
            outcome.fixture_name
        );

        let messages: Vec<_> = outcome
            .assertion_failures
            .iter()
            .filter(|failure| failure.category == "prompt")
            .map(|failure| &failure.message)
            .collect();

        assert!(
            messages.is_empty(),
            "Prompt content failures in {}: {:?}",
            outcome.fixture_name,
            messages
        );
    }
}
136+
137+
/// No fixture may report an assertion failure in the "connections" category.
#[test]
fn connection_detection_fixtures() {
    let runner = EvalRunner::new(fixtures_dir(), None);
    let results = runner.run_sync().expect("eval runner should not error");

    for outcome in &results {
        let messages: Vec<_> = outcome
            .assertion_failures
            .iter()
            .filter(|failure| failure.category == "connections")
            .map(|failure| &failure.message)
            .collect();

        assert!(
            messages.is_empty(),
            "Connection failures in {}: {:?}",
            outcome.fixture_name,
            messages
        );
    }
}
157+
158+
/// No fixture may report an assertion failure in the "breaking" category.
#[test]
fn breaking_change_fixtures() {
    let runner = EvalRunner::new(fixtures_dir(), None);
    let results = runner.run_sync().expect("eval runner should not error");

    for outcome in &results {
        let messages: Vec<_> = outcome
            .assertion_failures
            .iter()
            .filter(|failure| failure.category == "breaking")
            .map(|failure| &failure.message)
            .collect();

        assert!(
            messages.is_empty(),
            "Breaking change failures in {}: {:?}",
            outcome.fixture_name,
            messages
        );
    }
}
181+
182+
/// Require a minimum number of discovered fixtures to catch accidental
/// fixture deletion. This is a lower bound, so adding fixtures never breaks it.
#[test]
fn fixture_count() {
    let runner = EvalRunner::new(fixtures_dir(), None);
    let results = runner.run_sync().expect("eval runner should not error");

    // 2 original (simple-feat, style-only) + 10 new = 12
    let found = results.len();
    if found < 12 {
        panic!("Expected at least 12 fixtures, found {}", found);
    }
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
diff --git a/src/services/validator.rs b/src/services/validator.rs
2+
new file mode 100644
3+
index 0000000..abc1234
4+
--- /dev/null
5+
+++ b/src/services/validator.rs
6+
@@ -0,0 +1,12 @@
7+
+use crate::error::Result;
8+
+
9+
+/// Validate user input before processing.
10+
+pub fn validate_input(input: &str) -> Result<()> {
11+
+ if input.is_empty() {
12+
+ return Err(crate::error::Error::Config("empty input".into()));
13+
+ }
14+
+ if input.len() > 1024 {
15+
+ return Err(crate::error::Error::Config("input too long".into()));
16+
+ }
17+
+ Ok(())
18+
+}
19+
diff --git a/src/services/handler.rs b/src/services/handler.rs
20+
new file mode 100644
21+
index 0000000..def5678
22+
--- /dev/null
23+
+++ b/src/services/handler.rs
24+
@@ -0,0 +1,15 @@
25+
+use crate::error::Result;
26+
+use super::validator::validate_input;
27+
+
28+
+pub struct RequestHandler;
29+
+
30+
+impl RequestHandler {
31+
+ pub fn handle(&self, request: &str) -> Result<String> {
32+
+ // Validate first, then process
33+
+ validate_input(request)?;
34+
+
35+
+ // Process the validated input
36+
+ let result = request.to_uppercase();
37+
+ Ok(result)
38+
+ }
39+
+}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SPDX-FileCopyrightText: 2026 Sephyi <me@sephy.io>
2+
#
3+
# SPDX-License-Identifier: PolyForm-Noncommercial-1.0.0
4+
5+
name = "ast-cross-file-connection"
6+
description = "Cross-file call should produce CONNECTIONS section in prompt"
7+
language = "rust"
8+
category = "feat"
9+
expected_type = "feat"
10+
expected_scope = "optional"
11+
12+
[prompt]
13+
must_contain = ["CONNECTIONS", "calls validate_input"]
14+
15+
[connections]
16+
min_count = 1
17+
must_contain = ["calls validate_input"]
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
[[symbols]]
kind = "Function"
name = "validate_input"
file = "src/services/validator.rs"
line = 4
# Inclusive 1-based line of the function's closing brace in the fixture diff
# (the validator.rs hunk adds 12 lines and the function ends on the last one).
end_line = 12
is_public = true
is_added = true
signature = "pub fn validate_input(input: &str) -> Result<()>"
10+
11+
[[symbols]]
12+
kind = "Struct"
13+
name = "RequestHandler"
14+
file = "src/services/handler.rs"
15+
line = 4
16+
end_line = 4
17+
is_public = true
18+
is_added = true
19+
signature = "pub struct RequestHandler"
20+
21+
[[symbols]]
22+
kind = "Function"
23+
name = "handle"
24+
file = "src/services/handler.rs"
25+
line = 7
26+
end_line = 14
27+
is_public = true
28+
is_added = true
29+
signature = "pub fn handle(&self, request: &str) -> Result<String>"
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
diff --git a/src/services/validator.rs b/src/services/validator.rs
2+
new file mode 100644
3+
index 0000000..abc1234
4+
--- /dev/null
5+
+++ b/src/services/validator.rs
6+
@@ -0,0 +1,18 @@
7+
+use crate::error::Result;
8+
+
9+
+pub struct InputValidator {
10+
+ max_length: usize,
11+
+}
12+
+
13+
+impl InputValidator {
14+
+ pub fn new(max_length: usize) -> Self {
15+
+ Self { max_length }
16+
+ }
17+
+
18+
+ pub fn validate(&self, input: &str) -> Result<()> {
19+
+ if input.len() > self.max_length {
20+
+ return Err(crate::error::Error::Config("input too long".into()));
21+
+ }
22+
+ Ok(())
23+
+ }
24+
+}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SPDX-FileCopyrightText: 2026 Sephyi <me@sephy.io>
2+
#
3+
# SPDX-License-Identifier: PolyForm-Noncommercial-1.0.0
4+
5+
name = "ast-signature-extraction"
6+
description = "Symbol with signature should appear in prompt SYMBOLS section"
7+
language = "rust"
8+
category = "feat"
9+
expected_type = "feat"
10+
expected_scope = "optional"
11+
12+
[evidence]
13+
has_new_public_api = true
14+
15+
[prompt]
16+
must_contain = ["SYMBOLS CHANGED", "pub fn validate"]
17+
must_not_contain = []
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
[[symbols]]
2+
kind = "Struct"
3+
name = "InputValidator"
4+
file = "src/services/validator.rs"
5+
line = 3
6+
end_line = 5
7+
is_public = true
8+
is_added = true
9+
signature = "pub struct InputValidator"
10+
11+
[[symbols]]
12+
kind = "Function"
13+
name = "new"
14+
file = "src/services/validator.rs"
15+
line = 8
16+
end_line = 10
17+
is_public = true
18+
is_added = true
19+
signature = "pub fn new(max_length: usize) -> Self"
20+
21+
[[symbols]]
22+
kind = "Function"
23+
name = "validate"
24+
file = "src/services/validator.rs"
25+
line = 12
26+
end_line = 17
27+
is_public = true
28+
is_added = true
29+
signature = "pub fn validate(&self, input: &str) -> Result<()>"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
diff --git a/Cargo.toml b/Cargo.toml
2+
index abc1234..def5678 100644
3+
--- a/Cargo.toml
4+
+++ b/Cargo.toml
5+
@@ -15,8 +15,8 @@ edition = "2024"
6+
7+
[dependencies]
8+
serde = { version = "1.0", features = ["derive"] }
9+
-tokio = { version = "1.40", features = ["full"] }
10+
-reqwest = { version = "0.12", features = ["json"] }
11+
+tokio = { version = "1.44", features = ["full"] }
12+
+reqwest = { version = "0.13", features = ["json"] }
13+
clap = { version = "4.5", features = ["derive"] }

0 commit comments

Comments
 (0)