@@ -286,20 +286,23 @@ impl EvalRunner {
286286
287287 self . print_results ( & results) ;
288288
289- let failed = results. iter ( ) . filter ( |r| !r. passed ( ) ) . count ( ) ;
290- if failed > 0 {
289+ // Print aggregate summary
290+ let summary = EvalSummary :: from_results ( & results) ;
291+ eprintln ! ( "{}" , summary. format_report( ) ) ;
292+
293+ if summary. total_failed > 0 {
291294 eprintln ! (
292- "\n {} {} fixture(s) failed" ,
295+ "{} {} fixture(s) failed" ,
293296 style( "FAIL" ) . red( ) . bold( ) ,
294- failed ,
297+ summary . total_failed ,
295298 ) ;
296299 std:: process:: exit ( 1 ) ;
297300 }
298301
299302 eprintln ! (
300- "\n {} All {} fixture(s) passed" ,
303+ "{} All {} fixture(s) passed" ,
301304 style( "PASS" ) . green( ) . bold( ) ,
302- results . len ( ) ,
305+ summary . total_passed ,
303306 ) ;
304307
305308 Ok ( ( ) )
@@ -1015,6 +1018,89 @@ impl EvalRunner {
10151018 }
10161019}
10171020
/// Aggregate evaluation summary with per-type accuracy breakdown.
///
/// Built by `EvalSummary::from_results` and rendered with
/// `EvalSummary::format_report`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct EvalSummary {
    /// Number of fixtures that were evaluated.
    pub total_fixtures: usize,
    /// Number of fixtures that passed.
    pub total_passed: usize,
    /// Number of fixtures that did not pass.
    pub total_failed: usize,
    /// Per-type accuracy: (type_name, passed, total).
    pub per_type: Vec<(String, usize, usize)>,
}
1030+
1031+ impl EvalSummary {
1032+ /// Build a summary from eval results.
1033+ #[ must_use]
1034+ pub fn from_results ( results : & [ EvalResult ] ) -> Self {
1035+ let total_fixtures = results. len ( ) ;
1036+ let total_passed = results. iter ( ) . filter ( |r| r. passed ( ) ) . count ( ) ;
1037+ let total_failed = total_fixtures - total_passed;
1038+
1039+ // Group by expected_type
1040+ let mut type_map: std:: collections:: BTreeMap < String , ( usize , usize ) > =
1041+ std:: collections:: BTreeMap :: new ( ) ;
1042+
1043+ for result in results {
1044+ let key = result. expected_type . to_lowercase ( ) ;
1045+ if key. is_empty ( ) {
1046+ continue ;
1047+ }
1048+ let entry = type_map. entry ( key) . or_insert ( ( 0 , 0 ) ) ;
1049+ entry. 1 += 1 ; // total
1050+ if result. passed ( ) {
1051+ entry. 0 += 1 ; // passed
1052+ }
1053+ }
1054+
1055+ let per_type: Vec < ( String , usize , usize ) > = type_map
1056+ . into_iter ( )
1057+ . map ( |( k, ( passed, total) ) | ( k, passed, total) )
1058+ . collect ( ) ;
1059+
1060+ Self {
1061+ total_fixtures,
1062+ total_passed,
1063+ total_failed,
1064+ per_type,
1065+ }
1066+ }
1067+
1068+ /// Format the summary as a human-readable report.
1069+ #[ must_use]
1070+ pub fn format_report ( & self ) -> String {
1071+ let mut report = String :: new ( ) ;
1072+
1073+ report. push_str ( "=== Eval Summary ===\n \n " ) ;
1074+
1075+ // Per-type breakdown
1076+ report. push_str ( "Per-type accuracy:\n " ) ;
1077+ for ( type_name, passed, total) in & self . per_type {
1078+ let pct = if * total > 0 {
1079+ ( * passed as f64 / * total as f64 ) * 100.0
1080+ } else {
1081+ 0.0
1082+ } ;
1083+ report. push_str ( & format ! (
1084+ " {}: {}/{} ({:.0}%)\n " ,
1085+ type_name, passed, total, pct
1086+ ) ) ;
1087+ }
1088+
1089+ // Overall score
1090+ let overall_pct = if self . total_fixtures > 0 {
1091+ ( self . total_passed as f64 / self . total_fixtures as f64 ) * 100.0
1092+ } else {
1093+ 0.0
1094+ } ;
1095+ report. push_str ( & format ! (
1096+ "\n Overall: {}/{} ({:.1}%)\n " ,
1097+ self . total_passed, self . total_fixtures, overall_pct
1098+ ) ) ;
1099+
1100+ report
1101+ }
1102+ }
1103+
10181104/// Parse a symbol kind string from TOML into `SymbolKind`.
10191105fn parse_symbol_kind ( kind : & str ) -> SymbolKind {
10201106 match kind. to_lowercase ( ) . as_str ( ) {
0 commit comments