Skip to content

Commit c8f67e3

Browse files
committed
SOCIAL_MEDIA +152.4%
HIGHLY_CONNECTED +56.6% PHARMA +54.6% MEDICAL_RECORDS -2.0% ENGINEERING -26.7% TRAIN -54.6% LIBRARY -55.0% ELECTRICAL_GRID -57.9%
1 parent 3d6cb91 commit c8f67e3

26 files changed

Lines changed: 77314 additions & 23861 deletions

.codex/skills/jmh-benchmark-compare/scripts/jmh_compare_core.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,11 @@
1717
re.IGNORECASE,
1818
)
1919
# Column names treated as metric values (validated per-column by
# has_valid_metric_values) rather than as free-form row content.
METRIC_COLUMNS = {"Score", "Error", "Cnt"}

# Lower-cased values accepted in a row's "Mode" column.
JMH_MODES = {"thrpt", "avgt", "sample", "ss", "all"}

# Matches a token that is *entirely* a number when used with ``fullmatch``:
#   - optional sign, digits with optional ",ddd" groups, optional fraction,
#     or a bare fraction such as ".5";
#   - optional exponent;
#   - or a signed non-finite value: "inf", "infinity" or "nan" (any case).
# "infinity" is accepted alongside "inf" because Java-formatted output spells
# the value out in full ("Infinity"), which the shorter form alone rejects.
STRICT_NUM_RE = re.compile(
    r"[-+]?(?:\d+(?:,\d{3})*(?:\.\d+)?|\.\d+)(?:[eE][-+]?\d+)?|[-+]?(?:inf(?:inity)?|nan)",
    re.IGNORECASE,
)
2025
DATE_PATTERNS = (
2126
re.compile(
2227
r"(20\d{2})[-_]?([01]\d)[-_]?([0-3]\d)[Tt _-]?([0-2]\d)[-_:]?([0-5]\d)(?:[-_:]?([0-5]\d))?"
@@ -123,17 +128,26 @@ def is_int_token(text: str) -> bool:
123128
return bool(re.fullmatch(r"[+-]?\d+", text or ""))
124129

125130

131+
def is_numeric_metric_token(text: str) -> bool:
    """Return True when *text*, taken as a whole, is a numeric token.

    Surrounding whitespace is ignored and a falsy *text* is treated as the
    empty string. A single trailing "±" (and any whitespace before it) is
    dropped before the remainder is matched in full against STRICT_NUM_RE.
    """
    candidate = (text or "").strip()
    # Drop one trailing plus-minus sign, then re-trim what is left.
    core_text = candidate[:-1].strip() if candidate.endswith("±") else candidate
    return STRICT_NUM_RE.fullmatch(core_text) is not None
136+
137+
126138
def has_valid_metric_values(row: Dict[str, str], columns: Sequence[str]) -> bool:
    """Return True when *row* holds plausible values in its metric columns.

    Checks performed, in order:
      * if "Mode" is one of *columns*, the row's Mode cell (trimmed and
        lower-cased) must be a member of JMH_MODES;
      * "Score" must be a numeric metric token (an empty score fails);
      * "Cnt", when non-empty, must be an integer token;
      * "Error", when non-empty, must be a numeric metric token.
    Columns with any other name are not inspected.
    """
    if "Mode" in columns:
        mode = row.get("Mode", "").strip().lower()
        if mode not in JMH_MODES:
            return False

    for name in columns:
        cell = row.get(name, "")
        if name == "Score":
            acceptable = is_numeric_metric_token(cell)
        elif name == "Cnt":
            # An empty Cnt cell is allowed; only a present value is checked.
            acceptable = (not cell) or is_int_token(cell)
        elif name == "Error":
            # Same rule as Cnt: empty is fine, a present value must be numeric.
            acceptable = (not cell) or is_numeric_metric_token(cell)
        else:
            acceptable = True
        if not acceptable:
            return False
    return True
139153

@@ -227,8 +241,6 @@ def parse_file(path: Path, label: str, id_columns: Optional[str], timestamp_sour
227241
for line in lines[header_idx + 1 :]:
228242
stripped = line.strip()
229243
if not stripped:
230-
if saw_data:
231-
break
232244
continue
233245
if stripped.startswith("#"):
234246
continue
@@ -244,6 +256,10 @@ def parse_file(path: Path, label: str, id_columns: Optional[str], timestamp_sour
244256
if saw_data:
245257
break
246258
continue
259+
if not has_valid_metric_values(row, columns):
260+
if saw_data:
261+
break
262+
continue
247263
score = extract_numeric(row.get("Score", ""))
248264
if score is None:
249265
if saw_data and (stripped.startswith("Result") or stripped.startswith("Secondary result")):

.codex/skills/jmh-benchmark-compare/scripts/test_jmh_compare_core.py

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def test_missing_cnt_and_error_values_do_not_shift_score(self) -> None:
2020
repo_root = SCRIPT_DIR.parents[3]
2121
result_file = (
2222
repo_root
23-
/ "core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/results-2026-03-01.md"
23+
/ "core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results/results-2026-03-01.md"
2424
)
2525

2626
parsed = core.parse_file(result_file, "results-2026-03-01", None, "mtime")
@@ -33,7 +33,7 @@ def test_plus_minus_error_rows_keep_score_numeric(self) -> None:
3333
repo_root = SCRIPT_DIR.parents[3]
3434
result_file = (
3535
repo_root
36-
/ "core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/results-2026-03-04.md"
36+
/ "core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results/results-2026-03-04.md"
3737
)
3838

3939
parsed = core.parse_file(result_file, "results-2026-03-04", None, "mtime")
@@ -74,6 +74,58 @@ def test_compare_uses_column_names_when_key_order_differs(self) -> None:
7474
self.assertAlmostEqual(row["Score [right]"], 20.0, places=3)
7575
self.assertAlmostEqual(row["Diff % [right - left]"], 100.0, places=3)
7676

77+
def test_blank_lines_between_jmh_rows_do_not_end_table(self) -> None:
    # Two result rows separated by an empty line: both must be parsed.
    table_lines = (
        "Benchmark (themeName) (z_queryIndex) Mode Score Units",
        "ThemeQueryBenchmark.executeQuery MEDICAL_RECORDS 0 avgt 10.0 ms/op",
        "",
        "ThemeQueryBenchmark.executeQuery SOCIAL_MEDIA 8 avgt 20.0 ms/op",
    )
    # Key of the row that follows the blank line.
    expected_key = ("ThemeQueryBenchmark.executeQuery", "SOCIAL_MEDIA", "8", "avgt", "ms/op")

    with tempfile.TemporaryDirectory() as workdir:
        target = Path(workdir) / "results.txt"
        target.write_text("\n".join(table_lines), encoding="utf-8")

        outcome = core.parse_file(target, "results", None, "mtime")

        self.assertEqual(len(outcome.rows), 2)
        self.assertIn(expected_key, outcome.score_by_key)
        self.assertAlmostEqual(outcome.score_by_key[expected_key], 20.0, places=3)
97+
98+
def test_non_jmh_text_after_blank_does_not_parse_as_rows(self) -> None:
    # Trailing log-style lines after the table must not be reported as rows.
    table_lines = (
        "Benchmark (themeName) (z_queryIndex) Mode Cnt Score Error Units",
        "ThemeQueryBenchmark.executeQuery MEDICAL_RECORDS 0 avgt 10.0 ms/op",
        "",
        "ThemeQueryBenchmark.executeQuery SOCIAL_MEDIA 8 avgt 20.0 ms/op",
        "",
        "Initializing state: k=64, subjectBuckets=4096, predicateBuckets=64, "
        "objectBuckets=4096, contextBuckets=16, contextPairSketchesEnabled=false",
        "Projection (resultSizeActual=1, hasNextCallCountActual=2)",
    )

    with tempfile.TemporaryDirectory() as workdir:
        target = Path(workdir) / "results.txt"
        target.write_text("\n".join(table_lines), encoding="utf-8")

        outcome = core.parse_file(target, "results", None, "mtime")

        # Only the two genuine benchmark rows may survive parsing.
        benchmark_names = [entry["Benchmark"] for entry in outcome.rows]
        self.assertEqual(
            benchmark_names,
            ["ThemeQueryBenchmark.executeQuery"] * 2,
        )
128+
77129

78130
if __name__ == "__main__":
79131
unittest.main()

0 commit comments

Comments
 (0)