eclipse-rdf4j · hmottestad · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/.codex/skills/jmh-benchmark-compare/scripts/jmh_compare_core.py b/.codex/skills/jmh-benchmark-compare/scripts/jmh_compare_core.py
@@ -17,6 +17,11 @@
     re.IGNORECASE,
 )
 METRIC_COLUMNS = {"Score", "Error", "Cnt"}
+JMH_MODES = {"thrpt", "avgt", "sample", "ss", "all"}
+STRICT_NUM_RE = re.compile(
+    r"[-+]?(?:(?:\d+(?:,\d{3})*|\d+)(?:\.\d+)?|\.\d+)(?:[eE][-+]?\d+)?|[-+]?(?:inf|nan)",
+    re.IGNORECASE,
+)
 DATE_PATTERNS = (
     re.compile(
         r"(20\d{2})[-_]?([01]\d)[-_]?([0-3]\d)[Tt _-]?([0-2]\d)[-_:]?([0-5]\d)(?:[-_:]?([0-5]\d))?"
@@ -123,17 +128,26 @@ def is_int_token(text: str) -> bool:
     return bool(re.fullmatch(r"[+-]?\d+", text or ""))
 
 
+def is_numeric_metric_token(text: str) -> bool:
+    value = (text or "").strip()
+    if value.endswith("±"):
+        value = value[:-1].strip()
+    return bool(STRICT_NUM_RE.fullmatch(value))
+
+
 def has_valid_metric_values(row: Dict[str, str], columns: Sequence[str]) -> bool:
+    if "Mode" in columns and row.get("Mode", "").strip().lower() not in JMH_MODES:
+        return False
     for col in columns:
         value = row.get(col, "")
         if col == "Score":
-            if extract_numeric(value) is None:
+            if not is_numeric_metric_token(value):
                 return False
         elif col == "Cnt" and value:
             if not is_int_token(value):
                 return False
         elif col == "Error" and value:
-            if extract_numeric(value) is None:
+            if not is_numeric_metric_token(value):
                 return False
     return True
 
@@ -227,8 +241,6 @@ def parse_file(path: Path, label: str, id_columns: Optional[str], timestamp_sour
     for line in lines[header_idx + 1 :]:
         stripped = line.strip()
         if not stripped:
-            if saw_data:
-                break
             continue
         if stripped.startswith("#"):
             continue
@@ -244,6 +256,10 @@ def parse_file(path: Path, label: str, id_columns: Optional[str], timestamp_sour
             if saw_data:
                 break
             continue
+        if not has_valid_metric_values(row, columns):
+            if saw_data:
+                break
+            continue
         score = extract_numeric(row.get("Score", ""))
         if score is None:
             if saw_data and (stripped.startswith("Result") or stripped.startswith("Secondary result")):

diff --git a/.codex/skills/jmh-benchmark-compare/scripts/test_jmh_compare_core.py b/.codex/skills/jmh-benchmark-compare/scripts/test_jmh_compare_core.py
@@ -20,7 +20,7 @@ def test_missing_cnt_and_error_values_do_not_shift_score(self) -> None:
         repo_root = SCRIPT_DIR.parents[3]
         result_file = (
             repo_root
-            / "core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/results-2026-03-01.md"
+            / "core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results/results-2026-03-01.md"
         )
 
         parsed = core.parse_file(result_file, "results-2026-03-01", None, "mtime")
@@ -33,7 +33,7 @@ def test_plus_minus_error_rows_keep_score_numeric(self) -> None:
         repo_root = SCRIPT_DIR.parents[3]
         result_file = (
             repo_root
-            / "core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/results-2026-03-04.md"
+            / "core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results/results-2026-03-04.md"
         )
 
         parsed = core.parse_file(result_file, "results-2026-03-04", None, "mtime")
@@ -74,6 +74,58 @@ def test_compare_uses_column_names_when_key_order_differs(self) -> None:
             self.assertAlmostEqual(row["Score [right]"], 20.0, places=3)
             self.assertAlmostEqual(row["Diff % [right - left]"], 100.0, places=3)
 
+    def test_blank_lines_between_jmh_rows_do_not_end_table(self) -> None:
+        results = "\n".join(
+            [
+                "Benchmark  (themeName)  (z_queryIndex)  Mode  Score  Units",
+                "ThemeQueryBenchmark.executeQuery  MEDICAL_RECORDS  0  avgt  10.0  ms/op",
+                "",
+                "ThemeQueryBenchmark.executeQuery  SOCIAL_MEDIA  8  avgt  20.0  ms/op",
+            ]
+        )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            result_file = Path(tmpdir) / "results.txt"
+            result_file.write_text(results, encoding="utf-8")
+
+            parsed = core.parse_file(result_file, "results", None, "mtime")
+
+            self.assertEqual(len(parsed.rows), 2)
+            key = ("ThemeQueryBenchmark.executeQuery", "SOCIAL_MEDIA", "8", "avgt", "ms/op")
+            self.assertIn(key, parsed.score_by_key)
+            self.assertAlmostEqual(parsed.score_by_key[key], 20.0, places=3)
+
+    def test_non_jmh_text_after_blank_does_not_parse_as_rows(self) -> None:
+        results = "\n".join(
+            [
+                "Benchmark                              (themeName)  (z_queryIndex)  Mode  Cnt      Score   Error  Units",
+                "ThemeQueryBenchmark.executeQuery   MEDICAL_RECORDS               0  avgt          10.0          ms/op",
+                "",
+                "ThemeQueryBenchmark.executeQuery      SOCIAL_MEDIA               8  avgt          20.0          ms/op",
+                "",
+                "Initializing state: k=64, subjectBuckets=4096, predicateBuckets=64, "
+                "objectBuckets=4096, contextBuckets=16, contextPairSketchesEnabled=false",
+                "Projection (resultSizeActual=1, hasNextCallCountActual=2)",
+            ]
+        )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            result_file = Path(tmpdir) / "results.txt"
+            result_file.write_text(results, encoding="utf-8")
+
+            parsed = core.parse_file(result_file, "results", None, "mtime")
+
+            self.assertEqual(
+                [
+                    row["Benchmark"]
+                    for row in parsed.rows
+                ],
+                [
+                    "ThemeQueryBenchmark.executeQuery",
+                    "ThemeQueryBenchmark.executeQuery",
+                ],
+            )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/.codex/skills/mvnf/SKILL.md b/.codex/skills/mvnf/SKILL.md
@@ -30,3 +30,14 @@ If the test run fails, it prints the list of Surefire/Failsafe report files unde
 - `--module <path>`: Force the module when the test class name exists in multiple modules.
 - `--it`: Treat the selector as an integration test and pass it via `-Dit.test=...`.
 - `--no-offline`: Run Maven commands without `-o` (useful if offline resolution fails).
+
+## LMDB regression speedup note
+
+For LMDB theme regression/snapshot tests, enable persistent prepared stores to skip repeated dataset rebuilds:
+
+- `-Drdf4j.lmdb.themeRegression.persistentStore.enabled=true`
+- Optional root override: `-Drdf4j.lmdb.themeRegression.persistentStore.root=persistent-lmdb-theme-store`
+
+`mvnf.py` does not forward arbitrary `-D` flags today, so use direct Maven for this mode, for example:
+
+- `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/lmdb -Dtest=LmdbThemeQueryRegressionTest#socialMediaFiveCycleInterleavesValuesWithFollowsEdges -Drdf4j.lmdb.themeRegression.persistentStore.enabled=true test`
diff --git a/.codex/skills/query-plan-snapshot-cli/SKILL.md b/.codex/skills/query-plan-snapshot-cli/SKILL.md
@@ -5,10 +5,82 @@ description: Use QueryPlanSnapshotCli to capture and compare RDF4J query plans,
 
 # query-plan-snapshot-cli
 
-Use this skill to run reproducible query-plan captures and classify likely regression/improvement signals.
+Use this skill to run reproducible query-plan captures, triage historical theme-query benchmark results, and classify likely regression/improvement signals.
 
 ## Fast workflow
 
+1. Capture raw benchmark output into a normalized result file when needed.
+2. Analyze the newest dated run against historical results.
+3. Drill into the fastest known runs for a specific theme/query.
+4. If needed, capture baseline/candidate plan snapshots and diff them semantically.
+
+## History triage
+
+Result files live in:
+
+- `core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results`
+
+Normalize raw JMH output into a new result file:
+
+- `pbpaste | scripts/theme-query-benchmark-results.sh capture`
+- `scripts/theme-query-benchmark-results.sh capture raw-jmh.txt`
+
+Analyze only the queries that are more than 20% slower than the historical best:
+
+- `core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results/analyze-theme-query-history.sh`
+
+Sort regressions from biggest to smallest:
+
+- `core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results/analyze-theme-query-history.sh --sort-regressions`
+
+Only print the top N regressions:
+
+- `core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results/analyze-theme-query-history.sh --top 10`
+
+Analyze every latest query, including current-run wins over previous best:
+
+- `core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results/analyze-theme-query-history.sh --all`
+
+Drill into the three fastest known runs for one theme/query and print optimized plan/query when present:
+
+- `core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/benchmark/theme-query-benchmark-results/analyze-theme-query-history.sh --theme PHARMA --query-index 10`
+
+Interpretation:
+
+- Default mode: treats only the newest dated file as the “latest” run and compares it against all other `results-*.md` files, including `results-develop.md` and `results-main-branch.md`; prints only the queries where the latest run is more than 20% slower than the historical best.
+- `--sort-regressions`: flat regression list, biggest slowdown first.
+- `--top N`: top N regressions only; implies regression sorting.
+- `--all`: prints every latest query; if latest is a new best it prints how much faster it is than the previous best.
+- Query detail mode: prints the three fastest runs, sorted by score ascending; ties are broken in favor of richer files that include plan/query content.
+- `plan no | query yes`: the optimized query was rendered, but that result file contains no physical plan block.
+- `plan no | query no`: summary-only run or no per-query capture in that file.
+
+Use this path when the goal is optimizer-loop work: find the fastest known plan/query for a theme/query, then compare new runs back to that history before touching production logic.
+
+## Fast regression test loop (persistent LMDB theme stores)
+
+Theme regression/snapshot tests in `core/sail/lmdb` now support reusing a prepared LMDB store across runs.
+
+- Enable persistent reuse:
+  - `-Drdf4j.lmdb.themeRegression.persistentStore.enabled=true`
+- Optional custom root directory:
+  - `-Drdf4j.lmdb.themeRegression.persistentStore.root=persistent-lmdb-theme-store`
+- Default root directory:
+  - `persistent-lmdb-theme-store`
+
+Behavior:
+
+- If the store has expected `triples/data.mdb` and `values/data.mdb` sizes (from `expected-db-file-sizes.properties`), tests reuse it and skip rebuild/ingest.
+- If sizes mismatch or the marker file is missing/invalid, tests rebuild the store, then refresh the expected-size file.
+
+Example focused run:
+
+- `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/lmdb -Dtest=LmdbThemeQueryRegressionTest#socialMediaFiveCycleInterleavesValuesWithFollowsEdges -Drdf4j.lmdb.themeRegression.persistentStore.enabled=true test`
+
+## Snapshot diff workflow
+
+Use this when you need semantic plan diffs between two controlled captures of the same query.
+
 1. Capture baseline run (main/reference commit).
 2. Capture candidate run (changed commit) with same query selector + `--query-id`.
 3. Produce semantic diff (`--compare-existing`).

diff --git a/.gitignore b/.gitignore
@@ -56,4 +56,7 @@ e2e/test-results
 .serena/
 .vscode
 /.codex/environments/environment.toml
-/.m2_repo_linux_j25
+improved-optimizers-query-rewrite-sketch-based-lmdb-page-walking/
+/.m2_repo_linux_j25/
+/core/sail/lmdb/persistent-lmdb-theme-store/
+core/sail/lmdb/persistent-lmdb-theme-store
diff --git a/AGENTS.md b/AGENTS.md
@@ -8,7 +8,7 @@ Before taking any action (either tool calls *or* responses to the user), you mus
 
    1.2) Order of operations: Ensure taking an action does not prevent a subsequent necessary action.
 
-        1.2.1) The user may request actions in a random order, but you may need to reorder operations to maximize successful completion of the task.
+     1.2.1) The user may request actions in a random order, but you may need to reorder operations to maximize successful completion of the task.
 
    1.3) Other prerequisites (information and/or actions needed).
 
@@ -48,9 +48,9 @@ Before taking any action (either tool calls *or* responses to the user), you mus
 
    7.2) Avoid premature conclusions: There may be multiple relevant options for a given situation.
 
-        7.2.1) To check for whether an option is relevant, reason about all information sources from #5.  
+     7.2.1) To check for whether an option is relevant, reason about all information sources from #5.  
 
-        7.2.2) You may need to consult the user to even know whether something is applicable. Do not assume it is not applicable without checking.
+     7.2.2) You may need to consult the user to even know whether something is applicable. Do not assume it is not applicable without checking.
 
    7.3) Review applicable sources of information from #5 to confirm which are relevant to the current state.
 
@@ -288,7 +288,7 @@ Plan
 1. **Compile deps fast (skip tests):**
    `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -am -Pquick clean install`
 2. **Run tests:**
-   `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify | tail -500`
+   `python3 .codex/skills/mvnf/scripts/mvnf.py <module> --retain-logs --stream` or `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify | tail -500` 
 
 It is illegal to `-am` when running tests!
 It is illegal to `-q` when running tests!
@@ -677,7 +677,6 @@ Immediately after creating any new Java source file, add the signature comment (
 * Slow tests (by module):
   `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify -PslowTestsOnly,-skipSlowTests | tail -500`
 * Slow tests (specific test):
-
     * `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/shacl -PslowTestsOnly,-skipSlowTests -Dtest=ClassName#method verify | tail -500`
 * Integration tests (entire repo):
   `mvn -o -Dmaven.repo.local=.m2_repo verify -PskipUnitTests | tail -500`

diff --git a/core/model/src/main/java/org/eclipse/rdf4j/model/impl/AbstractMemoryOverflowModel.java b/core/model/src/main/java/org/eclipse/rdf4j/model/impl/AbstractMemoryOverflowModel.java
@@ -8,6 +8,7 @@
  *
  * SPDX-License-Identifier: BSD-3-Clause
  ******************************************************************************/
+// Some portions generated by Codex
 
 package org.eclipse.rdf4j.model.impl;