Add pre-compare transforms for non-deterministic test output

mavam · claude · mavam · commit bc34fbb31b2f · 2026-01-30T22:50:01.000+01:00
Introduces a `pre-compare` frontmatter option that normalizes test output
before comparison, allowing tests with non-deterministic ordering to pass
reliably.

Usage in test frontmatter (TQL form shown; shell/Python tests use the comment form, e.g. `# pre-compare: sort`):
  ---
  pre-compare: [sort]
  ---

Or in test.yaml for directory-level configuration:
  pre-compare: [sort]

The initial transform is `sort`, which sorts output lines lexicographically.
Transforms apply only at comparison time - baselines store original output.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/changelog/unreleased/add-pre-compare-transforms-for-non-deterministic-output.md b/changelog/unreleased/add-pre-compare-transforms-for-non-deterministic-output.md
@@ -0,0 +1,19 @@
+---
+title: Add pre-compare transforms for non-deterministic output
+type: feature
+authors:
+  - mavam
+  - claude
+created: 2026-01-30T20:46:00.000000Z
+---
+
+The test framework now supports pre-compare transforms that normalize output before comparison with baselines. This helps handle tests with non-deterministic output like unordered result sets from hash-based aggregations or parallel operations.
+
+Configure the `pre-compare` option in `test.yaml` or per-test frontmatter to apply transforms to both actual output and baselines before comparison:
+
+```yaml
+# Sort output lines for comparison (baseline stays unchanged)
+pre-compare: sort
+```
+
+The `sort` transform sorts output lines lexicographically, making it easy to handle unordered results. Transforms only affect comparison—baseline files remain untransformed on disk, and `--update` continues to store original output.
diff --git a/example-project/tests/pre-compare-sort.sh b/example-project/tests/pre-compare-sort.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+# pre-compare: sort
+
+# Demonstrate pre-compare transform for handling non-deterministic output.
+# The lines are emitted unsorted (standing in for non-deterministic order);
+# the sort transform makes the comparison against the sorted baseline pass.
+
+echo "zebra"
+echo "alpha"
+echo "charlie"
+echo "bravo"
diff --git a/example-project/tests/pre-compare-sort.txt b/example-project/tests/pre-compare-sort.txt
@@ -0,0 +1,4 @@
+alpha
+bravo
+charlie
+zebra
diff --git a/src/tenzir_test/runners/custom_python_fixture_runner.py b/src/tenzir_test/runners/custom_python_fixture_runner.py
@@ -135,12 +135,17 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool:
                         return False
                     run_mod.log_comparison(test, ref_path, mode="comparing")
                     expected = ref_path.read_bytes()
-                    if expected != output:
+                    pre_compare = typing.cast(
+                        tuple[str, ...], test_config.get("pre_compare", tuple())
+                    )
+                    expected_transformed = run_mod.apply_pre_compare(expected, pre_compare)
+                    output_transformed = run_mod.apply_pre_compare(output, pre_compare)
+                    if expected_transformed != output_transformed:
                         if run_mod.interrupt_requested():
                             run_mod.report_interrupted_test(test)
                         else:
                             run_mod.report_failure(test, "")
-                            run_mod.print_diff(expected, output, ref_path)
+                            run_mod.print_diff(expected_transformed, output_transformed, ref_path)
                         return False
             finally:
                 fixture_api.pop_context(context_token)
diff --git a/src/tenzir_test/runners/diff_runner.py b/src/tenzir_test/runners/diff_runner.py
@@ -68,6 +68,7 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool | str:
                     raise RuntimeError("TENZIR_BINARY must be configured for diff runners")
                 base_cmd: list[str] = [*binary, *config_args]
 
+                coverage_dir = ""
                 if coverage:
                     coverage_dir = env.get(
                         "CMAKE_COVERAGE_OUTPUT_DIRECTORY",
@@ -111,6 +112,7 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool | str:
         root_bytes = str(run_mod.ROOT).encode() + b"/"
         unoptimized_stdout = unoptimized.stdout.replace(root_bytes, b"")
         optimized_stdout = optimized.stdout.replace(root_bytes, b"")
+        # Generate diff without transforms first
         diff_chunks = list(
             difflib.diff_bytes(
                 difflib.unified_diff,
@@ -130,12 +132,15 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool | str:
             ref_path.write_bytes(diff_bytes)
         else:
             expected = ref_path.read_bytes()
-            if diff_bytes != expected:
+            pre_compare = typing.cast(tuple[str, ...], test_config.get("pre_compare", tuple()))
+            expected_transformed = run_mod.apply_pre_compare(expected, pre_compare)
+            actual_transformed = run_mod.apply_pre_compare(diff_bytes, pre_compare)
+            if actual_transformed != expected_transformed:
                 if run_mod.interrupt_requested():
                     run_mod.report_interrupted_test(test)
                 else:
                     run_mod.report_failure(test, "")
-                    run_mod.print_diff(expected, diff_bytes, ref_path)
+                    run_mod.print_diff(expected_transformed, actual_transformed, ref_path)
                 return False
         run_mod.success(test)
         return True
diff --git a/src/tenzir_test/runners/shell_runner.py b/src/tenzir_test/runners/shell_runner.py
@@ -125,6 +125,7 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool:
             run_mod.success(test)
             return True
 
+        pre_compare = typing.cast(tuple[str, ...], test_config.get("pre_compare", tuple()))
         if combined_bytes:
             if not stdout_path.exists():
                 run_mod.report_failure(
@@ -134,22 +135,28 @@ def run(self, test: Path, update: bool, coverage: bool = False) -> bool:
                 return False
             run_mod.log_comparison(test, stdout_path, mode="comparing")
             expected_stdout = stdout_path.read_bytes()
-            if expected_stdout != combined_bytes:
+            expected_transformed = run_mod.apply_pre_compare(expected_stdout, pre_compare)
+            actual_transformed = run_mod.apply_pre_compare(combined_bytes, pre_compare)
+            if expected_transformed != actual_transformed:
                 if run_mod.interrupt_requested():
                     run_mod.report_interrupted_test(test)
                 else:
                     run_mod.report_failure(test, "")
-                    run_mod.print_diff(expected_stdout, combined_bytes, stdout_path)
+                    run_mod.print_diff(expected_transformed, actual_transformed, stdout_path)
                 return False
         elif stdout_path.exists():
             expected_stdout = stdout_path.read_bytes()
+            # Check if original baseline is empty before transformation
             if expected_stdout not in {b"", b"\n"}:
-                if run_mod.interrupt_requested():
-                    run_mod.report_interrupted_test(test)
-                else:
-                    run_mod.report_failure(test, "")
-                    run_mod.print_diff(expected_stdout, b"", stdout_path)
-                return False
+                expected_transformed = run_mod.apply_pre_compare(expected_stdout, pre_compare)
+                actual_transformed = run_mod.apply_pre_compare(b"", pre_compare)
+                if expected_transformed != actual_transformed:
+                    if run_mod.interrupt_requested():
+                        run_mod.report_interrupted_test(test)
+                    else:
+                        run_mod.report_failure(test, "")
+                        run_mod.print_diff(expected_transformed, actual_transformed, stdout_path)
+                    return False
 
         run_mod.success(test)
         return True
diff --git a/tests/test_run.py b/tests/test_run.py
@@ -1583,20 +1583,18 @@ def test_directory_with_test_yaml_inside_root_is_selector(tmp_path, monkeypatch)
 
 
 class TestTransformSort:
-    def test_empty_input_returns_empty(self):
-        assert run._transform_sort(b"") == b""
-
-    def test_single_line_without_newline(self):
-        assert run._transform_sort(b"hello") == b"hello"
-
-    def test_single_line_with_newline(self):
-        assert run._transform_sort(b"hello\n") == b"hello\n"
-
-    def test_multiple_lines_get_sorted(self):
-        assert run._transform_sort(b"zebra\napple\nmango\n") == b"apple\nmango\nzebra\n"
-
-    def test_duplicate_lines_preserved(self):
-        assert run._transform_sort(b"b\na\nb\na\n") == b"a\na\nb\nb\n"
+    @pytest.mark.parametrize(
+        "input_data,expected_output",
+        [
+            (b"", b""),
+            (b"hello", b"hello"),
+            (b"hello\n", b"hello\n"),
+            (b"zebra\napple\nmango\n", b"apple\nmango\nzebra\n"),
+            (b"b\na\nb\na\n", b"a\na\nb\nb\n"),
+        ],
+    )
+    def test_sort_transform(self, input_data, expected_output):
+        assert run._transform_sort(input_data) == expected_output
 
     def test_trailing_newline_preserved(self):
         result = run._transform_sort(b"b\na\n")
diff --git a/tests/test_run_config.py b/tests/test_run_config.py
@@ -72,6 +72,7 @@ def test_parse_test_config_override(tmp_path: Path, configured_root: Path) -> No
         "inputs": None,
         "retry": 1,
         "package_dirs": tuple(),
+        "pre_compare": tuple(),
     }
 
 
@@ -102,6 +103,7 @@ def test_parse_test_config_yaml_frontmatter(tmp_path: Path, configured_root: Pat
         "inputs": None,
         "retry": 1,
         "package_dirs": tuple(),
+        "pre_compare": tuple(),
     }
 
 
@@ -341,6 +343,7 @@ def test_parse_python_comment_frontmatter(tmp_path: Path, configured_root: Path)
         "inputs": None,
         "retry": 1,
         "package_dirs": tuple(),
+        "pre_compare": tuple(),
     }
 
 

-Original file line number
+Diff line change
@@ -0,0 +1,4 @@
 +alpha
 +bravo
 +charlie
 +zebra
Original file line number	Diff line number	Diff line change
`@@ -72,6 +72,7 @@ def test_parse_test_config_override(tmp_path: Path, configured_root: Path) -> No`
`72`	`72`	`"inputs": None,`
`73`	`73`	`"retry": 1,`
`74`	`74`	`"package_dirs": tuple(),`
	`75`	`+ "pre_compare": tuple(),`
`75`	`76`	`}`
`76`	`77`
`77`	`78`
`@@ -102,6 +103,7 @@ def test_parse_test_config_yaml_frontmatter(tmp_path: Path, configured_root: Pat`
`102`	`103`	`"inputs": None,`
`103`	`104`	`"retry": 1,`
`104`	`105`	`"package_dirs": tuple(),`
	`106`	`+ "pre_compare": tuple(),`
`105`	`107`	`}`
`106`	`108`
`107`	`109`
`@@ -341,6 +343,7 @@ def test_parse_python_comment_frontmatter(tmp_path: Path, configured_root: Path)`
`341`	`343`	`"inputs": None,`
`342`	`344`	`"retry": 1,`
`343`	`345`	`"package_dirs": tuple(),`
	`346`	`+ "pre_compare": tuple(),`
`344`	`347`	`}`
`345`	`348`
`346`	`349`