Skip to content

Commit 81762cd

Browse files
neuron7xLab and claude committed
test(robustness): 55-test battery — primitives, contract, suites, gates, no-interference
Coverage matrix:
- test_robustness_primitives.py (18) — CPCV shape/embargo/purge, PBO bounds on pure-noise vs signal families, PSR high/zero/degenerate, null audit shape/determinism/validation, jitter anchor-recovery, jitter name-in-anchor + negative-fraction error paths.
- test_kuramoto_contract.py (6) — 28-hash verification, missing manifest fail-closed, sha256 mismatch fail-closed, missing-file fail-closed, schema-consistency assertions, daily_returns shape.
- test_kuramoto_candidate_set.py (5) — legit names accepted, each forbidden prefix rejected, multi-offender listing, anchor-cover.
- test_kuramoto_suites.py (10) — CPCV pbo bounds + fold count, null two-family shape + determinism + invalid-bootstrap error, jitter mode + anchor + forbidden-rejection + monotonicity.
- test_kuramoto_gate_runner.py (12) — decision-layer PASS/FAIL/INSUFFICIENT truth table + end-to-end pipeline on frozen bundle.
- test_kuramoto_no_interference.py (4) — AST + regex scan asserting no writes under shadow_validation/, demo/, core/cross_asset_kuramoto/, etc.; all result-path literals route to robustness_v1/ or a frozen read-only input; no imports from execution/strategies/paper_trader.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 73f1e21 commit 81762cd

7 files changed

Lines changed: 702 additions & 0 deletions

tests/research/robustness/__init__.py

Whitespace-only changes.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright (c) 2023-2026 Yaroslav Vasylenko (neuron7xLab)
2+
# SPDX-License-Identifier: MIT
3+
"""Candidate-set anti-inflation guard tests."""
4+
5+
from __future__ import annotations
6+
7+
import pytest
8+
9+
from research.robustness.protocols.kuramoto_candidate_set import (
10+
CandidateSetInflationError,
11+
assert_anchor_covers_candidates,
12+
validate_candidate_parameter_names,
13+
)
14+
15+
16+
def test_legit_names_accepted() -> None:
    """Ordinary parameter names must pass validation without raising."""
    legit_params = {"cost_bps": 0.1, "vol_target_annualised": 0.05}
    validate_candidate_parameter_names(legit_params)
18+
19+
20+
@pytest.mark.parametrize(
    "bad",
    ["seed_value", "random_noise_scale", "jitter_extra"],
)
def test_forbidden_prefixes_rejected(bad: str) -> None:
    """Each parametrized name must trigger ``CandidateSetInflationError``."""
    offending_params = {bad: 0.1}
    with pytest.raises(CandidateSetInflationError):
        validate_candidate_parameter_names(offending_params)
31+
32+
33+
def test_multiple_offenders_listed_together() -> None:
    """The error message names every offending key and omits the clean one."""
    params = {"seed_a": 0.1, "random_b": 0.1, "cost_bps": 0.2}
    with pytest.raises(CandidateSetInflationError) as excinfo:
        validate_candidate_parameter_names(params)

    message = str(excinfo.value)
    # Both rejected names must be reported in the same error.
    for offender in ("seed_a", "random_b"):
        assert offender in message
    # The accepted name must not be dragged into the report.
    assert "cost_bps" not in message
40+
41+
42+
def test_missing_anchor_key_rejected() -> None:
    """A key present in only the second mapping must be rejected."""
    # NOTE(review): presumably the argument order is (anchor, candidates);
    # confirm against the signature of assert_anchor_covers_candidates.
    first_mapping = {"cost_bps": 1.0}
    second_mapping = {"nonexistent": 0.1}
    with pytest.raises(CandidateSetInflationError):
        assert_anchor_covers_candidates(first_mapping, second_mapping)
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# Copyright (c) 2023-2026 Yaroslav Vasylenko (neuron7xLab)
2+
# SPDX-License-Identifier: MIT
3+
"""Contract loader tests — hash verification is fail-closed."""
4+
5+
from __future__ import annotations
6+
7+
import json
8+
import shutil
9+
from pathlib import Path
10+
11+
import pytest
12+
13+
from research.robustness.protocols.kuramoto_contract import (
14+
FrozenArtifactManifest,
15+
FrozenArtifactMismatch,
16+
KuramotoRobustnessContract,
17+
)
18+
19+
REPO = Path(__file__).resolve().parents[3]
20+
21+
22+
class TestFrozenArtifactManifest:
    """Loading and ``verify_all()`` behaviour of ``FrozenArtifactManifest``."""

    def test_loads_real_manifest(self) -> None:
        """The default manifest loads with a timestamp and 28 hash entries."""
        loaded = FrozenArtifactManifest.load()
        assert loaded.generated_utc
        assert len(loaded.hashes) == 28

    def test_missing_manifest_raises(self, tmp_path: Path) -> None:
        """Loading a manifest path that does not exist raises."""
        absent_manifest = tmp_path / "nope.json"
        with pytest.raises(FrozenArtifactMismatch):
            FrozenArtifactManifest.load(absent_manifest)

    def test_hash_mismatch_raises_fail_closed(self, tmp_path: Path) -> None:
        """A recorded hash that disagrees with the file makes verify_all() raise."""
        # Copy one real artifact under tmp_path, record a deliberately wrong
        # hash for it, and root the manifest at tmp_path.
        rel = "results/cross_asset_kuramoto/demo/DEMO_BRIEF.md"
        original = REPO.joinpath("results", "cross_asset_kuramoto", "demo", "DEMO_BRIEF.md")
        snapshot = tmp_path / rel
        snapshot.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(original, snapshot)

        wrong_hash = "0" * 64
        manifest = FrozenArtifactManifest(
            generated_utc="2026-01-01T00:00:00Z",
            regenerated_utc=None,
            hashes={rel: wrong_hash},
            repo_root=tmp_path,
        )
        with pytest.raises(FrozenArtifactMismatch) as excinfo:
            manifest.verify_all()
        assert "sha256 mismatch" in str(excinfo.value)

    def test_missing_file_reports_missing(self, tmp_path: Path) -> None:
        """A manifest entry with no file on disk is reported as missing."""
        phantom_entry = {"does/not/exist.txt": "0" * 64}
        manifest = FrozenArtifactManifest(
            generated_utc="2026-01-01T00:00:00Z",
            regenerated_utc=None,
            hashes=phantom_entry,
            repo_root=tmp_path,
        )
        with pytest.raises(FrozenArtifactMismatch) as excinfo:
            manifest.verify_all()
        assert "missing" in str(excinfo.value)
62+
63+
64+
class TestKuramotoRobustnessContract:
    """Checks on ``KuramotoRobustnessContract.from_frozen_artifacts``."""

    def test_from_frozen_artifacts_loads_real_bundle(self) -> None:
        """The real frozen bundle loads with the expected sizes and columns."""
        contract = KuramotoRobustnessContract.from_frozen_artifacts()
        assert len(contract.equity_curve) > 1000
        assert len(contract.fold_metrics) >= 2
        required_columns = {"sharpe", "ann_vol"}
        assert required_columns <= set(contract.risk_metrics.columns)
        assert contract.parameter_lock["seed"] == 42

    def test_daily_returns_have_expected_length(self) -> None:
        """Daily returns are one element shorter than the equity curve."""
        contract = KuramotoRobustnessContract.from_frozen_artifacts()
        returns = contract.daily_strategy_returns()
        assert len(returns) == len(contract.equity_curve) - 1

    def test_fold_metrics_contract_violation_is_caught(self, tmp_path: Path) -> None:
        """A fold_metrics.csv without a 'sharpe' column is refused at load time."""
        import hashlib

        rel_equity = "results/cross_asset_kuramoto/demo/equity_curve.csv"
        rel_folds = "results/cross_asset_kuramoto/demo/fold_metrics.csv"
        rel_risk = "results/cross_asset_kuramoto/demo/risk_metrics.csv"
        rel_lock = "results/cross_asset_kuramoto/PARAMETER_LOCK.json"

        # Synthetic bundle: every file is well-formed except fold_metrics,
        # whose header carries NOT_SHARPE in place of a 'sharpe' column.
        fixture_text = {
            rel_equity: (
                "date,strategy_cumret,benchmark_cumret,drawdown\n"
                "2020-01-01,1.0,1.0,-0.0\n2020-01-02,1.01,1.0,-0.0\n"
                "2020-01-03,1.02,1.0,-0.0\n"
            ),
            rel_folds: (
                "fold_id,is_start,os_start,os_end,NOT_SHARPE,max_dd,pass_fail\n"
                "1,2020-01-01,2020-02-01,2020-03-01,1.0,-0.1,PASS\n"
                "2,2020-01-01,2020-03-01,2020-04-01,0.5,-0.1,PASS\n"
            ),
            rel_risk: "sharpe,ann_return,ann_vol,max_dd\n1.0,0.1,0.1,-0.1\n",
            rel_lock: json.dumps({"seed": 42}),
        }

        demo_dir = tmp_path / "results" / "cross_asset_kuramoto" / "demo"
        demo_dir.mkdir(parents=True, exist_ok=True)
        for rel, text in fixture_text.items():
            (tmp_path / rel).write_text(text)

        # Hash the bytes actually on disk so the manifest matches the files;
        # the load should then fail on the schema, not on a hash mismatch.
        hashes = {}
        for rel in (rel_equity, rel_folds, rel_risk, rel_lock):
            on_disk = (tmp_path / rel).read_bytes()
            hashes[rel] = hashlib.sha256(on_disk).hexdigest()

        manifest_path = tmp_path / "manifest.json"
        manifest_payload = {"generated_utc": "x", "hashes": hashes}
        manifest_path.write_text(json.dumps(manifest_payload, indent=2))

        with pytest.raises(FrozenArtifactMismatch) as excinfo:
            KuramotoRobustnessContract.from_frozen_artifacts(
                manifest_path=manifest_path,
                repo_root=tmp_path,
            )
        assert "fold_metrics" in str(excinfo.value)
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Copyright (c) 2023-2026 Yaroslav Vasylenko (neuron7xLab)
2+
# SPDX-License-Identifier: MIT
3+
"""End-to-end gate-runner + decision-layer tests."""
4+
5+
from __future__ import annotations
6+
7+
from dataclasses import dataclass
8+
from pathlib import Path
9+
10+
from backtest.robustness_gates import DecisionLabel, evaluate_robustness_gates
11+
from research.robustness.protocols.kuramoto_contract import KuramotoRobustnessContract
12+
from research.robustness.protocols.kuramoto_gate_runner import run_kuramoto_gate_runner
13+
14+
15+
@dataclass(frozen=True)
class _FakeCPCV:
    """Immutable stand-in for the CPCV part of the evidence in decision tests."""

    # Outcome flags for the PBO and PSR checks.
    pbo_pass: bool
    psr_pass: bool
    # Annualised Sharpe value carried alongside the flags.
    annualised_sharpe: float
    # Number of folds the (fake) suite reports.
    n_folds: int
21+
22+
23+
@dataclass(frozen=True)
class _FakeNull:
    """Immutable stand-in for the null-suite part of the evidence."""

    # Single aggregate flag for the null families.
    all_families_pass: bool
26+
27+
28+
@dataclass(frozen=True)
class _FakeJitter:
    """Immutable stand-in for the jitter-suite part of the evidence."""

    # Mode label; the tests in this module use "LIVE" and
    # "PLACEHOLDER_APPROXIMATION".
    evaluator_mode: str
    # Outcome flag for the within-tolerance fraction check.
    fraction_within_tol_pass: bool
32+
33+
34+
@dataclass(frozen=True)
class _FakeEvidence:
    """Immutable bundle of the three fake suite results passed to the gates."""

    cpcv: _FakeCPCV  # fake CPCV result
    null: _FakeNull  # fake null-suite result
    jitter: _FakeJitter  # fake jitter-suite result
39+
40+
41+
class TestDecisionLayer:
    """Truth-table tests for ``evaluate_robustness_gates`` on fake evidence."""

    def _ev(
        self,
        *,
        pbo_pass: bool = True,
        psr_pass: bool = True,
        annualised_sharpe: float = 1.2,
        n_folds: int = 5,
        null_pass: bool = True,
        jitter_mode: str = "LIVE",
        jitter_pass: bool = True,
    ) -> _FakeEvidence:
        """Build fake evidence; with no overrides every flag is green."""
        cpcv_part = _FakeCPCV(
            pbo_pass=pbo_pass,
            psr_pass=psr_pass,
            annualised_sharpe=annualised_sharpe,
            n_folds=n_folds,
        )
        null_part = _FakeNull(all_families_pass=null_pass)
        jitter_part = _FakeJitter(
            evaluator_mode=jitter_mode,
            fraction_within_tol_pass=jitter_pass,
        )
        return _FakeEvidence(cpcv=cpcv_part, null=null_part, jitter=jitter_part)

    def test_all_green_gives_pass(self) -> None:
        """All-green evidence yields PASS with no reasons."""
        verdict = evaluate_robustness_gates(self._ev())
        assert verdict.label is DecisionLabel.PASS
        assert verdict.reasons == ()

    def test_pbo_red_gives_fail(self) -> None:
        """A failed PBO flag yields FAIL and a reason mentioning PBO."""
        evidence = self._ev(pbo_pass=False)
        verdict = evaluate_robustness_gates(evidence)
        assert verdict.label is DecisionLabel.FAIL
        assert any("PBO" in reason for reason in verdict.reasons)

    def test_psr_red_gives_fail(self) -> None:
        """A failed PSR flag yields FAIL and a reason mentioning PSR."""
        evidence = self._ev(psr_pass=False)
        verdict = evaluate_robustness_gates(evidence)
        assert verdict.label is DecisionLabel.FAIL
        assert any("PSR" in reason for reason in verdict.reasons)

    def test_null_red_gives_fail(self) -> None:
        """A failed null suite yields FAIL."""
        evidence = self._ev(null_pass=False)
        verdict = evaluate_robustness_gates(evidence)
        assert verdict.label is DecisionLabel.FAIL

    def test_placeholder_jitter_with_require_live_demotes_to_insufficient(
        self,
    ) -> None:
        """Placeholder jitter plus require_live_jitter=True gives INSUFFICIENT_EVIDENCE."""
        evidence = self._ev(jitter_mode="PLACEHOLDER_APPROXIMATION")
        verdict = evaluate_robustness_gates(evidence, require_live_jitter=True)
        assert verdict.label is DecisionLabel.INSUFFICIENT_EVIDENCE
        assert verdict.jitter_is_placeholder

    def test_placeholder_jitter_without_require_live_can_pass(self) -> None:
        """Placeholder jitter with require_live_jitter=False can still PASS."""
        evidence = self._ev(jitter_mode="PLACEHOLDER_APPROXIMATION")
        verdict = evaluate_robustness_gates(evidence, require_live_jitter=False)
        assert verdict.label is DecisionLabel.PASS
        assert verdict.jitter_is_placeholder

    def test_single_fold_gives_insufficient(self) -> None:
        """A single fold yields INSUFFICIENT_EVIDENCE."""
        evidence = self._ev(n_folds=1)
        verdict = evaluate_robustness_gates(evidence)
        assert verdict.label is DecisionLabel.INSUFFICIENT_EVIDENCE

    def test_live_jitter_failure_is_fail(self) -> None:
        """A failed jitter check in LIVE mode yields FAIL."""
        evidence = self._ev(jitter_mode="LIVE", jitter_pass=False)
        verdict = evaluate_robustness_gates(evidence)
        assert verdict.label is DecisionLabel.FAIL
111+
112+
113+
class TestGateRunnerEndToEnd:
    """Runs the gate runner on the frozen artifact bundle with small budgets."""

    def test_pipeline_produces_all_three_suites(self) -> None:
        """The runner returns populated CPCV, null and jitter evidence."""
        frozen = KuramotoRobustnessContract.from_frozen_artifacts()
        null_options = {"n_bootstrap": 32}
        jitter_options = {"n_candidates": 8}
        evidence = run_kuramoto_gate_runner(
            frozen,
            null_kwargs=null_options,
            jitter_kwargs=jitter_options,
        )
        assert evidence.cpcv.n_folds >= 2
        assert len(evidence.null.families) == 2
        assert evidence.jitter.evaluator_mode == "PLACEHOLDER_APPROXIMATION"

    def test_decision_matches_evidence(self) -> None:
        """The decision derived from frozen-bundle evidence is a null-driven FAIL."""
        frozen = KuramotoRobustnessContract.from_frozen_artifacts()
        null_options = {"n_bootstrap": 32, "seed": 42}
        jitter_options = {"n_candidates": 8}
        evidence = run_kuramoto_gate_runner(
            frozen,
            null_kwargs=null_options,
            jitter_kwargs=jitter_options,
        )
        decision = evaluate_robustness_gates(evidence)
        # With only 32 bootstraps and proxy returns, the null suite almost
        # certainly fails at the 5 % threshold, so the expected verdict on
        # the frozen demo evidence is FAIL.
        assert decision.label is DecisionLabel.FAIL
        assert not decision.null_pass
        assert any("null" in reason for reason in decision.reasons)
139+
140+
141+
def test_cli_writes_expected_artifacts(tmp_path: Path) -> None:
    """Run the robustness CLI into a temp dir and check its output files.

    The CLI is invoked with a small bootstrap count and a small number of
    jitter candidates. Exit codes 0 and 1 are both accepted; the test only
    requires that every expected artifact is written to ``--out-dir``.
    """
    # Imported lazily, as in the original test; the lint rule stays suppressed.
    from scripts import run_kuramoto_robustness_v1 as cli  # noqa: PLC0415

    out_dir = tmp_path / "robustness_v1"
    rc = cli.main(
        [
            "--n-bootstrap",
            "32",
            "--n-jitter-candidates",
            "8",
            "--out-dir",
            str(out_dir),
        ]
    )
    assert rc in (0, 1), f"unexpected CLI exit code: {rc!r}"

    expected_artifacts = (
        "verdict.json",
        "cpcv_summary.json",
        "null_summary.json",
        "jitter_summary.json",
        "ROBUSTNESS_v1.md",
    )
    # Collect every absent artifact so one failure reports the full gap
    # instead of stopping at the first missing file.
    missing = [name for name in expected_artifacts if not (out_dir / name).is_file()]
    assert not missing, f"CLI did not write expected artifacts: {missing}"

0 commit comments

Comments
 (0)