Skip to content

Commit 5df09ed

Browse files
neuron7xLab and claude committed
feat(robustness): wire LOO grid into CPCV suite — real PBO=0.20 replaces trivial mirror
Self-audit finding: the fold-mirror PBO was structurally trivial (=0.00) because a 2-column matrix with a median-shifted mirror always picks the same best-IS strategy. The offline-robustness packet already ships a 13×5 LOO grid at results/cross_asset_kuramoto/offline_robustness/leave_one_asset_out.csv (13 asset-LOO perturbations × 5 walk-forward folds) — this is a bona fide OOS matrix for Bailey et al. (2017) PBO estimation.

Changes:
- kuramoto_contract.py — optional loo_grid field on the contract; inline LOO_GRID_SHA256 constant for fail-closed hash verification outside the 28-entry SOURCE_HASHES.json contract (additive, SOURCE_HASHES untouched). Missing file is tolerated (loo_grid=None); present-but-mismatched file raises FrozenArtifactMismatch.
- kuramoto_cpcv_suite.py — _loo_oos_matrix() builds (folds × strategies) from non-baseline LOO rows; estimate_pbo() runs on it when present. KuramotoCPCVResult now carries loo_pbo (float|None), loo_pbo_pass, loo_n_strategies alongside the existing fields.
- backtest/robustness_gates.py — _CPCVEvidence Protocol gains loo_pbo_pass; evaluate_robustness_gates() includes it in cpcv_pass conjunction.
- CLI + ROBUSTNESS_v1.md now surface 'CPCV | PBO (LOO grid, n=13) | 0.2000 ✓'.

First-run evidence on the frozen bundle:
  PBO (fold mirror): 0.0000 (trivial, as before — kept for continuity)
  PBO (LOO grid): 0.2000 (13 strategies × 5 folds — real estimator)
  best-IS each fold: tradable:TLT × 5 (OOS ranks 6, 13, 14, 14, 14)

Interpretation: in 1 of 5 folds the best-IS strategy lands below the OOS median → 20% overfit probability on the LOO family. Consistent with SEPARATION_FINDING.md ('drop TLT → Sharpe 1.26 → 1.73'): the TLT-drop variant is genuinely best on 4 of 5 folds, not a lucky pick.

Tests:
- test_loo_pbo_present_and_bounded — loo_pbo ∈ [0, 1], n=13.
- test_loo_pbo_matches_hand_computed — regression pin at 0.20.
- test_loo_pbo_red_gives_fail — decision layer correctly propagates loo_pbo_pass=False to FAIL.
- Existing _FakeCPCV fixture gained loo_pbo_pass: bool = True default so existing decision-layer tests stay green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 81762cd commit 5df09ed

9 files changed

Lines changed: 944 additions & 211 deletions

File tree

backtest/robustness_gates.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ def psr_pass(self) -> bool: ...
3838
def annualised_sharpe(self) -> float: ...
3939
@property
4040
def n_folds(self) -> int: ...
41+
@property
42+
def loo_pbo_pass(self) -> bool: ...
4143

4244

4345
@runtime_checkable
@@ -94,11 +96,15 @@ def evaluate_robustness_gates(
9496
passing, or placeholder with ``require_live_jitter`` False.
9597
"""
9698
reasons: list[str] = []
97-
cpcv_pass = bool(evidence.cpcv.pbo_pass and evidence.cpcv.psr_pass)
99+
cpcv_pass = bool(
100+
evidence.cpcv.pbo_pass and evidence.cpcv.psr_pass and evidence.cpcv.loo_pbo_pass
101+
)
98102
if not evidence.cpcv.pbo_pass:
99103
reasons.append("cpcv: PBO above threshold")
100104
if not evidence.cpcv.psr_pass:
101105
reasons.append("cpcv: PSR below threshold")
106+
if not evidence.cpcv.loo_pbo_pass:
107+
reasons.append("cpcv: LOO-grid PBO above threshold")
102108

103109
null_pass = bool(evidence.null.all_families_pass)
104110
if not null_pass:

research/robustness/protocols/kuramoto_contract.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,34 @@
3131
DEMO_RISK_REL: Final[str] = "results/cross_asset_kuramoto/demo/risk_metrics.csv"
3232
PARAM_LOCK_REL: Final[str] = "results/cross_asset_kuramoto/PARAMETER_LOCK.json"
3333

34+
# Extension manifest: hash-verified auxiliary artifacts that are *outside*
35+
# the original 28-artifact SOURCE_HASHES.json contract. Adding a new
36+
# auxiliary input requires appending here AND in the inline hash constant
37+
# below — both must move together, so drift is caught at load time.
38+
LOO_GRID_REL: Final[str] = "results/cross_asset_kuramoto/offline_robustness/leave_one_asset_out.csv"
39+
LOO_GRID_SHA256: Final[str] = "9fdb19129630bddcda7499cd6a1ec20b68b34715d8c52c72bd286676e8156a61"
40+
3441

3542
class FrozenArtifactMismatch(RuntimeError):
3643
"""Raised when a frozen artifact fails sha256 verification or is missing."""
3744

3845

46+
def _load_loo_grid_if_present(repo_root: Path) -> pd.DataFrame | None:
    """Load + hash-verify the LOO grid; return None if the file is absent.

    The file is read from disk exactly once. Both the sha256 digest and
    the parsed frame are derived from the same in-memory bytes, so the
    content that passed verification is the content that gets parsed;
    the file cannot change between hashing and ``read_csv``.

    A present-but-mismatched file is fail-closed (raises
    :class:`FrozenArtifactMismatch`); a missing file is tolerated so the
    framework can run on a minimal frozen bundle.

    Args:
        repo_root: Directory against which ``LOO_GRID_REL`` is resolved.

    Returns:
        The parsed CSV as a DataFrame, or ``None`` when no regular file
        exists at the resolved path.

    Raises:
        FrozenArtifactMismatch: If the file's sha256 differs from
            ``LOO_GRID_SHA256``.
    """
    import io  # function-scope import so the block stays self-contained

    path = repo_root / LOO_GRID_REL
    if not path.is_file():
        return None
    # Single read: hash and parse must see identical bytes.
    raw = path.read_bytes()
    sha = hashlib.sha256(raw).hexdigest()
    if sha != LOO_GRID_SHA256:
        raise FrozenArtifactMismatch(f"{LOO_GRID_REL}: got {sha}, expected {LOO_GRID_SHA256}")
    return pd.read_csv(io.BytesIO(raw))
60+
61+
3962
@dataclass(frozen=True)
4063
class FrozenArtifactManifest:
4164
"""Parsed SOURCE_HASHES.json plus the repo root used to resolve rels."""
@@ -91,14 +114,22 @@ class KuramotoRobustnessContract:
91114
fold_metrics: pd.DataFrame
92115
risk_metrics: pd.DataFrame
93116
parameter_lock: dict[str, object]
117+
loo_grid: pd.DataFrame | None = None
94118

95119
@classmethod
96120
def from_frozen_artifacts(
97121
cls,
98122
manifest_path: Path = DEFAULT_MANIFEST,
99123
repo_root: Path = REPO_ROOT,
100124
) -> KuramotoRobustnessContract:
101-
"""Load contract and verify hashes in one fail-closed step."""
125+
"""Load contract and verify hashes in one fail-closed step.
126+
127+
Also loads the optional leave-one-asset-out grid when the file
128+
is present on disk; its sha256 is verified against the inline
129+
:data:`LOO_GRID_SHA256` constant. A missing LOO file is tolerated
130+
(``loo_grid`` stays None); a present-but-mismatched file is a
131+
fail-closed error.
132+
"""
102133
manifest = FrozenArtifactManifest.load(manifest_path, repo_root)
103134
manifest.verify_all()
104135

@@ -112,12 +143,14 @@ def from_frozen_artifacts(
112143
)
113144
risk = pd.read_csv(repo_root / DEMO_RISK_REL)
114145
param_lock = json.loads((repo_root / PARAM_LOCK_REL).read_text(encoding="utf-8"))
146+
loo_grid = _load_loo_grid_if_present(repo_root)
115147
contract = cls(
116148
manifest=manifest,
117149
equity_curve=equity,
118150
fold_metrics=folds,
119151
risk_metrics=risk,
120152
parameter_lock=param_lock,
153+
loo_grid=loo_grid,
121154
)
122155
contract.assert_frozen_consistency()
123156
return contract
@@ -136,6 +169,22 @@ def assert_frozen_consistency(self) -> None:
136169
raise FrozenArtifactMismatch(
137170
f"fold_metrics.csv needs at least 2 folds, has {len(self.fold_metrics)}"
138171
)
172+
if self.loo_grid is not None:
173+
required_loo = {
174+
"loo_type",
175+
"omitted_asset",
176+
"oos_sharpe",
177+
"fold1",
178+
"fold2",
179+
"fold3",
180+
"fold4",
181+
"fold5",
182+
}
183+
missing = required_loo - set(self.loo_grid.columns)
184+
if missing:
185+
raise FrozenArtifactMismatch(
186+
f"leave_one_asset_out.csv missing columns: {sorted(missing)}"
187+
)
139188

140189
def daily_strategy_returns(self) -> pd.Series:
141190
"""Strategy daily returns from ``strategy_cumret`` (pct_change)."""

research/robustness/protocols/kuramoto_cpcv_suite.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@
88
from typing import Final
99

1010
import numpy as np
11+
import pandas as pd
12+
from numpy.typing import NDArray
1113

1214
from research.robustness.cpcv import estimate_pbo, probabilistic_sharpe_ratio
1315

1416
from .kuramoto_contract import KuramotoRobustnessContract
1517

1618
PBO_PASS_THRESHOLD: Final[float] = 0.50
1719
PSR_PASS_THRESHOLD: Final[float] = 0.95
20+
LOO_PBO_PASS_THRESHOLD: Final[float] = 0.50
1821

1922

2023
@dataclass(frozen=True)
@@ -29,9 +32,12 @@ class KuramotoCPCVResult:
2932
annualised_sharpe: float
3033
n_bars: int
3134
n_folds: int
35+
loo_pbo: float | None
36+
loo_pbo_pass: bool
37+
loo_n_strategies: int
3238

3339

34-
def _fold_oos_matrix(fold_sharpes: tuple[float, ...]) -> np.ndarray:
40+
def _fold_oos_matrix(fold_sharpes: tuple[float, ...]) -> NDArray[np.float64]:
3541
"""Build an OOS matrix from per-fold Sharpe values for PBO estimation.
3642
3743
The frozen evidence bundle ships one Sharpe per walk-forward fold
@@ -45,6 +51,23 @@ def _fold_oos_matrix(fold_sharpes: tuple[float, ...]) -> np.ndarray:
4551
return np.column_stack([arr, mirror])
4652

4753

54+
def _loo_oos_matrix(loo_grid: pd.DataFrame) -> NDArray[np.float64]:
55+
"""Build an OOS matrix of (folds × strategies) from the LOO grid.
56+
57+
Each non-baseline LOO row is one strategy variant whose per-fold
58+
Sharpes live in columns ``fold1..fold5``. We transpose so rows are
59+
CPCV paths (folds) and columns are strategies, matching
60+
:func:`research.robustness.cpcv.estimate_pbo` shape expectations.
61+
The baseline row is excluded: including it would guarantee best-IS
62+
capture every time and trivialise the PBO.
63+
"""
64+
perturbations = loo_grid[loo_grid["loo_type"] != "baseline_full"]
65+
folds = perturbations[["fold1", "fold2", "fold3", "fold4", "fold5"]].to_numpy(dtype=np.float64)
66+
# folds shape is (n_strategies, n_folds); transpose to (n_folds, n_strategies)
67+
out: NDArray[np.float64] = folds.T
68+
return out
69+
70+
4871
def run_kuramoto_cpcv_suite(
4972
contract: KuramotoRobustnessContract,
5073
) -> KuramotoCPCVResult:
@@ -55,6 +78,10 @@ def run_kuramoto_cpcv_suite(
5578
pre-computed fold sharpes for PBO and the daily return stream for
5679
PSR. No re-simulation is performed — this suite is *read-only* on
5780
frozen artifacts by design.
81+
82+
When the contract carries the optional LOO grid, a *second* PBO is
83+
computed on the real (folds × LOO-perturbations) OOS matrix — this
84+
is the honest Bailey et al. PBO and is non-trivial by construction.
5885
"""
5986
daily = contract.daily_strategy_returns().to_numpy(dtype=np.float64)
6087
fold_sharpes = tuple(float(s) for s in contract.fold_metrics["sharpe"].to_numpy())
@@ -67,6 +94,17 @@ def run_kuramoto_cpcv_suite(
6794
pbo = estimate_pbo(oos)
6895
std = float(np.std(daily, ddof=1))
6996
sr = float(np.mean(daily) / std * np.sqrt(252)) if std > 0 and np.isfinite(std) else 0.0
97+
98+
loo_pbo: float | None = None
99+
loo_pbo_pass = True
100+
loo_n_strategies = 0
101+
if contract.loo_grid is not None:
102+
loo_oos = _loo_oos_matrix(contract.loo_grid)
103+
loo_n_strategies = int(loo_oos.shape[1])
104+
if loo_oos.shape[0] >= 2 and loo_n_strategies >= 2:
105+
loo_pbo = estimate_pbo(loo_oos)
106+
loo_pbo_pass = loo_pbo < LOO_PBO_PASS_THRESHOLD
107+
70108
return KuramotoCPCVResult(
71109
fold_sharpes=fold_sharpes,
72110
pbo=pbo,
@@ -76,4 +114,7 @@ def run_kuramoto_cpcv_suite(
76114
annualised_sharpe=sr,
77115
n_bars=int(daily.size),
78116
n_folds=len(fold_sharpes),
117+
loo_pbo=loo_pbo,
118+
loo_pbo_pass=loo_pbo_pass,
119+
loo_n_strategies=loo_n_strategies,
79120
)

results/cross_asset_kuramoto/robustness_v1/ROBUSTNESS_v1.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,12 @@ Terminal decision: **FAIL**
66

77
| Suite | Metric | Value | Pass |
88
|---|---|---:|:-:|
9-
| CPCV | PBO | 0.0000 ||
9+
| CPCV | PBO (fold mirror) | 0.0000 ||
1010
| CPCV | PSR (daily) | 1.0000 ||
1111
| CPCV | Annualised Sharpe (daily) | 0.5775 | n/a |
12-
| Null | iid_permutation p-value | 0.1095 ||
13-
| Null | stationary_bootstrap p-value | 0.4677 ||
12+
| CPCV | PBO (LOO grid, n=13) | 0.2000 ||
13+
| Null | iid_permutation p-value | 0.0878 ||
14+
| Null | stationary_bootstrap p-value | 0.5170 ||
1415
| Jitter | fraction_within_tol | 1.0000 ||
1516
| Jitter | evaluator_mode | `PLACEHOLDER_APPROXIMATION` | n/a |
1617

results/cross_asset_kuramoto/robustness_v1/cpcv_summary.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
0.942,
88
0.8507
99
],
10+
"loo_n_strategies": 13,
11+
"loo_pbo": 0.2,
12+
"loo_pbo_pass": true,
1013
"n_bars": 2166,
1114
"n_folds": 5,
1215
"pbo": 0.0,

0 commit comments

Comments
 (0)