|
| 1 | +"""Betti-1 topology signal on Askar's 57-asset extended hourly panel. |
| 2 | +
|
| 3 | +Topologically distinct from Forman-Ricci (PR #194/#197): Ricci is a |
| 4 | +local per-edge curvature, B₁ is a global invariant counting |
| 5 | +independent cycles in the 1-skeleton of the Vietoris-Rips complex |
| 6 | +built from the rolling correlation graph. |
| 7 | +
|
| 8 | +Grounding: Gidea & Katz (2018) "Topological data analysis of financial |
| 9 | +time series" — Betti-1 shows documented pre-crash lift 10-20 days |
| 10 | +ahead of equity index drawdowns, which lines up with the 10-30 bar |
| 11 | +lead-capture window we track in the stress-detector gate. |
| 12 | +
|
| 13 | +Formula (1-skeleton, numpy + scipy only; per-bar cost is dominated by the O(W·|V|²) rolling correlation matrix): |
| 14 | +
|
| 15 | + B₁(t) = |E(t)| − |V| + k(t) |
| 16 | +
|
| 17 | + where |
| 18 | + E(t) = set of edges with |corr(returns[t−W:t])| > threshold |
| 19 | + V = 57 (number of panel assets, constant) |
| 20 | + k(t) = number of connected components of the adjacency matrix |
| 21 | + (scipy.sparse.csgraph.connected_components) |
| 22 | +
|
| 23 | +Same 3-D gate as ricci_wide_panel_final: |
| 24 | + DETECT IC(B₁, fwd_SPX_1h) ≥ 0.08 AND permutation p < 0.10 |
| 25 | + DISCRIMINATE |corr(B₁, mom_20)| < 0.15 AND |corr(B₁, vol_10)| < 0.15 |
| 26 | + DELIVER alerts(B₁ > expanding Q90) lead ≥ 60 % of |
| 27 | + future 20-bar cumulative SPX drawdown events (< −5 %) |
| 28 | +
|
| 29 | +Output: results/betti1_verdict.json |
| 30 | +""" |
| 31 | + |
| 32 | +from __future__ import annotations |
| 33 | + |
| 34 | +import json |
| 35 | +from pathlib import Path |
| 36 | +from typing import Any |
| 37 | + |
| 38 | +import numpy as np |
| 39 | +import pandas as pd |
| 40 | +from scipy.sparse.csgraph import connected_components |
| 41 | +from scipy.stats import spearmanr |
| 42 | + |
# Repository root is taken as the third ancestor of this file's resolved path
# (parents[2]), i.e. the file is assumed to sit two directories below the root.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Input price panel (parquet) and output verdict file, both relative to REPO_ROOT.
EXTENDED_PANEL_PATH = REPO_ROOT / "data" / "askar_full" / "panel_hourly_extended.parquet"
VERDICT_PATH = REPO_ROOT / "results" / "betti1_verdict.json"

# Rolling correlation window (bars) and |corr| edge threshold passed to compute_betti1.
WINDOW = 60
THRESHOLD = 0.30
# Windows for the momentum (rolling sum) and volatility (rolling std) control factors.
MOMENTUM_WINDOW = 20
VOL_WINDOW = 10
# Number of shuffles used by the permutation test on the IC.
PERMUTATIONS = 500
# Forward horizon (bars) over which target returns are summed to define drawdown events.
LEAD_FWD_WINDOW = 20
# Alerts are searched between LEAD_LOOKBACK_MAX and LEAD_LOOKBACK_MIN bars before an event.
LEAD_LOOKBACK_MAX = 30
LEAD_LOOKBACK_MIN = 10
# A forward cumulative return below this value counts as a drawdown event.
DRAWDOWN_THRESHOLD = -0.05

# Gate thresholds: minimum IC, maximum permutation p-value, maximum |corr| with the
# control factors, and minimum lead-capture rate.
IC_GATE = 0.08
P_GATE = 0.10
CORR_FACTOR_GATE = 0.15
LEAD_CAPTURE_GATE = 0.60

# Column holding the target index; VIX_LIKE / HYG_LIKE are substrings used with
# DataFrame.filter(like=...) to select the auxiliary columns.
TARGET_COL = "USA_500_Index"
VIX_LIKE = "VIX"
HYG_LIKE = "High_Yield"
| 65 | + |
| 66 | + |
| 67 | +# -------------------------------------------------------------------- # |
| 68 | +# Core computation: B₁ = |E| − |V| + k |
| 69 | +# -------------------------------------------------------------------- # |
| 70 | + |
| 71 | + |
def compute_betti1(
    returns: pd.DataFrame,
    window: int = WINDOW,
    threshold: float = THRESHOLD,
) -> pd.Series:
    """Rolling first Betti number of the thresholded correlation graph.

    For every bar ``t`` that has a full trailing window, the Pearson
    correlation matrix of ``returns[t - window + 1 : t + 1]`` is turned into
    an undirected graph (edge where ``|corr| > threshold``, no self-loops) and

        B₁ = |E| − |V| + k

    is recorded, where ``k`` is the number of connected components reported
    by ``scipy.sparse.csgraph.connected_components``.

    The per-bar cost is dominated by the dense correlation matrix,
    O(window · |V|²); the graph bookkeeping on top of it is cheap.

    Args:
        returns: One column per asset, one row per bar.
        window: Number of trailing bars (including the current one) used for
            each correlation matrix.
        threshold: Absolute-correlation level above which two assets are
            connected.

    Returns:
        A float Series on ``returns.index``. Bars without a full window are
        NaN. The whole Series is NaN when there are fewer than ``window``
        bars, fewer than two assets, or ``window < 2`` (a correlation needs
        at least two observations).
    """
    vals = returns.to_numpy(dtype=float)
    n_bars, n_assets = vals.shape
    # Fill a plain ndarray and wrap it once at the end: per-element
    # ``Series.iloc`` writes are far slower than ndarray writes.
    betti = np.full(n_bars, np.nan, dtype=float)
    if window < 2 or n_bars < window or n_assets < 2:
        return pd.Series(betti, index=returns.index, dtype=float)

    # A column that is constant inside a window has zero variance, so
    # corrcoef divides 0/0 and yields NaN. Those NaNs are mapped to "no edge"
    # below, so the accompanying RuntimeWarnings carry no information.
    with np.errstate(divide="ignore", invalid="ignore"):
        for t in range(window - 1, n_bars):
            w = vals[t - window + 1 : t + 1]
            corr = np.nan_to_num(np.corrcoef(w, rowvar=False), nan=0.0)
            adj = (np.abs(corr) > threshold).astype(int)
            np.fill_diagonal(adj, 0)
            # The adjacency matrix is symmetric, so every edge is counted twice.
            edges = int(adj.sum()) // 2
            n_comp, _ = connected_components(adj, directed=False)
            betti[t] = float(edges - n_assets + int(n_comp))
    return pd.Series(betti, index=returns.index, dtype=float)
| 101 | + |
| 102 | + |
| 103 | +# -------------------------------------------------------------------- # |
| 104 | +# Statistical primitives (inline, zero-dependency beyond scipy) |
| 105 | +# -------------------------------------------------------------------- # |
| 106 | + |
| 107 | + |
def _scorr(a: pd.Series, b: pd.Series) -> float:
    """Spearman rank correlation of two series over their jointly valid rows.

    The series are aligned on their index and rows with a NaN in either one
    are discarded. Returns 0.0 when fewer than 30 paired observations remain
    or when the coefficient is not finite (e.g. one side is constant).
    """
    paired = pd.concat([a, b], axis=1).dropna()
    if len(paired) >= 30:
        coefficient, _pvalue = spearmanr(paired.iloc[:, 0], paired.iloc[:, 1])
        if np.isfinite(coefficient):
            return float(coefficient)
    return 0.0
| 114 | + |
| 115 | + |
def _permutation_pvalue(
    signal: pd.Series,
    target: pd.Series,
    permutations: int = PERMUTATIONS,
    seed: int = 42,
) -> float:
    """Two-sided permutation p-value for the Spearman correlation.

    The series are aligned on their index and rows with a NaN in either one
    are dropped. The target is then shuffled ``permutations`` times and the
    p-value is ``(hits + 1) / (permutations + 1)``, where a hit is a shuffle
    whose absolute Spearman correlation is at least the observed one.

    Spearman's rho is the Pearson correlation of the ranks, and shuffling a
    series shuffles its ranks the same way. Both series are therefore ranked
    once, and only the cross-product of the centred ranks is recomputed per
    shuffle; the normalising factor is identical for every shuffle and
    cancels out of the comparison.

    Args:
        signal: Predictor series.
        target: Series that is shuffled under the null hypothesis.
        permutations: Number of shuffles.
        seed: Seed for ``numpy.random.default_rng``.

    Returns:
        The p-value, or 1.0 when fewer than 30 paired observations remain or
        when either series is constant (correlation undefined).
    """
    frame = pd.concat([signal, target], axis=1).dropna()
    n_obs = len(frame)
    if n_obs < 30:
        return 1.0

    ranks = frame.rank(method="average").to_numpy(dtype=float)
    # Average ranks of n values always have mean (n + 1) / 2. Centring with
    # that exact value keeps every entry a multiple of 0.5, so the dot
    # products below are exact in float64 and ties compare reliably.
    centre = (n_obs + 1) / 2.0
    rx = ranks[:, 0] - centre
    ry = ranks[:, 1] - centre
    if not rx.any() or not ry.any():
        # A constant series has no defined rank correlation; treat the
        # signal as uninformative.
        return 1.0

    observed = abs(float(rx @ ry))
    rng = np.random.default_rng(seed)
    hits = sum(
        abs(float(rx @ rng.permutation(ry))) >= observed
        for _ in range(permutations)
    )
    return float((hits + 1) / (permutations + 1))
| 138 | + |
| 139 | + |
def _lead_capture(
    b1: pd.Series,
    fwd_cum: pd.Series,
    threshold: float,
    lookback_min: int = LEAD_LOOKBACK_MIN,
    lookback_max: int = LEAD_LOOKBACK_MAX,
) -> tuple[float, int, int]:
    """Share of drawdown events that were preceded by a B₁ alert.

    An alert fires on a bar where ``b1`` exceeds its own expanding 90 %
    quantile. A drawdown event is a bar where ``fwd_cum`` is below
    ``threshold``. An event counts as captured when at least one alert fired
    between ``lookback_max`` and ``lookback_min`` bars before it, both ends
    inclusive.

    Args:
        b1: Signal series.
        fwd_cum: Forward cumulative return; every label is expected to be
            present in ``b1.index``.
        threshold: Level below which ``fwd_cum`` marks a drawdown event.
        lookback_min: Shortest lead, in bars, that still counts.
        lookback_max: Longest lead, in bars, that still counts.

    Returns:
        ``(capture_rate, captured_events, total_events)``; ``(0.0, 0, 0)``
        when there are no drawdown events.
    """
    alerts = (b1 > b1.expanding().quantile(0.90)).to_numpy(dtype=bool)
    drawdown_events = fwd_cum[fwd_cum < threshold]
    n_events = int(len(drawdown_events))
    if n_events == 0:
        return 0.0, 0, 0

    captured = 0
    for ts in drawdown_events.index:
        loc = b1.index.get_loc(ts)
        # get_loc returns a slice or mask for duplicated labels; such events
        # stay in the denominator but cannot be positioned, so they are
        # never counted as captured.
        if not isinstance(loc, (int, np.integer)):
            continue
        lo = max(0, int(loc) - lookback_max)
        # +1 makes the upper end inclusive, so an alert exactly
        # ``lookback_min`` bars ahead of the event is part of the window.
        hi = max(0, int(loc) - lookback_min + 1)
        if hi > lo and bool(alerts[lo:hi].any()):
            captured += 1
    return float(captured / n_events), captured, n_events
| 155 | + |
| 156 | + |
| 157 | +# -------------------------------------------------------------------- # |
| 158 | +# Orchestration |
| 159 | +# -------------------------------------------------------------------- # |
| 160 | + |
| 161 | + |
def run() -> dict[str, Any]:
    """Evaluate the Betti-1 signal on the extended panel and write the verdict.

    Steps:
      1. Load the price panel from ``EXTENDED_PANEL_PATH`` and convert it to
         log returns; any bar with a missing or infinite return in any column
         is dropped.
      2. Compute rolling B₁ and score it against the next-bar return of
         ``TARGET_COL`` (Spearman IC plus permutation p-value).
      3. Correlate B₁ with momentum, volatility and the VIX-like / high-yield
         columns, and measure the lead-capture rate on forward drawdowns.
      4. Apply the DETECT / DISCRIMINATE / DELIVER gates, then write the
         verdict to ``VERDICT_PATH`` and print it.

    Returns:
        The verdict dictionary that was written.

    Raises:
        FileNotFoundError: If the panel file does not exist.
        KeyError: If ``TARGET_COL`` is not a column of the panel.
    """
    if not EXTENDED_PANEL_PATH.exists():
        raise FileNotFoundError(
            f"PRIME_ARCHITECT HALT: missing extended panel: {EXTENDED_PANEL_PATH}"
        )
    prices = pd.read_parquet(EXTENDED_PANEL_PATH)
    prices.index = pd.to_datetime(prices.index)
    prices = prices.sort_index()
    # Fail before the expensive rolling-correlation loop rather than after it.
    if TARGET_COL not in prices.columns:
        raise KeyError(
            f"PRIME_ARCHITECT HALT: target column {TARGET_COL!r} missing from "
            f"{EXTENDED_PANEL_PATH}"
        )

    ratio = prices / prices.shift(1)
    # Non-positive ratios give -inf/NaN logs; those rows are removed just
    # below, so the numpy warnings are noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        log_arr = np.log(ratio.to_numpy())
    returns = (
        pd.DataFrame(log_arr, index=ratio.index, columns=ratio.columns)
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )
    target_returns = returns[TARGET_COL]

    b1 = compute_betti1(returns, window=WINDOW, threshold=THRESHOLD)
    # Next-bar return of the target: the quantity the signal should predict.
    target = target_returns.shift(-1)

    momentum_20 = target_returns.rolling(MOMENTUM_WINDOW).sum()
    vol_10 = target_returns.rolling(VOL_WINDOW).std()

    vix_frame = returns.filter(like=VIX_LIKE)
    hyg_frame = returns.filter(like=HYG_LIKE)
    vix_ret = (
        vix_frame.mean(axis=1)
        if vix_frame.shape[1] > 0
        else pd.Series(np.nan, index=returns.index, dtype=float)
    )
    hyg_ret = (
        hyg_frame.mean(axis=1)
        if hyg_frame.shape[1] > 0
        else pd.Series(np.nan, index=returns.index, dtype=float)
    )

    ic = _scorr(b1, target)
    p_value = _permutation_pvalue(b1, target, permutations=PERMUTATIONS)
    corr_m = _scorr(b1, momentum_20)
    corr_v = _scorr(b1, vol_10)
    corr_vix = _scorr(b1, vix_ret)
    corr_hyg = _scorr(b1, hyg_ret)

    # Sum of the next LEAD_FWD_WINDOW returns, excluding the current bar.
    fwd_cum = target_returns.rolling(LEAD_FWD_WINDOW).sum().shift(-LEAD_FWD_WINDOW)
    lead_capture, captured_n, event_n = _lead_capture(b1, fwd_cum, DRAWDOWN_THRESHOLD)

    detect = bool(np.isfinite(ic) and ic >= IC_GATE and p_value < P_GATE)
    discriminate = bool(
        np.isfinite(corr_m)
        and np.isfinite(corr_v)
        and abs(corr_m) < CORR_FACTOR_GATE
        and abs(corr_v) < CORR_FACTOR_GATE
    )
    deliver = bool(lead_capture >= LEAD_CAPTURE_GATE)
    final_pass = detect and discriminate and deliver

    # Summary statistics over the bars where B₁ is defined; all None when
    # there are no such bars.
    b1_valid = b1.dropna()
    if b1_valid.empty:
        b1_stats: dict[str, float | None] = dict.fromkeys(
            ("mean", "median", "p05", "p95", "max")
        )
    else:
        b1_stats = {
            "mean": float(b1_valid.mean()),
            "median": float(b1_valid.median()),
            "p05": float(b1_valid.quantile(0.05)),
            "p95": float(b1_valid.quantile(0.95)),
            "max": float(b1_valid.max()),
        }

    verdict = {
        "substrate": {
            "source": str(EXTENDED_PANEL_PATH.relative_to(REPO_ROOT)),
            "n_assets": int(returns.shape[1]),
            "n_bars": int(len(returns)),
            "first_ts": str(returns.index.min()),
            "last_ts": str(returns.index.max()),
            "window": WINDOW,
            "threshold": THRESHOLD,
            "permutations": PERMUTATIONS,
        },
        "b1_stats": b1_stats,
        "IC": round(float(ic), 6),
        "p_value": round(float(p_value), 6),
        "corr_momentum": round(float(corr_m), 6),
        "corr_vol": round(float(corr_v), 6),
        "corr_vix": round(float(corr_vix), 6),
        "corr_hyg": round(float(corr_hyg), 6),
        "lead_capture": round(float(lead_capture), 6),
        "lead_capture_detail": {
            "captured": captured_n,
            "drawdown_events": event_n,
            "drawdown_threshold": DRAWDOWN_THRESHOLD,
            "fwd_window": LEAD_FWD_WINDOW,
            "lookback_bars": [LEAD_LOOKBACK_MIN, LEAD_LOOKBACK_MAX],
        },
        "DETECT": "PASS" if detect else "FAIL",
        "DISCRIMINATE": "PASS" if discriminate else "FAIL",
        "DELIVER": "PASS" if deliver else "FAIL",
        "FINAL": "SIGNAL_READY" if final_pass else "REJECT",
    }

    rendered = json.dumps(verdict, indent=2)
    VERDICT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Explicit encoding so the file does not depend on the platform locale.
    VERDICT_PATH.write_text(rendered, encoding="utf-8")
    print(rendered)
    return verdict


if __name__ == "__main__":
    run()
0 commit comments