Skip to content

Commit 84d728d

Browse files
neuron7xLab and claude authored
research(askar): Betti-1 topology + Ricci wide-panel final — both REJECT, evidence-ledger entries (#225)
Closing two outstanding research cycles with honest negative results: ## 1. Ricci wide-panel final (d1f68a9) Forman-Ricci curvature on 57-asset extended hourly panel + VIX/HYG orthogonality gate. 3-D institutional verdict: DETECT IC = -0.0067, p = 0.86 → FAIL DISCRIMINATE corr(vol_10) = -0.22 → FAIL (|corr| > 0.15) DELIVER lead_capture = 0.0 → FAIL FINAL = REJECT. Stress detector flagged contradiction: "stress_signal_leaks_volatility" (orthogonality broken). ## 2. Betti-1 topology signal (11d63da) TDA cycle count on 1-skeleton of Vietoris-Rips complex, per Gidea & Katz (2018). Topologically distinct from Forman-Ricci (local curvature vs global invariant): DETECT IC = 0.014, p = 0.032 → FAIL DISCRIMINATE corr(vol_10) = 0.39 → FAIL DELIVER lead_capture = 0.078 → FAIL FINAL = REJECT. ## Why merge rejected signals? These are ledger entries in the unfalsifiable-fortress: documented refutations that prevent retrying the same approach under a new name. Modules remain as reusable primitives for future work (compute_betti1, wide-panel gate, residualised stress detector). ## Quality gates ruff check — clean on all 5 new files mypy --strict — no issues on 3 source files pytest tests/askar/ — 13/13 new tests pass ## Artifacts (8 files, +1,177 loc + 6 MB parquet) research/askar/betti_topology.py research/askar/ricci_wide_panel_final.py research/askar/stress_detector.py tests/askar/test_betti_topology.py tests/askar/test_ricci_wide_panel_final.py results/betti1_verdict.json results/verdict_wide_panel.json data/askar_full/panel_hourly_extended.parquet Supersedes and replaces branches: research/askar-betti1-topology-pr198 (deleted after merge) research/askar-ricci-wide-panel-vix-hyg-gate (already deleted, SUPERSEDED) Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 17693ba commit 84d728d

8 files changed

Lines changed: 1177 additions & 0 deletions

File tree

5.99 MB
Binary file not shown.

research/askar/betti_topology.py

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
"""Betti-1 topology signal on Askar's 57-asset extended hourly panel.
2+
3+
Topologically distinct from Forman-Ricci (PR #194/#197): Ricci is a
4+
local per-edge curvature, B₁ is a global invariant counting
5+
independent cycles in the 1-skeleton of the Vietoris-Rips complex
6+
built from the rolling correlation graph.
7+
8+
Grounding: Gidea & Katz (2018) "Topological data analysis of financial
9+
time series" — Betti-1 shows documented pre-crash lift 10-20 days
10+
ahead of equity index drawdowns, which lines up with the 10-30 bar
11+
lead-capture window we track in the stress-detector gate.
12+
13+
Formula (1-skeleton, O(edges) per bar, numpy + scipy only):
14+
15+
B₁(t) = |E(t)| − |V| + k(t)
16+
17+
where
18+
E(t) = set of edges with |corr(returns[t−W:t])| > threshold
19+
V = 57 (number of panel assets, constant)
20+
k(t) = number of connected components of the adjacency matrix
21+
(scipy.sparse.csgraph.connected_components)
22+
23+
Same 3-D gate as ricci_wide_panel_final:
24+
DETECT IC(B₁, fwd_SPX_1h) ≥ 0.08 AND permutation p < 0.10
25+
DISCRIMINATE |corr(B₁, mom_20)| < 0.15 AND |corr(B₁, vol_10)| < 0.15
26+
DELIVER alerts(B₁ > expanding Q90) lead ≥ 60 % of
27+
future 20-bar cumulative SPX drawdown events (< −5 %)
28+
29+
Output: results/betti1_verdict.json
30+
"""
31+
32+
from __future__ import annotations
33+
34+
import json
35+
from pathlib import Path
36+
from typing import Any
37+
38+
import numpy as np
39+
import pandas as pd
40+
from scipy.sparse.csgraph import connected_components
41+
from scipy.stats import spearmanr
42+
43+
REPO_ROOT = Path(__file__).resolve().parents[2]
EXTENDED_PANEL_PATH = REPO_ROOT / "data" / "askar_full" / "panel_hourly_extended.parquet"
VERDICT_PATH = REPO_ROOT / "results" / "betti1_verdict.json"

# Rolling-graph construction parameters.
WINDOW = 60        # bars per trailing correlation window
THRESHOLD = 0.30   # |corr| cut-off for drawing an edge
MOMENTUM_WINDOW = 20
VOL_WINDOW = 10
PERMUTATIONS = 500
LEAD_FWD_WINDOW = 20
LEAD_LOOKBACK_MAX = 30
LEAD_LOOKBACK_MIN = 10
DRAWDOWN_THRESHOLD = -0.05

# 3-D gate thresholds (DETECT / DISCRIMINATE / DELIVER).
IC_GATE = 0.08
P_GATE = 0.10
CORR_FACTOR_GATE = 0.15
LEAD_CAPTURE_GATE = 0.60

TARGET_COL = "USA_500_Index"
VIX_LIKE = "VIX"
HYG_LIKE = "High_Yield"


# -------------------------------------------------------------------- #
# Core computation: B₁ = |E| − |V| + k
# -------------------------------------------------------------------- #


def compute_betti1(
    returns: pd.DataFrame,
    window: int = WINDOW,
    threshold: float = THRESHOLD,
) -> pd.Series:
    """Rolling first Betti number of the thresholded correlation graph.

    For each bar, correlations are computed over the trailing ``window``
    bars, an edge is drawn wherever ``|corr| > threshold``, and

        B₁ = |E| − |V| + k

    where k is the number of connected components — i.e. the dimension
    of the 1-cycle space of the 1-skeleton. Components come from
    scipy.sparse.csgraph.connected_components, so the per-bar cost is
    O(edges). Bars before the first full window stay NaN.
    """
    data = returns.to_numpy(dtype=float)
    n_bars, n_assets = data.shape
    betti = pd.Series(np.nan, index=returns.index, dtype=float)
    # Not enough history for one window, or a graph with < 2 vertices:
    # nothing to compute, return the all-NaN series.
    if n_bars < window or n_assets < 2:
        return betti

    for end in range(window, n_bars + 1):
        chunk = data[end - window : end]
        corr = np.nan_to_num(np.corrcoef(chunk, rowvar=False), nan=0.0)
        adjacency = (np.abs(corr) > threshold).astype(int)
        np.fill_diagonal(adjacency, 0)  # no self-loops
        n_edges = int(np.count_nonzero(adjacency)) // 2  # symmetric matrix
        n_components = int(connected_components(adjacency, directed=False)[0])
        betti.iloc[end - 1] = float(n_edges - n_assets + n_components)
    return betti
101+
102+
103+
# -------------------------------------------------------------------- #
104+
# Statistical primitives (inline, zero-dependency beyond scipy)
105+
# -------------------------------------------------------------------- #
106+
107+
108+
def _scorr(a: pd.Series, b: pd.Series) -> float:
109+
frame = pd.concat([a, b], axis=1).dropna()
110+
if len(frame) < 30:
111+
return 0.0
112+
rho, _ = spearmanr(frame.iloc[:, 0], frame.iloc[:, 1])
113+
return float(rho) if np.isfinite(rho) else 0.0
114+
115+
116+
def _permutation_pvalue(
    signal: pd.Series,
    target: pd.Series,
    permutations: int = PERMUTATIONS,
    seed: int = 42,
) -> float:
    """One-sided permutation p-value for |Spearman ρ| between two series.

    Spearman's ρ is the Pearson correlation of the rank vectors, so both
    series are ranked once up front (pandas ``rank()`` — average ties,
    the same convention as scipy.stats.spearmanr) and each shuffle only
    permutes the pre-computed target ranks. Every permutation is then a
    single O(n) dot product instead of a full re-ranking inside scipy.

    Returns 1.0 (no evidence) when fewer than 30 aligned observations
    remain, or when either series has zero rank variance (constant
    input; ρ is undefined there, and the original NaN→0 handling also
    yielded p = 1.0). Uses the add-one estimator
    (count + 1) / (permutations + 1), so the p-value is never exactly 0.
    """
    frame = pd.concat([signal, target], axis=1).dropna()
    if len(frame) < 30:
        return 1.0
    x_ranks = frame.iloc[:, 0].rank().to_numpy(dtype=float)
    y_ranks = frame.iloc[:, 1].rank().to_numpy(dtype=float)
    x_centered = x_ranks - x_ranks.mean()
    # Both rank norms are invariant under permutation, so the whole
    # denominator is hoisted out of the loop.
    denom = float(
        np.sqrt((x_centered**2).sum() * ((y_ranks - y_ranks.mean()) ** 2).sum())
    )
    if denom == 0.0:
        return 1.0
    # x_centered sums to zero, so centering y is unnecessary inside the
    # dot product: x_c · y == x_c · (y − mean(y)).
    obs = abs(float(x_centered @ y_ranks)) / denom
    rng = np.random.default_rng(seed)
    count = 0
    for _ in range(permutations):
        stat = abs(float(x_centered @ rng.permutation(y_ranks))) / denom
        if stat >= obs:
            count += 1
    return float((count + 1) / (permutations + 1))
138+
139+
140+
def _lead_capture(b1: pd.Series, fwd_cum: pd.Series, threshold: float) -> tuple[float, int, int]:
    """Fraction of forward drawdown events preceded by a B₁ alert.

    An alert fires when B₁ exceeds its own expanding 90th percentile.
    A drawdown event (``fwd_cum < threshold``) counts as captured when
    at least one alert fired between LEAD_LOOKBACK_MIN and
    LEAD_LOOKBACK_MAX bars earlier. Returns
    ``(capture_rate, captured_count, event_count)``.
    """
    alerts = b1 > b1.expanding().quantile(0.90)
    events = fwd_cum[fwd_cum < threshold]
    n_events = int(len(events))
    if n_events == 0:
        return 0.0, 0, 0

    def _alerted_before(ts: object) -> bool:
        pos = alerts.index.get_loc(ts)
        # Duplicate index labels make get_loc return a slice/mask; such
        # events are skipped, exactly as in the original implementation.
        if not isinstance(pos, int):
            return False
        lo = max(0, int(pos) - LEAD_LOOKBACK_MAX)
        hi = max(0, int(pos) - LEAD_LOOKBACK_MIN)
        return hi > lo and bool(alerts.iloc[lo:hi].any())

    captured = sum(1 for ts in events.index if _alerted_before(ts))
    return float(captured / n_events), int(captured), n_events
155+
156+
157+
# -------------------------------------------------------------------- #
158+
# Orchestration
159+
# -------------------------------------------------------------------- #
160+
161+
162+
def run() -> dict[str, Any]:
    """Compute the Betti-1 signal on the extended panel and gate it.

    Loads the hourly price panel, derives log returns, computes the
    rolling B₁ series, evaluates the 3-D verdict
    (DETECT / DISCRIMINATE / DELIVER), writes the verdict JSON to
    ``results/betti1_verdict.json``, prints it, and returns it.

    Raises:
        FileNotFoundError: when the extended panel parquet is missing.
    """
    if not EXTENDED_PANEL_PATH.exists():
        raise FileNotFoundError(
            f"PRIME_ARCHITECT HALT: missing extended panel: {EXTENDED_PANEL_PATH}"
        )
    prices = pd.read_parquet(EXTENDED_PANEL_PATH)
    prices.index = pd.to_datetime(prices.index)
    prices = prices.sort_index()

    # Log returns via numpy so non-positive price ratios become NaN/inf
    # instead of raising; those rows are then dropped wholesale (any
    # NaN in any asset removes the bar for all assets).
    ratio = prices / prices.shift(1)
    log_arr = np.log(ratio.to_numpy())
    returns = (
        pd.DataFrame(log_arr, index=ratio.index, columns=ratio.columns)
        .replace([np.inf, -np.inf], np.nan)
        .dropna()
    )

    b1 = compute_betti1(returns, window=WINDOW, threshold=THRESHOLD)
    # DETECT target: next-bar return of the index (shift(-1) = forward).
    target = returns[TARGET_COL].shift(-1)

    # DISCRIMINATE factors: trailing momentum and realized volatility.
    momentum_20 = returns[TARGET_COL].rolling(MOMENTUM_WINDOW).sum()
    vol_10 = returns[TARGET_COL].rolling(VOL_WINDOW).std()

    # Substring match on column names; falls back to an all-NaN series
    # (corr -> 0.0 in _scorr) when no matching column exists.
    vix_frame = returns.filter(like=VIX_LIKE)
    hyg_frame = returns.filter(like=HYG_LIKE)
    vix_ret = (
        vix_frame.mean(axis=1)
        if vix_frame.shape[1] > 0
        else pd.Series(np.nan, index=returns.index, dtype=float)
    )
    hyg_ret = (
        hyg_frame.mean(axis=1)
        if hyg_frame.shape[1] > 0
        else pd.Series(np.nan, index=returns.index, dtype=float)
    )

    ic = _scorr(b1, target)
    p_value = _permutation_pvalue(b1, target, permutations=PERMUTATIONS)
    corr_m = _scorr(b1, momentum_20)
    corr_v = _scorr(b1, vol_10)
    corr_vix = _scorr(b1, vix_ret)
    corr_hyg = _scorr(b1, hyg_ret)

    # DELIVER target: forward LEAD_FWD_WINDOW-bar cumulative return
    # (rolling sum shifted back so each bar sees its own future window).
    fwd_cum = returns[TARGET_COL].rolling(LEAD_FWD_WINDOW).sum().shift(-LEAD_FWD_WINDOW)
    lead_capture, captured_n, event_n = _lead_capture(b1, fwd_cum, DRAWDOWN_THRESHOLD)

    # 3-D gate: all three legs must pass for SIGNAL_READY.
    detect = bool(np.isfinite(ic) and ic >= IC_GATE and p_value < P_GATE)
    discriminate = bool(
        np.isfinite(corr_m)
        and np.isfinite(corr_v)
        and abs(corr_m) < CORR_FACTOR_GATE
        and abs(corr_v) < CORR_FACTOR_GATE
    )
    deliver = bool(lead_capture >= LEAD_CAPTURE_GATE)
    final_pass = detect and discriminate and deliver

    verdict = {
        "substrate": {
            "source": str(EXTENDED_PANEL_PATH.relative_to(REPO_ROOT)),
            "n_assets": int(returns.shape[1]),
            "n_bars": int(len(returns)),
            "first_ts": str(returns.index.min()),
            "last_ts": str(returns.index.max()),
            "window": WINDOW,
            "threshold": THRESHOLD,
            "permutations": PERMUTATIONS,
        },
        # Summary stats are None when b1 is entirely NaN (short panel).
        "b1_stats": {
            "mean": float(b1.dropna().mean()) if b1.notna().any() else None,
            "median": float(b1.dropna().median()) if b1.notna().any() else None,
            "p05": float(b1.dropna().quantile(0.05)) if b1.notna().any() else None,
            "p95": float(b1.dropna().quantile(0.95)) if b1.notna().any() else None,
            "max": float(b1.dropna().max()) if b1.notna().any() else None,
        },
        "IC": round(float(ic), 6),
        "p_value": round(float(p_value), 6),
        "corr_momentum": round(float(corr_m), 6),
        "corr_vol": round(float(corr_v), 6),
        "corr_vix": round(float(corr_vix), 6),
        "corr_hyg": round(float(corr_hyg), 6),
        "lead_capture": round(float(lead_capture), 6),
        "lead_capture_detail": {
            "captured": captured_n,
            "drawdown_events": event_n,
            "drawdown_threshold": DRAWDOWN_THRESHOLD,
            "fwd_window": LEAD_FWD_WINDOW,
            "lookback_bars": [LEAD_LOOKBACK_MIN, LEAD_LOOKBACK_MAX],
        },
        "DETECT": "PASS" if detect else "FAIL",
        "DISCRIMINATE": "PASS" if discriminate else "FAIL",
        "DELIVER": "PASS" if deliver else "FAIL",
        "FINAL": "SIGNAL_READY" if final_pass else "REJECT",
    }

    VERDICT_PATH.parent.mkdir(parents=True, exist_ok=True)
    VERDICT_PATH.write_text(json.dumps(verdict, indent=2))
    print(json.dumps(verdict, indent=2))
    return verdict


if __name__ == "__main__":
    run()

0 commit comments

Comments
 (0)