neuron7xLab
diff --git a/research/microstructure/killtest.py b/research/microstructure/killtest.py
Lines changed: 19 additions & 1 deletion
diff --git a/research/microstructure/regime.py b/research/microstructure/regime.py
Lines changed: 133 additions & 0 deletions
diff --git a/scripts/l2_killtest_recursive.py b/scripts/l2_killtest_recursive.py
Lines changed: 146 additions & 0 deletions
@@ -355,13 +355,31 @@ def run_killtest(
     horizons_sec: tuple[int, ...] = _TARGET_HORIZONS_SEC,
     ic_gate: float = _IC_GATE,
     pvalue_gate: float = _PERM_PVALUE_GATE,
+    regime_mask: NDArray[np.bool_] | None = None,
     seed: int = SEED,
 ) -> GateVerdict:
-    """Execute the full fail-fast gate and emit a binary verdict."""
+    """Execute the full fail-fast gate and emit a binary verdict.
+
+    When `regime_mask` is provided (shape (n_rows,)), IC and null-test
+    computations count only rows where the mask is True. The Ricci signal
+    is still computed on the full contiguous time series (its rolling
+    cross-sectional correlation needs consecutive rows) — the filter acts
+    at scoring time, not feature-construction time.
+    """
     ricci_signal_1d = cross_sectional_ricci_signal(features.ofi)
     ricci_panel = np.repeat(ricci_signal_1d[:, None], features.n_symbols, axis=1)
     target = _forward_log_return(features.mid, primary_horizon_sec)
 
+    if regime_mask is not None:
+        if regime_mask.shape != (features.n_rows,):
+            raise ValueError(
+                f"regime_mask shape {regime_mask.shape} must equal ({features.n_rows},)"
+            )
+        # broadcast row-mask to panel shape
+        panel_mask = np.broadcast_to(regime_mask[:, None], ricci_panel.shape)
+        ricci_panel = np.where(panel_mask, ricci_panel, np.nan)
+        target = np.where(panel_mask, target, np.nan)
+
     ic_signal = _pooled_ic(ricci_panel, target)
 
     ret_1s = np.vstack(
 
@@ -0,0 +1,133 @@
+"""Regime detection for the L2 kill-test substrate.
+
+Motivation: recursive / cyclic analysis on the collected 5h14m window
+shows IC is intermittent — some time blocks produce IC > 0.15, others
+invert to IC < -0.05. Full-window verdict averages these. The next
+inevitable question is: *when* is the Ricci cross-sectional signal
+predictive?
+
+The regime_analysis step flagged cross-asset mean correlation
+(`corr_mean` of mid-return) as the feature with the strongest
+(directional) relationship to block IC. This module exposes that
+feature as a per-row rolling score, and applies a threshold to build
+a boolean regime mask consumable by `run_killtest(regime_mask=...)`.
+
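+Public API: two rolling score functions (`rolling_corr_regime`,
+`rolling_rv_regime`) and two mask builders (`regime_mask_from_score`,
+`regime_mask_from_quantile`). No new dataclasses.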
+"""
+
+from __future__ import annotations
+
+import numpy as np
+from numpy.typing import NDArray
+
+from research.microstructure.killtest import FeatureFrame
+
+# Smallest rolling window (in rows) accepted by the rolling_*_regime
+# functions; a smaller `window_rows` raises ValueError.
+_MIN_WINDOW_ROWS: int = 60
+
+
+def rolling_corr_regime(
+    features: FeatureFrame,
+    *,
+    window_rows: int = 300,
+) -> NDArray[np.float64]:
+    """Score each row by the mean pairwise correlation of recent mid-returns.
+
+    Returns are row-to-row differences of log(`features.mid`) (1-sec
+    log-returns), with a zero row prepended so the output lines up with
+    the input rows. For every row index t in [window_rows, n) the score
+    is the mean off-diagonal entry of the symbol-by-symbol correlation
+    matrix computed over the `window_rows` return rows immediately
+    preceding t (rows t - window_rows .. t - 1; row t itself is not part
+    of its own window).
+
+    A row keeps a NaN score when it has fewer than `window_rows`
+    predecessors, when its window holds a non-finite return, or when any
+    symbol's return std inside the window is below 1e-14.
+
+    High score ⇒ symbols move together ⇒ the cross-sectional Ricci signal
+    has structure to measure. Low score ⇒ assets decouple and Ricci κ_min
+    becomes noise-driven.
+
+    Raises:
+        ValueError: if `window_rows` is below `_MIN_WINDOW_ROWS`, or the
+            frame has fewer than two symbols.
+    """
+    if window_rows < _MIN_WINDOW_ROWS:
+        raise ValueError(f"window_rows must be >= {_MIN_WINDOW_ROWS}, got {window_rows}")
+    if features.n_symbols < 2:
+        raise ValueError(f"need >= 2 symbols for cross-asset correlation, got {features.n_symbols}")
+
+    n_symbols = features.n_symbols
+    log_returns = np.diff(np.log(features.mid), axis=0)
+    # Zero first row keeps the return panel aligned with the mid panel.
+    padded = np.vstack([np.zeros((1, n_symbols)), log_returns])
+    n_rows = padded.shape[0]
+    scores = np.full(n_rows, np.nan, dtype=np.float64)
+    off_diagonal = ~np.eye(n_symbols, dtype=bool)
+
+    for end in range(window_rows, n_rows):
+        window = padded[end - window_rows : end]
+        is_clean = bool(np.isfinite(window).all())
+        # Std is only evaluated on clean windows; a (near-)constant column
+        # would make the correlation undefined, so such rows stay NaN.
+        if is_clean and not bool((window.std(axis=0) < 1e-14).any()):
+            corr = np.asarray(np.corrcoef(window.T), dtype=np.float64)
+            scores[end] = float(np.nan_to_num(corr, nan=0.0)[off_diagonal].mean())
+    return scores
+
+
+def rolling_rv_regime(
+    features: FeatureFrame,
+    *,
+    window_rows: int = 300,
+) -> NDArray[np.float64]:
+    """Score each row by the symbol-averaged rolling std of 1-sec mid-returns.
+
+    Background: walk-forward analysis on the collected 5h14m substrate
+    identified realized vol as the single strongest regime discriminator
+    for Ricci IC (Spearman ρ=+0.352, p=0.008 across 56 rolling windows;
+    low-vol quartile IC median = +0.027 vs high-vol quartile IC
+    median = +0.137).
+
+    High score ⇒ there is flow / activity ⇒ OFI drives observable price
+    changes ⇒ cross-sectional Ricci has structural content to score.
+    Low score ⇒ the book is inert ⇒ OFI → 0 → Ricci → noise.
+
+    Mechanics: returns are row-to-row differences of log(`features.mid`)
+    with a zero row prepended. For every row index t in [window_rows, n)
+    the score is the per-symbol std over return rows
+    t - window_rows .. t - 1, averaged across symbols. No baseline is
+    subtracted (absolute activity, not anomaly vs expected). Rows with
+    too little history, or whose window holds a non-finite return, stay
+    NaN.
+
+    Raises:
+        ValueError: if `window_rows` is below `_MIN_WINDOW_ROWS`, or the
+            frame has no symbols.
+    """
+    if window_rows < _MIN_WINDOW_ROWS:
+        raise ValueError(f"window_rows must be >= {_MIN_WINDOW_ROWS}, got {window_rows}")
+    if features.n_symbols < 1:
+        raise ValueError(f"need >= 1 symbol, got {features.n_symbols}")
+
+    log_returns = np.diff(np.log(features.mid), axis=0)
+    # Zero first row keeps the return panel aligned with the mid panel.
+    padded = np.vstack([np.zeros((1, features.n_symbols)), log_returns])
+    n_rows = padded.shape[0]
+    vol = np.full(n_rows, np.nan, dtype=np.float64)
+
+    for stop in range(window_rows, n_rows):
+        window = padded[stop - window_rows : stop]
+        if np.isfinite(window).all():
+            vol[stop] = float(np.mean(np.std(window, axis=0)))
+    return vol
+
+
+def regime_mask_from_score(
+    score: NDArray[np.float64],
+    *,
+    threshold: float,
+) -> NDArray[np.bool_]:
+    """Flag the entries of `score` that are finite and at or above `threshold`.
+
+    NaN and infinite scores always map to False.
+    """
+    at_or_above = score >= threshold
+    return (at_or_above & np.isfinite(score)).astype(bool)
+
+
+def regime_mask_from_quantile(
+    score: NDArray[np.float64],
+    *,
+    quantile: float,
+) -> NDArray[np.bool_]:
+    """Flag entries of `score` that reach an empirical quantile of the finite scores.
+
+    The cut-off is the `quantile`-th quantile of the finite entries only;
+    non-finite entries never pass. E.g. 0.5 keeps the top half, 0.25
+    keeps the top 75%. Use this instead of a fixed threshold when the
+    absolute score scale depends on the substrate. If `score` has no
+    finite entry the mask is all False.
+
+    Raises:
+        ValueError: if `quantile` is not strictly between 0 and 1
+            (NaN included).
+    """
+    in_open_unit_interval = 0.0 < quantile < 1.0
+    if not in_open_unit_interval:
+        raise ValueError(f"quantile must lie in (0, 1), got {quantile}")
+
+    finite_scores = score[np.isfinite(score)]
+    if finite_scores.size > 0:
+        cutoff = float(np.quantile(finite_scores, quantile))
+        return regime_mask_from_score(score, threshold=cutoff)
+    return np.zeros_like(score, dtype=bool)
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""Recursive + cyclic reality check on the collected L2 substrate.
+
+Uses only the existing primitives (`_load_parquets`, `build_feature_frame`,
+`slice_features`, `run_killtest`). No new dataclasses, no new modules,
+no new gate logic. Two orthogonal views:
+
+1. RECURSIVE BISECTION (depth-first): at depth d, each cell is a 1/(2**d)
+   contiguous slice of the full window. Reports IC + residual_IC at every
+   cell. Signal is `deep` iff it survives every leaf at some depth.
+2. CYCLIC BLOCKS (breadth): split the full window into K adjacent disjoint
+   blocks of equal size; report IC trajectory across them. Signal is
+   `stable` iff IC sign + magnitude are preserved across blocks.
+
+Reality = what both views say simultaneously.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from research.microstructure.killtest import (
+    _load_parquets as load_parquets,
+)
+from research.microstructure.killtest import (
+    build_feature_frame,
+    run_killtest,
+    slice_features,
+)
+from research.microstructure.l2_schema import DEFAULT_SYMBOLS
+
+# Cells / blocks with fewer rows than this are reported as "too small"
+# instead of being scored by run_killtest.
+_MIN_ROWS_PER_CELL = 1500
+# Deepest bisection level: cells at this depth are scored but not split further.
+_MAX_DEPTH = 3
+# Number of adjacent disjoint blocks in the cyclic view.
+_CYCLIC_K = 8
+
+
+def _recurse(features_obj: object, path: str, depth: int, results: list[dict[str, object]]) -> None:
+    """Score one cell of the bisection tree, then recurse into its two halves.
+
+    One record is appended to `results` per visited cell, in pre-order
+    (the cell itself, then its left subtree, then its right subtree).
+
+    Args:
+        features_obj: the `FeatureFrame` slice covering this cell.
+        path: label of the cell; children get `path + "L"` / `path + "R"`.
+        depth: depth of this cell (0 for the root call).
+        results: output list, mutated in place.
+
+    A cell with fewer than `_MIN_ROWS_PER_CELL` rows is recorded with NaN
+    metrics and `"note": "too_small"`, and is not split. Splitting also
+    stops once `depth` reaches `_MAX_DEPTH`.
+
+    Raises:
+        TypeError: if `features_obj` is not a `FeatureFrame`.
+    """
+    from research.microstructure.killtest import FeatureFrame  # noqa: PLC0415
+
+    # Explicit check rather than `assert`: asserts are stripped under
+    # `python -O`, which would let a wrong object through to `.n_rows`.
+    if not isinstance(features_obj, FeatureFrame):
+        raise TypeError(
+            f"features_obj must be a FeatureFrame, got {type(features_obj).__name__}"
+        )
+    features: FeatureFrame = features_obj
+
+    if features.n_rows < _MIN_ROWS_PER_CELL:
+        # Too few rows to score: record a placeholder and stop descending.
+        results.append(
+            {
+                "path": path,
+                "depth": depth,
+                "n_samples": features.n_rows,
+                "ic_signal": float("nan"),
+                "residual_ic": float("nan"),
+                "residual_p": float("nan"),
+                "note": "too_small",
+            }
+        )
+        return
+
+    v = run_killtest(features)
+    results.append(
+        {
+            "path": path,
+            "depth": depth,
+            "n_samples": v.n_samples,
+            "ic_signal": v.ic_signal,
+            "residual_ic": v.residual_ic,
+            "residual_p": v.residual_ic_pvalue,
+            "verdict": v.verdict,
+            "reasons_count": len(v.reasons),
+        }
+    )
+
+    if depth >= _MAX_DEPTH:
+        return
+    # Split at the midpoint; with an odd row count the right half gets the extra row.
+    mid = features.n_rows // 2
+    left = slice_features(features, 0, mid)
+    right = slice_features(features, mid, features.n_rows)
+    _recurse(left, f"{path}L", depth + 1, results)
+    _recurse(right, f"{path}R", depth + 1, results)
+
+
+def main() -> int:
+    """Print the recursive-bisection and cyclic-block reports for the L2 substrate.
+
+    Loads the parquet frames for `DEFAULT_SYMBOLS` from
+    `data/binance_l2_perp`, builds the feature frame, then prints:
+
+    1. one row per cell of the bisection tree produced by `_recurse`;
+    2. one row per cyclic block, followed by a one-line IC summary.
+
+    The summary statistics are computed over finite block ICs only; when
+    any block IC is non-finite the number excluded is appended to the
+    summary line.
+
+    Returns:
+        0, used as the process exit status.
+    """
+    import math  # noqa: PLC0415
+
+    data_dir = Path("data/binance_l2_perp")
+    frames = load_parquets(data_dir, DEFAULT_SYMBOLS)
+    features = build_feature_frame(frames, DEFAULT_SYMBOLS)
+    print(f"substrate: n_rows={features.n_rows}  n_symbols={features.n_symbols}")
+    print()
+
+    # --- 1. Recursive bisection ---
+    print("=" * 74)
+    print("RECURSIVE BISECTION TREE  (depth 0 = full; L/R = halves at each split)")
+    print("=" * 74)
+    tree: list[dict[str, object]] = []
+    _recurse(features, "·", 0, tree)
+    print(f"{'path':<10} {'depth':<6} {'n':<7} {'IC':>8} {'residual':>10} {'p':>8} {'verdict':<10}")
+    for row in tree:
+        p = row["path"]
+        d = row["depth"]
+        n = row["n_samples"]
+        if row.get("note") == "too_small":
+            print(f"{p:<10} {d:<6} {n:<7}   — too small for stable IC —")
+            continue
+        ic = row["ic_signal"]
+        rr = row["residual_ic"]
+        pv = row["residual_p"]
+        vd = row["verdict"]
+        # Record values are typed `object`; these asserts narrow them for
+        # the numeric / string format specs below.
+        assert isinstance(p, str) and isinstance(d, int) and isinstance(n, int)
+        assert isinstance(ic, float) and isinstance(rr, float) and isinstance(pv, float)
+        assert isinstance(vd, str)
+        print(f"{p:<10} {d:<6} {n:<7} {ic:>+8.4f} {rr:>+10.4f} {pv:>8.4f} {vd:<10}")
+    print()
+
+    # --- 2. Cyclic K blocks ---
+    print("=" * 74)
+    print(f"CYCLIC BLOCKS  (K={_CYCLIC_K} adjacent disjoint windows)")
+    print("=" * 74)
+    block = features.n_rows // _CYCLIC_K
+    print(
+        f"{'block':<6} {'start':<6} {'end':<6} {'n':<7} {'IC':>8} {'residual':>10} {'p':>8} {'verdict':<10}"
+    )
+    ic_series: list[float] = []
+    for k in range(_CYCLIC_K):
+        start = k * block
+        # The last block absorbs the remainder rows left by the integer division.
+        end = (k + 1) * block if k < _CYCLIC_K - 1 else features.n_rows
+        sub = slice_features(features, start, end)
+        if sub.n_rows < _MIN_ROWS_PER_CELL:
+            print(f"{k:<6} {start:<6} {end:<6} {sub.n_rows:<7}   — too small —")
+            continue
+        v = run_killtest(sub)
+        ic_series.append(v.ic_signal)
+        print(
+            f"{k:<6} {start:<6} {end:<6} {v.n_samples:<7} "
+            f"{v.ic_signal:>+8.4f} {v.residual_ic:>+10.4f} {v.residual_ic_pvalue:>8.4f} "
+            f"{v.verdict:<10}"
+        )
+    print()
+    if ic_series:
+        # A single NaN IC would turn the average into NaN and make
+        # min()/max() depend on element order, so summarise finite values
+        # only and say how many blocks were left out.
+        finite_ics = [ic for ic in ic_series if math.isfinite(ic)]
+        n_excluded = len(ic_series) - len(finite_ics)
+        n_pos = sum(1 for ic in finite_ics if ic > 0)
+        nan = float("nan")
+        avg = sum(finite_ics) / len(finite_ics) if finite_ics else nan
+        lo = min(finite_ics, default=nan)
+        hi = max(finite_ics, default=nan)
+        suffix = f"  ({n_excluded} non-finite IC excluded)" if n_excluded else ""
+        print(
+            f"summary: {n_pos}/{len(ic_series)} blocks positive IC  avg={avg:+.4f}  "
+            f"min={lo:+.4f}  max={hi:+.4f}{suffix}"
+        )
+    return 0
+
+
+# Script entry point: main()'s return value becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())