Skip to content

Commit 4a36fa8

Browse files
committed
Fix remaining review items: performance, style, cleanup
polygraphdiscrepancy.py:
- P3: Vectorize the `_is_constant` sparse check (compare per-column min/max instead of looping over rows)

Reproducibility scripts:
- L10: Remove duplicate runtime fields (keep only the `*_perf_seconds` variants)
- L11: Remove pointless `_fmt_pgs`/`_best_two` aliases in the format scripts
- M7: Remove section-separator comments across all scripts
1 parent a3b9698 commit 4a36fa8

18 files changed

Lines changed: 25 additions & 191 deletions

File tree

polygraph/metrics/base/polygraphdiscrepancy.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,9 @@ def _is_constant(X) -> bool:
147147
if issparse(X):
148148
if X.shape[0] <= 1:
149149
return True
150-
first = X[0]
151-
for i in range(1, X.shape[0]):
152-
diff = X[i] - first
153-
if diff.nnz > 0:
154-
return False
155-
return True
150+
col_min = X.min(axis=0).toarray().ravel()
151+
col_max = X.max(axis=0).toarray().ravel()
152+
return bool(np.array_equal(col_min, col_max))
156153
return bool(np.all(X == X[0]))
157154

158155

reproducibility/01_subsampling/compute_mmd.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,6 @@
4141
)
4242
from polygraph.utils.kernels import AdaptiveRBFKernel
4343

44-
# ---------------------------------------------------------------------------
45-
# Paths (resolved before Hydra touches CWD; we disable chdir in the config)
46-
# ---------------------------------------------------------------------------
4744
REPO_ROOT = here()
4845
DATA_DIR = REPO_ROOT / "data"
4946
EXPERIMENT_RESULTS_DIR = (
@@ -65,9 +62,6 @@
6562
]
6663

6764

68-
# ---------------------------------------------------------------------------
69-
# Graph loading helpers
70-
# ---------------------------------------------------------------------------
7165
def load_graphs(model: str, dataset: str) -> List[nx.Graph]:
7266
"""Load model-generated graphs from ``data/{model}/{dataset}.pkl``."""
7367
pkl_path = DATA_DIR / model / f"{dataset}.pkl"
@@ -115,9 +109,6 @@ def get_reference_dataset(
115109
return list(classes[dataset](split=split, num_graphs=num_graphs).to_nx())
116110

117111

118-
# ---------------------------------------------------------------------------
119-
# Descriptor factory
120-
# ---------------------------------------------------------------------------
121112
def make_descriptor(name: str, reference_graphs: List[nx.Graph]):
122113
"""Instantiate a descriptor by name, matching the original experiment."""
123114
factories = {
@@ -137,9 +128,6 @@ def make_descriptor(name: str, reference_graphs: List[nx.Graph]):
137128
return factories[name]()
138129

139130

140-
# ---------------------------------------------------------------------------
141-
# Main
142-
# ---------------------------------------------------------------------------
143131
@hydra.main(
144132
config_path="../configs",
145133
config_name="01_subsampling_mmd",
@@ -278,7 +266,6 @@ def main(cfg: DictConfig) -> None:
278266
"mmd_std": float(result.std),
279267
"mmd_low": float(result.low) if result.low is not None else None,
280268
"mmd_high": float(result.high) if result.high is not None else None,
281-
"mmd_runtime_seconds": mmd_runtime_perf_seconds,
282269
"mmd_runtime_perf_seconds": mmd_runtime_perf_seconds,
283270
}
284271

@@ -301,7 +288,6 @@ def main(cfg: DictConfig) -> None:
301288
"status": "ok",
302289
"output_path": str(out_path),
303290
"result": output,
304-
"mmd_runtime_seconds": mmd_runtime_perf_seconds,
305291
"mmd_runtime_perf_seconds": mmd_runtime_perf_seconds,
306292
}
307293
)
@@ -329,7 +315,6 @@ def main(cfg: DictConfig) -> None:
329315
"variant": variant,
330316
"status": "error",
331317
"error": str(e),
332-
"mmd_runtime_seconds": metric_runtime_perf_seconds,
333318
"mmd_runtime_perf_seconds": metric_runtime_perf_seconds,
334319
}
335320
)

reproducibility/01_subsampling/compute_pgd.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -53,19 +53,13 @@ def _make_tabpfn_classifier(weights_version: str):
5353
)
5454

5555

56-
# ---------------------------------------------------------------------------
57-
# Paths (resolved before Hydra touches CWD; we disable chdir in the config)
58-
# ---------------------------------------------------------------------------
5956
REPO_ROOT = here()
6057
DATA_DIR = REPO_ROOT / "data"
6158
EXPERIMENT_RESULTS_DIR = (
6259
REPO_ROOT / "reproducibility" / "figures" / "01_subsampling" / "results"
6360
)
6461

6562

66-
# ---------------------------------------------------------------------------
67-
# Graph loading helpers
68-
# ---------------------------------------------------------------------------
6963
def load_graphs(model: str, dataset: str) -> List[nx.Graph]:
7064
"""Load model-generated graphs from ``data/{model}/{dataset}.pkl``."""
7165
pkl_path = DATA_DIR / model / f"{dataset}.pkl"
@@ -113,9 +107,6 @@ def get_reference_dataset(
113107
return list(classes[dataset](split=split, num_graphs=num_graphs).to_nx())
114108

115109

116-
# ---------------------------------------------------------------------------
117-
# Main
118-
# ---------------------------------------------------------------------------
119110
@hydra.main(
120111
config_path="../configs",
121112
config_name="01_subsampling_pgd",
@@ -238,7 +229,6 @@ def main(cfg: DictConfig) -> None:
238229
"num_bootstrap": num_bootstrap,
239230
"pgd_mean": float(result["pgd"].mean),
240231
"pgd_std": float(result["pgd"].std),
241-
"pgd_runtime_seconds": pgd_runtime_perf_seconds,
242232
"pgd_runtime_perf_seconds": pgd_runtime_perf_seconds,
243233
"tabpfn_package_version": pkg_version("tabpfn"),
244234
"tabpfn_weights_version": tabpfn_weights_version,
@@ -271,7 +261,6 @@ def main(cfg: DictConfig) -> None:
271261
"status": "ok",
272262
"output_path": str(out_path),
273263
"result": output,
274-
"pgd_runtime_seconds": pgd_runtime_perf_seconds,
275264
"pgd_runtime_perf_seconds": pgd_runtime_perf_seconds,
276265
}
277266
)
@@ -295,7 +284,6 @@ def main(cfg: DictConfig) -> None:
295284
"subsample_size": subsample_size,
296285
"status": "error",
297286
"error": str(e),
298-
"pgd_runtime_seconds": metric_runtime_perf_seconds,
299287
"pgd_runtime_perf_seconds": metric_runtime_perf_seconds,
300288
}
301289
)

reproducibility/02_perturbation/compute.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,6 @@
8282
_RBF_BW = np.array([0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0])
8383

8484

85-
# ---------------------------------------------------------------------------
86-
# Perturbation functions (inline, matching original library implementations)
87-
# ---------------------------------------------------------------------------
88-
89-
9085
def edge_rewiring(graph: nx.Graph, noise_level: float) -> nx.Graph:
9186
"""Rewire edges: each selected with P(noise_level), one endpoint reconnected."""
9287
if not (0 <= noise_level <= 1):
@@ -252,11 +247,6 @@ def edge_addition(graph: nx.Graph, noise_level: float) -> nx.Graph:
252247
}
253248

254249

255-
# ---------------------------------------------------------------------------
256-
# Dataset loading
257-
# ---------------------------------------------------------------------------
258-
259-
260250
def load_dataset(
261251
dataset: str, num_graphs: int, seed: int
262252
) -> Tuple[List[nx.Graph], List[nx.Graph]]:
@@ -335,11 +325,6 @@ def load_dataset(
335325
return reference_graphs, perturbed_graphs
336326

337327

338-
# ---------------------------------------------------------------------------
339-
# Metric initialization
340-
# ---------------------------------------------------------------------------
341-
342-
343328
def _make_classifier(name: str, tabpfn_weights_version: str = "v2.5"):
344329
"""Build a classifier by name. For TabPFN, respects weights version."""
345330
if name == "tabpfn":
@@ -457,11 +442,6 @@ def build_metrics(
457442
return metrics
458443

459444

460-
# ---------------------------------------------------------------------------
461-
# Evaluation
462-
# ---------------------------------------------------------------------------
463-
464-
465445
def evaluate_metrics(
466446
perturbed_graphs: List[nx.Graph],
467447
metrics: Dict[str, Any],
@@ -491,11 +471,6 @@ def evaluate_metrics(
491471
return result
492472

493473

494-
# ---------------------------------------------------------------------------
495-
# Main
496-
# ---------------------------------------------------------------------------
497-
498-
499474
@hydra.main(
500475
config_path="../configs", config_name="02_perturbation", version_base=None
501476
)

reproducibility/02_perturbation/plot.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -226,11 +226,6 @@ def _compute_spearman(series: pd.Series, noise: pd.Series) -> float:
226226
return float(rho) # type: ignore[arg-type]
227227

228228

229-
# ---------------------------------------------------------------------------
230-
# Figure 1 & 2: Correlation bar plots
231-
# ---------------------------------------------------------------------------
232-
233-
234229
def plot_correlation_bars(
235230
all_data: Dict,
236231
classifier: str,
@@ -329,11 +324,6 @@ def plot_correlation_bars(
329324
logger.success("Saved: {}", out)
330325

331326

332-
# ---------------------------------------------------------------------------
333-
# Figures 3-6: Metrics vs noise level (faceted grid)
334-
# ---------------------------------------------------------------------------
335-
336-
337327
def plot_metrics_vs_noise(
338328
all_data: Dict,
339329
classifier: str,
@@ -485,11 +475,6 @@ def plot_metrics_vs_noise(
485475
logger.success("Saved: {}", out)
486476

487477

488-
# ---------------------------------------------------------------------------
489-
# Figure 7: LR vs TabPFN comparison
490-
# ---------------------------------------------------------------------------
491-
492-
493478
def plot_lr_vs_tabpfn(
494479
all_data: Dict,
495480
variant: str,
@@ -586,11 +571,6 @@ def plot_lr_vs_tabpfn(
586571
logger.success("Saved: {}", out)
587572

588573

589-
# ---------------------------------------------------------------------------
590-
# Single-dataset perturbation figures (e.g. SBM-only)
591-
# ---------------------------------------------------------------------------
592-
593-
594574
def plot_single_dataset_perturbation(
595575
all_data: Dict,
596576
classifier: str,
@@ -743,11 +723,6 @@ def plot_single_dataset_perturbation(
743723
logger.success("Saved: {}", out)
744724

745725

746-
# ---------------------------------------------------------------------------
747-
# CLI
748-
# ---------------------------------------------------------------------------
749-
750-
751726
def _load_results_dir(results_dir: Path) -> Dict[Tuple[str, str], dict]:
752727
"""Load all perturbation JSON results from a directory."""
753728
data = {}

reproducibility/03_model_quality/compute_vun.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,6 @@
4040
from utils.vun import compute_vun_parallel # noqa: E402
4141

4242

43-
# ---------------------------------------------------------------------------
44-
# Graph loading (mirrors compute.py)
45-
# ---------------------------------------------------------------------------
46-
47-
4843
def load_graphs(path: Path) -> List[nx.Graph]:
4944
"""Load graphs from pickle file and convert to networkx."""
5045
if not path.exists():
@@ -89,11 +84,6 @@ def get_reference_dataset(
8984
return ds, list(ds.to_nx())
9085

9186

92-
# ---------------------------------------------------------------------------
93-
# Main
94-
# ---------------------------------------------------------------------------
95-
96-
9787
@hydra.main(
9888
config_path="../configs",
9989
config_name="03_model_quality_vun",

reproducibility/03_model_quality/format.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,6 @@ def _neg_pearson(x, y) -> float:
9696
return -float(r) # type: ignore[arg-type]
9797

9898

99-
# ---------------------------------------------------------------------------
100-
# Table 1 & 3: Pearson correlation of validity with other metrics
101-
# ---------------------------------------------------------------------------
102-
103-
10499
def _format_row_with_ranking(
105100
values: list[float], fmt: str = "{:.2f}"
106101
) -> list[str]:
@@ -186,11 +181,6 @@ def generate_pearson_correlation_table(variant: str) -> str:
186181
return "\n".join(lines)
187182

188183

189-
# ---------------------------------------------------------------------------
190-
# Table 2 & 4: Spearman correlation with training iterations
191-
# ---------------------------------------------------------------------------
192-
193-
194184
def generate_spearman_training_table(variant: str) -> str:
195185
"""Generate Spearman correlation table of metrics with training steps.
196186
@@ -249,11 +239,6 @@ def generate_spearman_training_table(variant: str) -> str:
249239
return "\n".join(lines)
250240

251241

252-
# ---------------------------------------------------------------------------
253-
# Table 5: Denoising iterations MMD values
254-
# ---------------------------------------------------------------------------
255-
256-
257242
def generate_denoising_mmd_table() -> str:
258243
"""Generate table of MMD values per denoising step."""
259244
df = load_results("denoising", "planar", "jsd")
@@ -289,11 +274,6 @@ def generate_denoising_mmd_table() -> str:
289274
return "\n".join(lines)
290275

291276

292-
# ---------------------------------------------------------------------------
293-
# Table 6: Denoising iterations PGS values
294-
# ---------------------------------------------------------------------------
295-
296-
297277
def generate_denoising_pgs_table(variant: str = "jsd") -> str:
298278
"""Generate table of PGS values per denoising step, with optional VUN column."""
299279
df = load_results("denoising", "planar", variant)

reproducibility/05_benchmark/compute.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,6 @@
2626
from utils.data import get_reference_dataset as _get_ref
2727
from utils.data import load_graphs as _load
2828

29-
# ---------------------------------------------------------------------------
30-
# Paths (resolved before Hydra touches CWD)
31-
# ---------------------------------------------------------------------------
3229
REPO_ROOT = here()
3330
DATA_DIR = REPO_ROOT / "data"
3431
_RESULTS_DIR_BASE = REPO_ROOT / "reproducibility" / "tables" / "results"

reproducibility/05_benchmark/compute_vun.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,6 @@
3939
app = typer.Typer()
4040

4141

42-
# ---------------------------------------------------------------------------
43-
# Per-pair isomorphism with SIGALRM timeout
44-
# ---------------------------------------------------------------------------
45-
46-
4742
class _TimeoutError(Exception):
4843
pass
4944

@@ -97,11 +92,6 @@ def __contains__(self, g: nx.Graph) -> bool:
9792
return False
9893

9994

100-
# ---------------------------------------------------------------------------
101-
# Parallel novelty worker
102-
# ---------------------------------------------------------------------------
103-
104-
10595
def _check_novel_worker(
10696
gen_graph_json: str,
10797
train_graphs_json: List[str],

0 commit comments

Comments (0)