BorgwardtLab
diff --git a/pixi.lock b/pixi.lock
Lines changed: 6 additions & 7 deletions
diff --git a/polygraph/datasets/base/caching.py b/polygraph/datasets/base/caching.py
Lines changed: 1 addition & 8 deletions
diff --git a/polygraph/datasets/base/molecules.py b/polygraph/datasets/base/molecules.py
Lines changed: 0 additions & 18 deletions
diff --git a/polygraph/metrics/base/frechet_distance.py b/polygraph/metrics/base/frechet_distance.py
Lines changed: 5 additions & 4 deletions
diff --git a/polygraph/metrics/base/metric_interval.py b/polygraph/metrics/base/metric_interval.py
Lines changed: 0 additions & 12 deletions
diff --git a/polygraph/metrics/base/polygraphdiscrepancy.py b/polygraph/metrics/base/polygraphdiscrepancy.py
Lines changed: 5 additions & 5 deletions
diff --git a/polygraph/metrics/molecule_pgd.py b/polygraph/metrics/molecule_pgd.py
Lines changed: 14 additions & 18 deletions
diff --git a/polygraph/metrics/rbf_mmd.py b/polygraph/metrics/rbf_mmd.py
Lines changed: 14 additions & 30 deletions
@@ -2,7 +2,7 @@
 import os
 import shutil
 import urllib.request
-from typing import Any, Optional, Sequence
+from typing import Optional
 
 import filelock
 import torch
@@ -87,10 +87,3 @@ def load_from_cache(
     logger.debug(f"Loading data from {path}")
     data = torch.load(path, weights_only=True, mmap=mmap)
     return GraphStorage(**data)
-
-
-def to_list(value: Any) -> Sequence:
-    if isinstance(value, Sequence) and not isinstance(value, str):
-        return value
-    else:
-        return [value]
@@ -28,16 +28,6 @@
     Chem.rdchem.BondType.ZERO,
 ]
 
-BOND_STEREO_TYPES = [
-    Chem.rdchem.BondStereo.STEREONONE,
-    Chem.rdchem.BondStereo.STEREOZ,
-    Chem.rdchem.BondStereo.STEREOE,
-    Chem.rdchem.BondStereo.STEREOCIS,
-    Chem.rdchem.BondStereo.STEREOTRANS,
-    Chem.rdchem.BondStereo.STEREOANY,
-    Chem.rdchem.BondStereo.STEREOATROPCCW,
-    Chem.rdchem.BondStereo.STEREOATROPCW,
-]
 
 # Generalized atom vocabulary for all molecules
 N_UNIQUE_ATOMS = 119
@@ -74,14 +64,6 @@ def are_smiles_equivalent(smiles1, smiles2):
     return canonical_smiles1 == canonical_smiles2
 
 
-def mol2smiles(mol, canonical: bool = False):
-    try:
-        Chem.SanitizeMol(mol)
-    except ValueError as e:
-        print(e, mol)
-        return None
-    return Chem.MolToSmiles(mol, canonical=canonical)
-
 
 def smiles_with_explicit_hydrogens(smiles: str, canonical: bool = True) -> str:
     """Convert a SMILES string to a SMILES string with all hydrogens made explicit.
 
@@ -1,5 +1,4 @@
-from collections import namedtuple
-from typing import Callable, Collection, Generic
+from typing import Callable, Collection, Generic, NamedTuple
 
 import numpy as np
 import scipy
@@ -10,7 +9,10 @@
 
 __all__ = ["FittedFrechetDistance", "FrechetDistance"]
 
-GaussianParameters = namedtuple("GaussianParameters", ["mean", "covariance"])
+
+class GaussianParameters(NamedTuple):
+    mean: np.ndarray
+    covariance: np.ndarray
 
 
 def compute_wasserstein_distance(
@@ -112,7 +114,6 @@ def __init__(
     ):
         self._reference_gaussian = fitted_gaussian
         self._descriptor_fn = descriptor_fn
-        self._dim = None
 
     def compute(self, generated_graphs: Collection[GraphType]) -> float:
         """Computes Frechet distance between reference and generated graphs.
 
@@ -64,18 +64,6 @@ def from_samples(
 
         return cls(mean=mean, std=std, low=low, high=high, coverage=coverage)
 
-    def __getitem__(self, key: str) -> Optional[float]:
-        if key == "mean":
-            return self.mean
-        elif key == "std":
-            return self.std
-        elif key == "low":
-            return self.low
-        elif key == "high":
-            return self.high
-        else:
-            raise ValueError(f"Invalid key: {key}")
-
     def __repr__(self):
         if self.coverage is not None:
             return f"MetricInterval(mean={self.mean}, std={self.std}, low={self.low}, high={self.high}, coverage={self.coverage})"
 
@@ -171,7 +171,7 @@ def _scores_to_informedness_and_threshold(
     )
     if ref_scores.ndim != 1:
         raise RuntimeError(
-            "ref_scores must be 1-dimensional, got shape {ref_scores.shape}. This should not happen, please file a bug report."
+            f"ref_scores must be 1-dimensional, got shape {ref_scores.shape}. This should not happen, please file a bug report."
         )
 
     assert ref_scores.ndim == 1 and gen_scores.ndim == 1
@@ -587,9 +587,9 @@ def compute(
 
         Returns:
             Typed dictionary of scores.
-                The key `"polygraphscore"` specifies the PolyGraphDiscrepancy, giving the estimated tightest lower-bound on the probability metric.
-                The key `"polygraphscore_descriptor"` specifies the descriptor that achieves this bound.
-                All descritor-wise scores are returned in the key `"subscores"`.
+                The key `"pgd"` specifies the PolyGraphDiscrepancy, giving the estimated tightest lower-bound on the probability metric.
+                The key `"pgd_descriptor"` specifies the descriptor that achieves this bound.
+                All descriptor-wise scores are returned in the key `"subscores"`.
         """
         all_metrics = {
             name: metric.compute(generated_graphs)
@@ -665,7 +665,7 @@ def compute(
             Typed dictionary of scores.
                 The key `"pgd"` specifies the PolyGraphDiscrepancy, giving mean and standard deviation as [`MetricInterval`][polygraph.metrics.base.metric_interval.MetricInterval] objects.
                 The key `"pgd_descriptor"` describes which descriptors achieve this score. This is a dictionary mapping descriptor names to the ratio of samples in which the descriptor was chosen.
-                All descritor-wise scores are returned in the key `"subscores"`. These are [`MetricInterval`][polygraph.metrics.base.metric_interval.MetricInterval] objects.
+                All descriptor-wise scores are returned in the key `"subscores"`. These are [`MetricInterval`][polygraph.metrics.base.metric_interval.MetricInterval] objects.
         """
         if len(generated_graphs) < 2 * self._subsample_size:
             raise ValueError(
 
@@ -63,6 +63,18 @@
 ]
 
 
+def _molecule_descriptors():
+    return {
+        "topochemical": TopoChemicalDescriptor(),
+        "morgan_fingerprint": FingerprintDescriptor(
+            algorithm="morgan", dim=128
+        ),
+        "chemnet": ChemNetDescriptor(dim=128),
+        "molclr": MolCLRDescriptor(dim=128),
+        "lipinski": LipinskiDescriptor(),
+    }
+
+
 class MoleculePGD(PolyGraphDiscrepancy[rdkit.Chem.Mol]):
     """MoleculePGD to compare molecule distributions, combining different molecule descriptors.
 
@@ -73,15 +85,7 @@ class MoleculePGD(PolyGraphDiscrepancy[rdkit.Chem.Mol]):
     def __init__(self, reference_molecules: Collection[rdkit.Chem.Mol]):
         super().__init__(
             reference_graphs=reference_molecules,
-            descriptors={
-                "topochemical": TopoChemicalDescriptor(),
-                "morgan_fingerprint": FingerprintDescriptor(
-                    algorithm="morgan", dim=128
-                ),
-                "chemnet": ChemNetDescriptor(dim=128),
-                "molclr": MolCLRDescriptor(dim=128),
-                "lipinski": LipinskiDescriptor(),
-            },
+            descriptors=_molecule_descriptors(),
             variant="jsd",
             classifier=None,
         )
@@ -106,15 +110,7 @@ def __init__(
     ):
         super().__init__(
             reference_graphs=reference_molecules,
-            descriptors={
-                "topochemical": TopoChemicalDescriptor(),
-                "morgan_fingerprint": FingerprintDescriptor(
-                    algorithm="morgan", dim=128
-                ),
-                "chemnet": ChemNetDescriptor(dim=128),
-                "molclr": MolCLRDescriptor(dim=128),
-                "lipinski": LipinskiDescriptor(),
-            },
+            descriptors=_molecule_descriptors(),
             subsample_size=subsample_size,
             num_samples=num_samples,
             variant="jsd",
 
@@ -57,6 +57,10 @@
 from polygraph.utils.kernels import AdaptiveRBFKernel
 from polygraph.metrics.base import MetricCollection
 
+_DEFAULT_RBF_BANDWIDTHS = np.array(
+    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
+)
+
 __all__ = [
     "RBFMMD2Benchmark",
     "RBFMMD2BenchmarkInterval",
@@ -139,9 +143,7 @@ def __init__(self, reference_graphs: Collection[nx.Graph]):
             reference_graphs=reference_graphs,
             kernel=AdaptiveRBFKernel(
                 descriptor_fn=OrbitCounts(),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             variant="biased",
         )
@@ -159,9 +161,7 @@ def __init__(
             reference_graphs=reference_graphs,
             kernel=AdaptiveRBFKernel(
                 descriptor_fn=OrbitCounts(),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             subsample_size=subsample_size,
             num_samples=num_samples,
@@ -176,9 +176,7 @@ def __init__(self, reference_graphs: Collection[nx.Graph]):
             reference_graphs=reference_graphs,
             kernel=AdaptiveRBFKernel(
                 descriptor_fn=ClusteringHistogram(bins=100),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             variant="biased",
         )
@@ -196,9 +194,7 @@ def __init__(
             reference_graphs=reference_graphs,
             kernel=AdaptiveRBFKernel(
                 descriptor_fn=ClusteringHistogram(bins=100),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             subsample_size=subsample_size,
             num_samples=num_samples,
@@ -213,9 +209,7 @@ def __init__(self, reference_graphs: Collection[nx.Graph]):
             reference_graphs=reference_graphs,
             kernel=AdaptiveRBFKernel(
                 descriptor_fn=SparseDegreeHistogram(),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             variant="biased",
         )
@@ -233,9 +227,7 @@ def __init__(
             reference_graphs=reference_graphs,
             kernel=AdaptiveRBFKernel(
                 descriptor_fn=SparseDegreeHistogram(),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             subsample_size=subsample_size,
             num_samples=num_samples,
@@ -250,9 +242,7 @@ def __init__(self, reference_graphs: Collection[nx.Graph]):
             reference_graphs=reference_graphs,
             kernel=AdaptiveRBFKernel(
                 descriptor_fn=EigenvalueHistogram(),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             variant="biased",
         )
@@ -270,9 +260,7 @@ def __init__(
             reference_graphs=reference_graphs,
             kernel=AdaptiveRBFKernel(
                 descriptor_fn=EigenvalueHistogram(),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             subsample_size=subsample_size,
             num_samples=num_samples,
@@ -304,9 +292,7 @@ def __init__(
                     ),
                     reference_graphs,
                 ),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             variant="biased",
         )
@@ -338,9 +324,7 @@ def __init__(
                     ),
                     reference_graphs,
                 ),
-                bw=np.array(
-                    [0.01, 0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0]
-                ),
+                bw=_DEFAULT_RBF_BANDWIDTHS,
             ),
             subsample_size=subsample_size,
             num_samples=num_samples,