Skip to content

Commit a3b9698

Browse files
committed
Fix critical, high, and medium review issues in core library
kernel_lr.py: C1 — use K.shape[0] for alpha_init instead of len(X); C4 — use np.logaddexp(0, -yf) for numerical stability; H1 — merge objective/gradient into one function to eliminate the redundant K @ alpha computation; H2 — avoid double featurization when X2 is None; L1 — remove the unused random_state and project_dim parameters.
vun.py: C2 — replace the signal.SIGALRM timeout with a ThreadPoolExecutor (works in multiprocessing workers and on Windows); H3 — add edges="links" to the nx.node_link_data/node_link_graph calls; M7 — remove section-separator comments.
generic_descriptors.py: M2 — replace defensive .get() defaults with direct key access; M3 — remove the bare "except Exception" in PyramidMatchDescriptor.
io.py: M1 — replace the hasattr check with isinstance(obj, np.generic).
1 parent 3bf5004 commit a3b9698

4 files changed

Lines changed: 68 additions & 79 deletions

File tree

polygraph/metrics/base/kernel_lr.py

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,11 @@ def __init__(
3737
use_node_labels: bool = False,
3838
node_label_key: Optional[str] = None,
3939
digest_size: int = 4,
40-
project_dim: Optional[int] = None,
4140
C: float = 1.0,
4241
max_iter: int = 100,
4342
tol: float = 1e-4,
4443
n_jobs: int = 1,
4544
verbose: bool = False,
46-
random_state: Optional[int] = None,
4745
normalize_kernel: bool = True,
4846
center_kernel: bool = True,
4947
):
@@ -52,13 +50,11 @@ def __init__(
5250
self.use_node_labels = use_node_labels
5351
self.node_label_key = node_label_key
5452
self.digest_size = digest_size
55-
self.project_dim = project_dim
5653
self.C = C
5754
self.max_iter = max_iter
5855
self.tol = tol
5956
self.n_jobs = n_jobs
6057
self.verbose = verbose
61-
self.random_state = random_state
6258
self.normalize_kernel = normalize_kernel
6359
self.center_kernel = center_kernel
6460

@@ -101,15 +97,15 @@ def _compute_kernel_matrix(
10197
X2: Optional[Union[List[nx.Graph], np.ndarray, csr_array]] = None,
10298
) -> np.ndarray:
10399
"""Compute kernel matrix between X1 and X2."""
104-
if X2 is None:
105-
X2 = X1
106-
107100
is_array = isinstance(X1, (np.ndarray, csr_array))
108101
kernel = self._resolve_kernel(is_array, show_progress=self.verbose)
109102

103+
if X2 is None:
104+
features1 = X1 if is_array else kernel.featurize(X1)
105+
return kernel.pre_gram_block(features1, features1) # pyright: ignore[reportArgumentType]
106+
110107
features1 = X1 if is_array else kernel.featurize(X1)
111108
features2 = X2 if is_array else kernel.featurize(X2) # pyright: ignore[reportArgumentType]
112-
113109
return kernel.pre_gram_block(features1, features2) # pyright: ignore[reportArgumentType]
114110

115111
def _compute_kernel_diag(
@@ -121,26 +117,28 @@ def _compute_kernel_diag(
121117
features = X if is_array else kernel.featurize(X)
122118
return kernel.kernel_diag(features) # pyright: ignore[reportArgumentType]
123119

124-
def _objective(
125-
self, alpha: np.ndarray, K: np.ndarray, y: np.ndarray
126-
) -> float:
127-
"""Objective function minimizing log-likelihood plus regularization."""
128-
f = K @ alpha
129-
yf = y * f
130-
log_likelihood = np.sum(np.log(1 + np.exp(-yf)))
131-
regularization = (1.0 / (2.0 * self.C)) * (alpha.T @ K @ alpha)
132-
return log_likelihood + regularization
133-
134-
def _gradient(
135-
self, alpha: np.ndarray, K: np.ndarray, y: np.ndarray
136-
) -> np.ndarray:
137-
"""Gradient of the objective function."""
138-
f = K @ alpha
139-
yf = y * f
120+
def _objective_and_gradient(
121+
self,
122+
alpha: np.ndarray,
123+
K: np.ndarray,
124+
y: np.ndarray,
125+
) -> tuple[float, np.ndarray]:
126+
"""Objective and gradient for L-BFGS-B optimization.
127+
128+
Returns both to avoid redundant K @ alpha computation.
129+
"""
130+
Ka = K @ alpha
131+
yf = y * Ka
132+
log_likelihood = np.sum(np.logaddexp(0, -yf))
133+
regularization = (1.0 / (2.0 * self.C)) * (alpha.T @ Ka)
134+
objective = log_likelihood + regularization
135+
140136
sigmoid_neg_yf = self._sigmoid(-yf)
141137
grad_log_likelihood = -K.T @ (y * sigmoid_neg_yf)
142-
grad_regularization = (1.0 / self.C) * (K @ alpha)
143-
return grad_log_likelihood + grad_regularization
138+
grad_regularization = (1.0 / self.C) * Ka
139+
gradient = grad_log_likelihood + grad_regularization
140+
141+
return objective, gradient
144142

145143
def fit(
146144
self, X: Union[List[nx.Graph], np.ndarray, csr_array], y: np.ndarray
@@ -172,14 +170,14 @@ def fit(
172170
K = self.centerer_.transform(K)
173171

174172
self.kernel_ = K
175-
alpha_init = np.zeros(len(X))
173+
alpha_init = np.zeros(K.shape[0])
176174

177175
result = minimize(
178-
fun=self._objective,
176+
fun=self._objective_and_gradient,
179177
x0=alpha_init,
180178
args=(K, y),
181179
method="L-BFGS-B",
182-
jac=self._gradient,
180+
jac=True,
183181
options={"maxiter": self.max_iter, "ftol": self.tol},
184182
)
185183

polygraph/metrics/vun.py

Lines changed: 28 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@
2828
"""
2929

3030
import json
31-
import signal
3231
from collections import defaultdict
32+
from concurrent.futures import (
33+
ThreadPoolExecutor,
34+
TimeoutError as FuturesTimeoutError,
35+
)
3336
from functools import partial
3437
from multiprocessing import Pool
3538
from typing import (
@@ -76,34 +79,23 @@ def __repr__(self) -> str:
7679
)
7780

7881

79-
class _IsomorphismTimeout(Exception):
80-
pass
81-
82-
83-
def _timeout_handler(signum, frame):
84-
raise _IsomorphismTimeout()
85-
86-
8782
def _is_isomorphic_with_timeout(
8883
g: nx.Graph,
8984
h: nx.Graph,
9085
timeout: int,
9186
) -> bool:
92-
"""Run isomorphism check with a SIGALRM timeout.
87+
"""Run isomorphism check with a timeout.
9388
9489
If the check exceeds ``timeout`` seconds, conservatively returns True
95-
(i.e. treats the pair as isomorphic).
90+
(i.e. treats the pair as isomorphic). This is safe for novelty/
91+
uniqueness metrics because it can only decrease the reported score.
9692
"""
97-
old_handler = signal.signal(signal.SIGALRM, _timeout_handler)
98-
signal.alarm(timeout)
99-
try:
100-
result = nx.is_isomorphic(g, h)
101-
except _IsomorphismTimeout:
102-
result = True
103-
finally:
104-
signal.alarm(0)
105-
signal.signal(signal.SIGALRM, old_handler)
106-
return result
93+
with ThreadPoolExecutor(max_workers=1) as executor:
94+
future = executor.submit(nx.is_isomorphic, g, h)
95+
try:
96+
return future.result(timeout=timeout)
97+
except FuturesTimeoutError:
98+
return True
10799

108100

109101
class _GraphSet:
@@ -138,32 +130,29 @@ def __contains__(self, g: nx.Graph) -> bool:
138130
return False
139131

140132

141-
# ---------------------------------------------------------------------------
142-
# Parallel worker functions (must be importable, not in __main__)
143-
# ---------------------------------------------------------------------------
144-
145-
146133
def _check_novel_worker(
147134
gen_graph_json: str,
148135
train_graphs_json: List[str],
149136
train_hashes: Dict[str, List[int]],
150137
iso_timeout: int,
151138
) -> bool:
152139
"""Check if a single generated graph is novel (not in training set)."""
153-
g = nx.node_link_graph(json.loads(gen_graph_json))
140+
g = nx.node_link_graph(json.loads(gen_graph_json), edges="links")
154141
fp = nx.weisfeiler_lehman_graph_hash(g)
155142
if fp not in train_hashes:
156143
return True
157144
for idx in train_hashes[fp]:
158-
h = nx.node_link_graph(json.loads(train_graphs_json[idx]))
145+
h = nx.node_link_graph(
146+
json.loads(train_graphs_json[idx]), edges="links"
147+
)
159148
if _is_isomorphic_with_timeout(g, h, iso_timeout):
160149
return False
161150
return True
162151

163152

164153
def _check_validity_worker(graph_json: str, validity_fn: Callable) -> bool:
165154
"""Check validity of a single graph in a worker process."""
166-
g = nx.node_link_graph(json.loads(graph_json))
155+
g = nx.node_link_graph(json.loads(graph_json), edges="links")
167156
return validity_fn(g)
168157

169158

@@ -215,9 +204,13 @@ def _compute_novel_parallel(
215204
self, generated_graphs: List[nx.Graph]
216205
) -> List[bool]:
217206
train_json = [
218-
json.dumps(nx.node_link_data(g)) for g in self._train_graphs
207+
json.dumps(nx.node_link_data(g, edges="links"))
208+
for g in self._train_graphs
209+
]
210+
gen_json = [
211+
json.dumps(nx.node_link_data(g, edges="links"))
212+
for g in generated_graphs
219213
]
220-
gen_json = [json.dumps(nx.node_link_data(g)) for g in generated_graphs]
221214
worker_fn = partial(
222215
_check_novel_worker,
223216
train_graphs_json=train_json,
@@ -230,7 +223,10 @@ def _compute_novel_parallel(
230223
def _compute_valid_parallel(
231224
self, generated_graphs: List[nx.Graph]
232225
) -> List[bool]:
233-
gen_json = [json.dumps(nx.node_link_data(g)) for g in generated_graphs]
226+
gen_json = [
227+
json.dumps(nx.node_link_data(g, edges="links"))
228+
for g in generated_graphs
229+
]
234230
assert self._validity_fn is not None
235231
worker_fn = partial(
236232
_check_validity_worker, validity_fn=self._validity_fn

polygraph/utils/descriptors/generic_descriptors.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -669,8 +669,7 @@ def _get_node_labels(self, graph: nx.Graph) -> Optional[dict]:
669669
if self.node_label_key is None:
670670
return None
671671
return {
672-
u: str(graph.nodes[u].get(self.node_label_key, "__UNK__"))
673-
for u in graph.nodes()
672+
u: str(graph.nodes[u][self.node_label_key]) for u in graph.nodes()
674673
}
675674

676675
def _hash_feature_key(
@@ -868,17 +867,14 @@ def _compute_features(self, graph: nx.Graph) -> dict:
868867
feats = [d for _, d in graph.degree()]
869868
feats = np.array(feats).reshape(-1, 1)
870869
else:
871-
try:
872-
feats_list = []
873-
for _, data in graph.nodes(data=True):
874-
val = data.get(self.node_label_key, 0)
875-
if isinstance(val, (list, np.ndarray)):
876-
feats_list.append(val)
877-
else:
878-
feats_list.append([val])
879-
feats = np.array(feats_list)
880-
except Exception:
881-
return {}
870+
feats_list = []
871+
for _, data in graph.nodes(data=True):
872+
val = data[self.node_label_key]
873+
if isinstance(val, (list, np.ndarray)):
874+
feats_list.append(val)
875+
else:
876+
feats_list.append([val])
877+
feats = np.array(feats_list)
882878

883879
if len(feats) == 0:
884880
return {}

polygraph/utils/io.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from pathlib import Path
1010
from typing import Any, Mapping, Optional
1111

12+
import numpy as np
13+
1214

1315
def _json_default(obj: Any) -> Any:
1416
"""Fallback serializer for ``json.dumps``.
@@ -18,11 +20,8 @@ def _json_default(obj: Any) -> Any:
1820
"""
1921
if isinstance(obj, (dt.datetime, dt.date)):
2022
return obj.isoformat()
21-
if hasattr(obj, "item"):
22-
try:
23-
return obj.item()
24-
except Exception:
25-
return str(obj)
23+
if isinstance(obj, np.generic):
24+
return obj.item()
2625
if isinstance(obj, Path):
2726
return str(obj)
2827
return str(obj)

0 commit comments

Comments
 (0)