-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgate_engine.py
More file actions
169 lines (146 loc) · 5.97 KB
/
gate_engine.py
File metadata and controls
169 lines (146 loc) · 5.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""Unified validation gate runner (SPEC section 11): structured report + parity with validate_repo."""
from __future__ import annotations
import json
import sys
from pathlib import Path
from typing import Any, Callable
from pydantic import BaseModel, Field
from sm_pipeline.validate.coverage import validate_coverage
from sm_pipeline.validate.extraction_artifacts import validate_extraction_run_required
from sm_pipeline.validate.graph import (
validate_dependency_graph_bootstrap_warn,
validate_dependency_graph_quality_warn,
validate_graph,
)
from sm_pipeline.validate.migration import validate_migration_doc
from sm_pipeline.validate.normalization import validate_normalization
from sm_pipeline.validate.provenance import validate_provenance
from sm_pipeline.validate.reviewer import validate_reviewer_lifecycle, validate_claim_value_policy
from sm_pipeline.validate.snapshot_quality import validate_snapshot_quality
from sm_pipeline.validate.llm_proposals import validate_llm_proposal_sidecars_warn
from sm_pipeline.validate.theorem_card_reviewer import validate_theorem_card_reviewer
class GateStepResult(BaseModel):
    """Outcome of one validation check, as recorded in a gate report."""

    # Identifier of the gate the check belongs to.
    gate_id: str
    # Identifier of the individual check within that gate.
    check_id: str
    # Outcome marker: "ok" | "warn".
    status: str
    # Optional human-readable detail; empty when there is nothing to add.
    message: str = Field(default="")
class GateReport(BaseModel):
    """Structured, machine-readable gate report for CI and local tooling."""

    # Repository root the report refers to, stored as a string.
    repo_root: str
    # Overall pass flag; defaults to True.
    ok: bool = Field(default=True)
    # Per-check results; a fresh list is created for every report instance.
    steps: list[GateStepResult] = Field(default_factory=list)
    # Warning messages; a fresh list is created for every report instance.
    warnings: list[str] = Field(default_factory=list)

    def to_json_dict(self) -> dict[str, Any]:
        """Return the report as a dict dumped in pydantic's JSON mode."""
        payload = self.model_dump(mode="json")
        return payload
def _echo_recommendations(repo_root: Path) -> None:
"""Non-blocking hints: suggest DOI/arXiv when missing for papers with past year."""
papers_dir = repo_root / "corpus" / "papers"
index_path = repo_root / "corpus" / "index.json"
if not index_path.exists():
return
index = json.loads(index_path.read_text(encoding="utf-8"))
paper_list = index.get("papers") or []
try:
from datetime import datetime
current_year = datetime.now().year
except Exception:
return
for entry in paper_list:
if not isinstance(entry, dict):
continue
paper_id = entry.get("id")
year = entry.get("year")
if not paper_id or not isinstance(year, (int, float)):
continue
if int(year) > current_year:
continue
meta_path = papers_dir / paper_id / "metadata.json"
if not meta_path.exists():
continue
try:
meta = json.loads(meta_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
continue
if not isinstance(meta, dict):
continue
source = meta.get("source") or {}
if source.get("doi") or source.get("arxiv_id"):
continue
msg = (
f"Recommendation: paper {paper_id} has no DOI or arXiv ID; "
"consider setting metadata.source.doi or metadata.source.arxiv_id"
)
print(msg, file=sys.stderr)
def run_all_gates(repo_root: Path) -> GateReport:
    """Run every validation check in a fixed order and return a report.

    Blocking checks run first. They are called with no exception handling,
    so the first one that raises aborts the run and the exception
    propagates to the caller (parity with legacy validate_repo). Each
    blocking check that returns is recorded as an "ok" step.

    Warn-only checks run next. Every warning they return is appended to
    ``report.warnings``, echoed to stderr, and recorded as a "warn" step.
    Finally, advisory DOI/arXiv recommendations are printed.

    Args:
        repo_root: Repository root; it is resolved before use.

    Returns:
        The populated GateReport.
    """
    # NOTE(review): imported inside the function in the original; presumably
    # to avoid an import cycle — confirm before hoisting.
    from sm_pipeline.validate.schemas import validate_json_schemas_and_kernels

    root = repo_root.resolve()
    report = GateReport(repo_root=str(root))

    # (gate id, check id, validator); order is significant.
    blocking: tuple[tuple[str, str, Callable[[Path], None]], ...] = (
        ("gate2", "json_schemas_and_kernels", validate_json_schemas_and_kernels),
        ("gate2", "normalization_integrity", validate_normalization),
        ("gate3", "provenance_integrity", validate_provenance),
        ("gate2", "extraction_run_required", validate_extraction_run_required),
        ("gate2", "graph_integrity", validate_graph),
        ("gate2", "migration_doc", validate_migration_doc),
        ("gate2", "claim_value_policy", validate_claim_value_policy),
        ("gate2", "reviewer_lifecycle", validate_reviewer_lifecycle),
        ("gate2", "theorem_card_reviewer", validate_theorem_card_reviewer),
        ("gate4", "coverage_integrity", validate_coverage),
    )
    for gate, check, validator in blocking:
        validator(root)
        report.steps.append(
            GateStepResult(gate_id=gate, check_id=check, status="ok")
        )

    # (check id, stderr label, validator returning warning strings);
    # all warn-only checks are reported under gate2.
    advisory: tuple[tuple[str, str, Callable[[Path], list[str]]], ...] = (
        ("snapshot_quality", "Snapshot quality", validate_snapshot_quality),
        (
            "dependency_graph_bootstrap",
            "Dependency graph",
            validate_dependency_graph_bootstrap_warn,
        ),
        (
            "dependency_graph_quality",
            "Dependency graph quality",
            validate_dependency_graph_quality_warn,
        ),
        (
            "suggestion_sidecars",
            "Suggestion sidecar",
            validate_llm_proposal_sidecars_warn,
        ),
    )
    for check, label, validator in advisory:
        for warning in validator(root):
            report.warnings.append(warning)
            print(f"{label} (warn): {warning}", file=sys.stderr)
            report.steps.append(
                GateStepResult(
                    gate_id="gate2",
                    check_id=check,
                    status="warn",
                    message=warning,
                )
            )

    _echo_recommendations(root)
    return report