Skip to content

Commit b218ac3

Browse files
committed
Added tests for snippets in markdown
1 parent 16deea9 commit b218ac3

5 files changed

Lines changed: 178 additions & 39 deletions

File tree

tests/collect_doc_snippets.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,6 @@ def extract_code_blocks_from_docstring(docstring):
2929
return fixed_blocks
3030

3131

32-
def run_code_snippet(code, context=None):
33-
"""Run a snippet of code and catch any exceptions."""
34-
context = context or {}
35-
exec(code, context)
36-
37-
3832
def extract_all_docstrings_from_module(module):
3933
"""Get all docstrings from a module, its classes, and functions."""
4034
docstrings = []

tests/collect_markdown_snippets.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import os
2+
import re
3+
import pathlib
4+
import textwrap
5+
from typing import Iterator, List, Tuple
6+
7+
# ----------------------------
8+
# Configuration
9+
# ----------------------------
10+
BASE_DIR = pathlib.Path(__file__).resolve().parent.parent
11+
DOCS_DIR = BASE_DIR / "docs"
12+
13+
# Regex to match triple-backticked code blocks:
14+
# - PYTHON_CODE_BLOCK_PATTERN matches blocks explicitly labeled as Python (python|py|python3|pycon|ipython)
15+
# - ANY_CODE_BLOCK_PATTERN matches python-labeled OR unlabeled blocks
16+
# Both patterns capture the text between an opening fence line and the next
# run of three backticks.
#   * Only spaces/tabs are allowed between the backticks and the label. Using
#     ``\s*`` there would let the match cross a newline, so a fence followed by
#     a line that merely starts with "py..." would be read as a Python label.
#   * ``\b`` after the label makes it a whole word, so ``pytb`` is not treated
#     as ``py``.
#   * Anything after the label on the fence line (e.g. ``title="..."``) is
#     ignored.
PYTHON_CODE_BLOCK_PATTERN = re.compile(
    r"```[ \t]*(?:python3|python|pycon|ipython|py)\b[^\n]*\s*\n(.*?)```",
    re.DOTALL,
)
# Same as above, but the label is optional so unlabeled fences match as well.
ANY_CODE_BLOCK_PATTERN = re.compile(
    r"```[ \t]*(?:(?:python3|python|pycon|ipython|py)\b[^\n]*)?\s*\n(.*?)```",
    re.DOTALL,
)
24+
25+
# Directories to skip when walking the docs tree (common build outputs)
26+
SKIP_DIRS = {"_build", ".git", ".venv", "build", "site", "dist", "node_modules"}
27+
28+
29+
def _extract_code_blocks_from_markdown(
    markdown_text: str, only_python: bool = True
) -> List[str]:
    """Extract backtick-fenced code blocks from markdown and dedent them.

    The text is scanned line by line while tracking whether the scanner is
    inside a fence. Tracking state is what keeps a closing fence from being
    mistaken for the opening fence of a new, unlabeled block: blocks in other
    languages are consumed and discarded instead of leaking the prose that
    follows them into the result.

    Parameters
    ----------
    markdown_text: str
        Markdown source. ``None`` or an empty string yields an empty list.
    only_python: bool
        If True (default), only fences whose label is one of ``python``,
        ``py``, ``python3``, ``pycon`` or ``ipython`` are returned. If False,
        fences without any label are returned as well.

    Returns
    -------
    List[str]
        The body of each selected block, passed through ``textwrap.dedent``,
        in document order. A fence that is never closed is dropped.
    """
    python_labels = {"python", "py", "python3", "pycon", "ipython"}
    # An opening fence is a line holding only backticks (3+) and an optional
    # info string; the info string itself may not contain backticks.
    opening_fence = re.compile(r"^[ \t]*(`{3,})[ \t]*([^`]*)$")
    closing_fence = re.compile(r"^[ \t]*(`{3,})[ \t]*$")

    blocks: List[str] = []
    fence_len = 0  # 0 means "not inside a fence"
    keep = False
    body: List[str] = []

    for line in (markdown_text or "").splitlines(keepends=True):
        bare = line.rstrip("\r\n")

        if fence_len == 0:
            opened = opening_fence.match(bare)
            if opened is None:
                continue
            fence_len = len(opened.group(1))
            info = opened.group(2).strip()
            # The label is the leading word of the info string; attributes
            # such as ``title="x"`` or ``{1,3}`` after it are ignored.
            label = re.match(r"\w*", info).group(0)
            keep = label in python_labels or (not only_python and not info)
            body = []
            continue

        closed = closing_fence.match(bare)
        # A closer must be at least as long as the opener, so a shorter fence
        # shown inside a longer one stays part of the block body.
        if closed is not None and len(closed.group(1)) >= fence_len:
            if keep:
                blocks.append(textwrap.dedent("".join(body)))
            fence_len = 0
            continue

        if keep:
            body.append(line)

    return blocks
48+
49+
50+
def _iter_markdown_files(root: pathlib.Path) -> Iterator[pathlib.Path]:
    """Walk *root* and lazily produce every ``.md`` / ``.mdx`` file found.

    Directories whose names appear in ``SKIP_DIRS`` are pruned from the walk,
    and a *root* that does not exist produces nothing.
    """
    markdown_suffixes = (".md", ".mdx")

    if root.exists():
        for current_dir, subdirs, names in os.walk(root):
            # Slice-assign so os.walk sees the pruned list and does not
            # descend into the skipped directories.
            subdirs[:] = [name for name in subdirs if name not in SKIP_DIRS]

            here = pathlib.Path(current_dir)
            yield from (
                here / name
                for name in names
                if name.lower().endswith(markdown_suffixes)
            )
62+
63+
64+
def gather_markdown_snippets(only_python: bool = True) -> List[Tuple[str, str]]:
    """Collect one runnable snippet per markdown file found under ``docs/``.

    All code blocks extracted from a file are joined, separated by a blank
    line, into a single source string, so each file contributes at most one
    entry. A file that cannot be read contributes an entry whose code raises
    ``IOError``, so the read failure shows up as a failing test case.

    Parameters
    ----------
    only_python: bool
        If True (default), only capture code fences labeled as Python. If
        False, also capture unlabeled fences.

    Returns
    -------
    List[Tuple[str, str]]
        ``(identifier, code)`` pairs; the identifier is the markdown file's
        path relative to ``BASE_DIR``.
    """
    print("Gathering markdown snippets from docs/ ...")

    snippets: List[Tuple[str, str]] = []

    if not DOCS_DIR.exists():
        print(f"docs/ directory not found at {DOCS_DIR}")
        return snippets

    # Sorted up front so processing order (and the progress counter) is stable.
    files = sorted(_iter_markdown_files(DOCS_DIR))
    print(f"Found {len(files)} markdown files to process")

    for index, md_path in enumerate(files):
        rel_path = md_path.relative_to(BASE_DIR)
        print(f"[{index + 1}/{len(files)}] Processing: {rel_path}")

        try:
            text = md_path.read_text(encoding="utf-8")
        except Exception as exc:
            print(
                f" -> Failed to read {rel_path}: {type(exc).__name__}: {exc}"
            )
            # Record the failure as a snippet so it is reported, not skipped.
            failure = (
                f"{rel_path} (read error)",
                f"# read error\nraise IOError({repr(str(exc))})",
            )
            snippets.append(failure)
            continue

        code_blocks = _extract_code_blocks_from_markdown(
            text, only_python=only_python
        )
        print(f" -> Found {len(code_blocks)} code blocks")

        if not code_blocks:
            continue

        # A blank line between blocks keeps the last statement of one block
        # from running into the first statement of the next.
        trimmed = [block.strip("\n") for block in code_blocks]
        concatenated = "\n\n".join(trimmed) + "\n"
        identifier = str(rel_path)
        snippets.append((identifier, concatenated))
        print(f" -> Added concatenated snippet for: {identifier}")

    print(f"Finished gathering snippets. Total: {len(snippets)} snippets")
    return snippets
122+
123+
124+
__all__ = [
125+
"gather_markdown_snippets",
126+
]

tests/conftest.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,11 @@
3535
from polygraph.datasets.molecules import QM9
3636

3737
from collect_doc_snippets import gather_docstring_snippets
38+
from collect_markdown_snippets import gather_markdown_snippets
3839

3940
# Cache the snippets to avoid calling gather_docstring_snippets multiple times
40-
_cached_snippets = None
41+
_cached_doc_snippets = None
42+
_cached_md_snippets = None
4143

4244
NO_SKIP_OPTION = "--no-skip"
4345
SAMPLE_SIZE_OPTION = "--sample-size"
@@ -234,18 +236,33 @@ def sample_molecules():
234236

235237

236238
def pytest_generate_tests(metafunc):
237-
if "code_snippet" in metafunc.fixturenames:
238-
global _cached_snippets
239-
if _cached_snippets is None:
239+
if "doc_snippet" in metafunc.fixturenames:
240+
global _cached_doc_snippets
241+
if _cached_doc_snippets is None:
240242
print(
241243
"pytest_generate_tests: Gathering docstring snippets with debug output..."
242244
)
243-
_cached_snippets = gather_docstring_snippets()
245+
_cached_doc_snippets = gather_docstring_snippets()
244246
print(
245-
f"pytest_generate_tests: Found {len(_cached_snippets)} snippets"
247+
f"pytest_generate_tests: Found {len(_cached_doc_snippets)} snippets"
246248
)
247249
metafunc.parametrize(
248-
"code_snippet",
249-
_cached_snippets,
250-
ids=[snippet[0] for snippet in _cached_snippets],
250+
"doc_snippet",
251+
_cached_doc_snippets,
252+
ids=[snippet[0] for snippet in _cached_doc_snippets],
253+
)
254+
if "md_snippet" in metafunc.fixturenames:
255+
global _cached_md_snippets
256+
if _cached_md_snippets is None:
257+
print(
258+
"pytest_generate_tests: Gathering markdown snippets with debug output..."
259+
)
260+
_cached_md_snippets = gather_markdown_snippets()
261+
print(
262+
f"pytest_generate_tests: Found {len(_cached_md_snippets)} snippets"
263+
)
264+
metafunc.parametrize(
265+
"md_snippet",
266+
_cached_md_snippets,
267+
ids=[snippet[0] for snippet in _cached_md_snippets],
251268
)

tests/test_docstring_snippets.py

Lines changed: 0 additions & 24 deletions
This file was deleted.

tests/test_snippets.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import pytest
2+
import traceback
3+
4+
5+
def run_code_snippet(code, context=None):
    """Execute *code* with ``exec`` using *context* as its namespace.

    Exceptions raised by the snippet are not caught here; they propagate to
    the caller, which decides how to report them.

    Parameters
    ----------
    code
        Python source to execute.
    context : dict, optional
        Namespace the snippet runs in. It is used as-is (even when empty), so
        names defined by the snippet are visible to the caller afterwards.
        A fresh dict is created when omitted.
    """
    # Compare against None explicitly: ``context or {}`` would silently swap a
    # caller-supplied empty dict for a new one and hide the snippet's results.
    if context is None:
        context = {}
    exec(code, context)
9+
10+
11+
def test_docstring_snippet_runs(doc_snippet):
    """Run one docstring snippet and fail the test if it raises.

    ``doc_snippet`` is an ``(identifier, code)`` pair; the identifier and the
    formatted traceback are included in the failure message.
    """
    snippet_id, code = doc_snippet

    try:
        run_code_snippet(code)
    except Exception:
        # Format the traceback while the exception is still being handled.
        report = f"Snippet failed: {snippet_id}\n\n{traceback.format_exc()}"
        pytest.fail(report)
19+
20+
21+
def test_markdown_snippet_runs(md_snippet):
    """Run one markdown snippet and fail the test if it raises.

    ``md_snippet`` is an ``(identifier, code)`` pair; the identifier and the
    formatted traceback are included in the failure message.
    """
    snippet_id, code = md_snippet

    try:
        run_code_snippet(code)
    except Exception:
        # Format the traceback while the exception is still being handled.
        report = f"Snippet failed: {snippet_id}\n\n{traceback.format_exc()}"
        pytest.fail(report)

0 commit comments

Comments
 (0)