Skip to content

Commit 36db43c

Browse files
committed
Some fixes to docs sitemap generation
1 parent 3a70ca0 commit 36db43c

2 files changed

Lines changed: 72 additions & 98 deletions

File tree

docs/source/conf.py

Lines changed: 72 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
# Public base URL under which the built documentation is served.
DUCK_DOCS_URL = "https://docs.duckframework.com"

# Location of the duck package, relative to the directory containing this file.
DUCK_PACKAGE_RELATIVE_PATH = "../../duck"

# Metadata for sitemap generation.
# DOCS_DIR is the parent of the directory containing this file. It is resolved
# to an absolute path (like DUCK_INIT_PATH) so that it does not depend on the
# current working directory.
DOCS_DIR = pathlib.Path(__file__).resolve().parent.parent

# Directories, relative to DOCS_DIR, that are scanned for pages when the
# sitemap is generated.
DOCS_SOURCE_DIRS = ("source", "source/api")
18+
1519
# Path to the duck package's __init__.py
1620
DUCK_INIT_PATH = (
1721
pathlib.Path(__file__).resolve().parent / DUCK_PACKAGE_RELATIVE_PATH / "__init__.py"
@@ -20,7 +24,7 @@
2024
# Both settings-module variables have to be in the environment before anything
# uses the duck.settings module (for example through duck.app).
os.environ.update(
    {
        "DUCK_SETTINGS_MODULE": "duck.etc.structures.projects.testing.web.settings",
        "DJANGO_SETTINGS_MODULE": "duck.etc.structures.projects.testing.web.backend.django.duckapp.duckapp.settings",
    }
)
23-
27+
2428

2529
# Entry point to sphinx
2630
def setup(app):
@@ -39,26 +43,6 @@ def on_build_finished(app, exception):
3943
app: The Sphinx application object.
4044
exception: Exception raised during build, if any.
4145
"""
42-
from duck.logging import console
43-
44-
if exception is not None:
45-
console.log("Build failed, skipping custom post-build task, no sitemap will be generated.", level=console.WARNING)
46-
return
47-
48-
console.log("Build completed successfully. Running post-build task...", level=console.WARNING)
49-
try:
50-
# Ensure docs/source is importable, then import sitemap without relying on package context
51-
this_dir = pathlib.Path(__file__).resolve().parent
52-
sys.path.insert(0, str(this_dir))
53-
54-
# Import sitemap generation function
55-
from sitemap import generate_sitemap
56-
57-
except Exception as e:
58-
console.log(f"Skipping sitemap generation (import failed).", level=console.WARNING)
59-
return
60-
61-
# Build the sitemap
6246
generate_sitemap()
6347

6448

@@ -91,7 +75,73 @@ def read_metadata_from_init(init_path):
9175
return metadata
9276

9377

94-
# -- Project information -----------------------------------------------------
78+
def generate_sitemap():
    """
    Generate the documentation sitemap.

    This must be called after sphinx build.

    Every regular file found directly inside each directory listed in
    `DOCS_SOURCE_DIRS` (resolved against `DOCS_DIR`) whose name ends with
    `.md`, `.html` or `.rst` contributes one URL, formed from the file name
    with that extension stripped. A file named `index.rst` maps to the
    directory root URL. Files from `source` are placed directly under the docs
    URL, files from `source/api` under `<docs URL>/api/`. Sub-directories are
    not descended into.

    A source directory that does not exist is reported with a warning and
    skipped.

    The sitemap.xml is placed in `build/html`.

    Raises:
        ValueError: If `DOCS_SOURCE_DIRS` contains an entry other than
            'source' or 'source/api'.
    """
    from duck.logging import console
    from duck.contrib.sitemap import SitemapBuilder

    # URL prefix for the pages of each known source directory.
    # NOTE(review): DUCK_DOCS_URL is the docs-URL constant defined at module
    # level; confirm no separate `DOCS_URL` is intended here.
    url_prefixes = {
        "source": DUCK_DOCS_URL,
        "source/api": f"{DUCK_DOCS_URL}/api",
    }

    # File extensions that denote a documentation page.
    doc_suffixes = (".md", ".html", ".rst")

    urls = set()

    for source_dir in DOCS_SOURCE_DIRS:
        # Fail before scanning so a misconfigured directory is reported even
        # when it is empty or missing.
        if source_dir not in url_prefixes:
            raise ValueError(f"Unknown source directory '{source_dir}', expected 'source' or 'source/api'.")

        url_prefix = url_prefixes[source_dir]

        try:
            # The context manager closes the directory iterator even if the
            # loop body raises.
            with os.scandir(DOCS_DIR / source_dir) as entries:
                for entry in entries:
                    if not entry.is_file():
                        continue

                    filename = entry.name

                    if filename == "index.rst":
                        # The index page is served at the directory root.
                        urls.add(f"{url_prefix}/")
                        continue

                    for suffix in doc_suffixes:
                        if filename.endswith(suffix):
                            # Strip only the trailing extension; the same text
                            # may legitimately occur earlier in the file name.
                            docname = filename[: -len(suffix)]
                            urls.add(f"{url_prefix}/{docname}")
                            break

        except FileNotFoundError as e:
            console.log(f"Caught an error whilst scanning source dirs: {e}", level=console.WARNING)

    # Build the sitemap.
    sitemap_filepath = DOCS_DIR / "build/html/sitemap.xml"

    # Initialize the builder.
    builder = SitemapBuilder(
        server_url=DUCK_DOCS_URL,  # Passing None will automatically resolve server URL
        save_to_file=True,
        filepath=sitemap_filepath,
        extra_urls=urls,
    )

    # Generate the sitemap and save it accordingly.
    builder.build()
    console.log(f"Sitemap has been saved at {sitemap_filepath}", level=console.DEBUG)
142+
143+
144+
# Project information
95145

96146
# Extract metadata from duck/__init__.py
97147
metadata = read_metadata_from_init(DUCK_INIT_PATH)

docs/source/sitemap.py

Lines changed: 0 additions & 76 deletions
This file was deleted.

0 commit comments

Comments
 (0)