// route.ts — route handler that serves MDX pages compiled to plain Markdown (see GET below).
import { access, readFile } from "node:fs/promises";
import { join } from "node:path";
import { type NextRequest, NextResponse } from "next/server";
// Next.js route segment config. Per the Next.js docs, "force-dynamic" opts the
// route out of static rendering so the handler runs on every request.
export const dynamic = "force-dynamic";
// Matches a trailing ".md" so GET can strip it from the requested path.
const MD_EXTENSION_REGEX = /\.md$/;
// Regex patterns for MDX to Markdown compilation (top-level for performance)
// Leading frontmatter block: "---", body (group 1), "---", optional newline.
// Only LF line endings are matched; the pattern contains literal "\n".
const FRONTMATTER_REGEX = /^---\n([\s\S]*?)\n---\n?/;
// Single-line `import <anything> from "<module>"` statements.
const IMPORT_FROM_REGEX = /^import\s+.*?from\s+['"].*?['"];?\s*$/gm;
// Side-effect imports: `import "<module>"`.
const IMPORT_DIRECT_REGEX = /^import\s+['"].*?['"];?\s*$/gm;
// `import { a, b } from "<module>"`; the brace list may span several lines.
const IMPORT_DESTRUCTURE_REGEX =
/^import\s*\{[\s\S]*?\}\s*from\s*['"].*?['"];?\s*$/gm;
// An export statement, consumed lazily up to (but not including) the newline
// that precedes the next import/export, a "#" heading, or a blank line.
// NOTE(review): the lookahead needs a following "\n", so an export on the very
// last line of input with no trailing newline is not matched — confirm this is acceptable.
const EXPORT_REGEX =
/^export\s+(const|let|var|function|default)\s+[\s\S]*?(?=\n(?:import|export|#|\n|$))/gm;
// JSX attribute pattern that properly handles:
// - Quoted strings containing ">" characters
// - JSX expressions in curly braces containing ">" (arrow functions, comparisons)
// - Multiline attributes (newlines allowed between attributes)
// - Up to 3 levels of brace nesting for style={{outer: {inner: 1}}} patterns
// The brace pattern is built by composing regex source strings, because
// JavaScript regular expressions have no recursion: each level embeds the
// level below it inside an escaped pair of braces.
// NOTE(review): the nested `(...)*` groups around `[^{}]*` may backtrack heavily
// on long inputs with unbalanced braces — worth confirming on large pages.
const BRACE_CONTENT_L0 = "[^{}]*"; // Innermost: no braces
const BRACE_CONTENT_L1 = `(?:${BRACE_CONTENT_L0}|\\{${BRACE_CONTENT_L0}\\})*`; // 1 level
const BRACE_CONTENT_L2 = `(?:${BRACE_CONTENT_L0}|\\{${BRACE_CONTENT_L1}\\})*`; // 2 levels
const BRACE_PATTERN = `\\{${BRACE_CONTENT_L2}\\}`; // Full brace expression (supports 3 levels)
// Attribute run: any characters other than > " ' { }, or a complete quoted
// string, or a complete brace expression — repeated zero or more times.
const JSX_ATTRS_PATTERN = `(?:[^>"'{}]|"[^"]*"|'[^']*'|${BRACE_PATTERN})*`;
// <Component ... /> where the tag name starts with an uppercase letter and may
// contain dots; lowercase (HTML) tags are not matched. Group 1 is the tag name.
const SELF_CLOSING_JSX_REGEX = new RegExp(
`<([A-Z][a-zA-Z0-9.]*)${JSX_ATTRS_PATTERN}\\/>`,
"g"
);
// <Component ...>children</Component>. The `\1` backreference requires the
// closing tag to repeat the opening tag name; group 2 is the lazily matched
// children, so the nearest closing tag of the same name ends the match.
const JSX_WITH_CHILDREN_REGEX = new RegExp(
`<([A-Z][a-zA-Z0-9.]*)${JSX_ATTRS_PATTERN}>([\\s\\S]*?)<\\/\\1>`,
"g"
);
// A fenced code block: from one ``` to the next ```, across lines.
const CODE_BLOCK_REGEX = /```[\s\S]*?```/g;
// Any {…} expression with up to 3 levels of brace nesting.
const JSX_EXPRESSION_REGEX = new RegExp(BRACE_PATTERN, "g");
// Three or more consecutive newlines (i.e. more than one blank line).
const EXCESSIVE_NEWLINES_REGEX = /\n{3,}/g;
// Placeholder token that stands in for a code block; group 1 is its index.
const CODE_BLOCK_PLACEHOLDER_REGEX = /__CODE_BLOCK_(\d+)__/g;
// GuideOverview component patterns - convert to markdown headers
// Each matches the literal opening tag (no attributes allowed) through the
// nearest matching closing tag; group 1 is the inner content with surrounding
// whitespace excluded.
const GUIDE_OVERVIEW_OUTCOMES_REGEX =
/<GuideOverview\.Outcomes>\s*([\s\S]*?)\s*<\/GuideOverview\.Outcomes>/g;
const GUIDE_OVERVIEW_PREREQUISITES_REGEX =
/<GuideOverview\.Prerequisites>\s*([\s\S]*?)\s*<\/GuideOverview\.Prerequisites>/g;
const GUIDE_OVERVIEW_YOU_WILL_LEARN_REGEX =
/<GuideOverview\.YouWillLearn>\s*([\s\S]*?)\s*<\/GuideOverview\.YouWillLearn>/g;
// Image component pattern - extract alt and src for markdown image
// Handles both quoted strings and JSX expressions: alt="text" or alt={"text"}, src="/path" or src={"/path"}
// Group 1 = plain quoted value, group 2 = value inside a {"…"} expression.
// NOTE(review): the value class [^"']+ stops at either quote character, so an
// apostrophe inside a double-quoted value (alt="Bob's chart") is captured only
// up to the apostrophe.
const IMAGE_ALT_REGEX = /alt=(?:["']([^"']+)["']|\{["']([^"']+)["']\})/;
const IMAGE_SRC_REGEX = /src=(?:["']([^"']+)["']|\{["']([^"']+)["']\})/;
// A self-closing <Image ... /> tag. Attributes are matched with [^>]*?, so a
// literal ">" inside an attribute value prevents the tag from matching.
const IMAGE_COMPONENT_REGEX = /<Image\s+[^>]*?\/>/g;
// Internal markdown links - add .md extension
// Matches [text](/path) but not [text](http...) or [text](#anchor)
// Group 1 = link text, group 2 = the target without its leading "/". The first
// character after "/" may not be ")" or "#", so "/" alone and "/#x" are skipped;
// a "#fragment" later in the target is still part of group 2.
const INTERNAL_LINK_REGEX = /\[([^\]]+)\]\(\/([^)#][^)]*)\)/g;
// Check if path has a file extension
// True when the string ends in "." followed by one or more ASCII letters/digits.
const HAS_EXTENSION_REGEX = /\.[a-zA-Z0-9]+$/;
// Regex for detecting markdown list items and numbered lists
// Both are anchored at the start, so callers must strip leading whitespace first.
const UNORDERED_LIST_REGEX = /^[-*+]\s/;
const ORDERED_LIST_REGEX = /^\d+[.)]\s/;
// Regexes for extracting the `title` and `description` frontmatter fields.
// Handles: "double quoted", 'single quoted', or unquoted values.
// Group 1 = double-quoted content, Group 2 = single-quoted content,
// Group 3 = unquoted/fallback (the rest of the line, quotes included).
//
// - `^[ \t]*` with the `m` flag anchors the key at the start of a line, so keys
//   that merely end in the same word (e.g. `subtitle:`) are not mistaken for it.
// - The quoted alternatives require the closing quote to be the last
//   non-blank thing on the line (`$` under the `m` flag), which prevents an
//   apostrophe inside the value from being misread as a delimiter; such values
//   fall through to group 3.
// - Only spaces and tabs are skipped after the colon, so an empty value does
//   not swallow the following line as its content.
const TITLE_REGEX =
  /^[ \t]*title:[ \t]*(?:"([^"\n]*)"[ \t]*$|'([^'\n]*)'[ \t]*$|([^\n]+))/m;
const DESCRIPTION_REGEX =
  /^[ \t]*description:[ \t]*(?:"([^"\n]*)"[ \t]*$|'([^'\n]*)'[ \t]*$|([^\n]+))/m;
// Leading run of spaces and/or tabs on a line (used to measure indentation).
const LEADING_WHITESPACE_REGEX = /^[ \t]+/;
/**
 * Strips the indentation shared by every meaningful line of `text`.
 *
 * Content lifted out of a JSX component usually carries the component's
 * nesting indent; this shifts it back to the left margin. Blank lines and
 * code-fence marker lines (those whose trimmed form starts with ```) do not
 * take part in measuring the shared indent.
 *
 * @param text - Multi-line text, "\n"-separated.
 * @returns The text with the common indent removed, or `text` unchanged when
 *   there is no common indent to remove.
 */
function dedent(text: string): string {
  // Width of the leading run of spaces/tabs on a row (0 when there is none).
  const indentWidth = (row: string): number =>
    LEADING_WHITESPACE_REGEX.exec(row)?.[0].length ?? 0;

  const rows = text.split("\n");

  // Smallest indent among rows that are neither blank nor fence markers.
  const sharedIndent = rows
    .filter((row) => {
      const body = row.trim();
      return body !== "" && !body.startsWith("```");
    })
    .reduce(
      (smallest, row) => Math.min(smallest, indentWidth(row)),
      Number.POSITIVE_INFINITY
    );

  // Nothing measurable, or something already sits at the margin.
  if (sharedIndent === 0 || !Number.isFinite(sharedIndent)) {
    return text;
  }

  const shifted: string[] = [];
  for (const row of rows) {
    const body = row.trim();

    // Blank rows and rows indented less than the shared amount simply lose
    // whatever leading whitespace they have.
    if (body === "" || indentWidth(row) < sharedIndent) {
      shifted.push(row.trimStart());
      continue;
    }

    // Fence markers are emitted fully trimmed, mirroring the fact that they
    // were skipped while measuring; every other row drops the shared prefix.
    shifted.push(body.startsWith("```") ? body : row.slice(sharedIndent));
  }

  return shifted.join("\n");
}
/**
 * Strips one pair of matching surrounding quotes from a value, if present.
 * Used for unquoted fallback values that may still carry their quotes because
 * an apostrophe inside the value kept the quoted patterns from matching.
 *
 * The value is trimmed first. Quotes are removed only when the trimmed value
 * is at least two characters long and starts and ends with the same quote
 * character, so a lone `"` or `'` is returned as-is rather than being treated
 * as both the opening and the closing quote.
 *
 * @param value - Raw field value, possibly padded with whitespace.
 * @returns The trimmed value without its surrounding quote pair.
 */
function stripSurroundingQuotes(value: string): string {
  const trimmed = value.trim();
  if (trimmed.length < 2) {
    return trimmed;
  }

  const first = trimmed[0];
  const last = trimmed[trimmed.length - 1];
  const isQuote = first === '"' || first === "'";

  return isQuote && first === last ? trimmed.slice(1, -1) : trimmed;
}
/**
 * Reads the `title` and `description` fields out of a frontmatter string.
 *
 * Each field regex exposes three capture groups: double-quoted content,
 * single-quoted content, and an unquoted fallback. The first group that
 * participated in the match wins; the fallback is additionally passed through
 * `stripSurroundingQuotes`.
 *
 * @param frontmatter - Raw frontmatter text (may be empty).
 * @returns The title (defaulting to "Arcade Documentation" when missing or
 *   empty) and the description (empty string when missing).
 */
function extractFrontmatterMeta(frontmatter: string): {
  title: string;
  description: string;
} {
  const readField = (pattern: RegExp): string => {
    const hit = frontmatter.match(pattern);
    const doubleQuoted = hit?.[1];
    const singleQuoted = hit?.[2];
    const bare = hit?.[3];

    if (doubleQuoted != null) {
      return doubleQuoted;
    }
    if (singleQuoted != null) {
      return singleQuoted;
    }
    // Unquoted fallback: may still be wrapped in quotes, so unwrap it.
    return stripSurroundingQuotes(bare ?? "");
  };

  const parsedTitle = readField(TITLE_REGEX);
  const description = readField(DESCRIPTION_REGEX);

  return {
    title: parsedTitle === "" ? "Arcade Documentation" : parsedTitle,
    description,
  };
}
/**
 * Normalizes indentation in the final output.
 *
 * - Code fence marker lines (```) are moved to column 0 and toggle the
 *   "inside code block" state.
 * - Lines inside a code block are left exactly as they are.
 * - Outside code blocks, list items and blockquotes that start with a space
 *   keep their indentation, rounded down to a multiple of two spaces, so that
 *   nested lists stay nested.
 * - Every other line loses its leading whitespace.
 *
 * @param text - Markdown text, "\n"-separated.
 * @returns The text with stray indentation removed.
 */
function normalizeIndentation(text: string): string {
  const finalLines: string[] = [];
  let inCodeBlock = false;

  for (const line of text.split("\n")) {
    const trimmed = line.trimStart();

    if (trimmed.startsWith("```")) {
      inCodeBlock = !inCodeBlock;
      finalLines.push(trimmed); // Code block markers should start at column 0
      continue;
    }

    if (inCodeBlock) {
      finalLines.push(line); // Preserve indentation inside code blocks
      continue;
    }

    // Preserve indentation for nested list items and blockquotes
    const isListItem =
      UNORDERED_LIST_REGEX.test(trimmed) || ORDERED_LIST_REGEX.test(trimmed);
    const isBlockquote = trimmed.startsWith(">");

    if ((isListItem || isBlockquote) && line.startsWith(" ")) {
      // Round the indent down to a whole number of 2-space levels. Each level
      // must be emitted as TWO spaces: emitting one space per level would
      // halve the indent and turn a 2-space nested item into a 1-space one,
      // which markdown no longer treats as nested.
      const leadingSpaces = line.length - trimmed.length;
      const indentLevels = Math.floor(leadingSpaces / 2);
      finalLines.push("  ".repeat(indentLevels) + trimmed);
    } else {
      finalLines.push(trimmed); // Remove leading whitespace for other lines
    }
  }

  return finalLines.join("\n");
}
/**
 * Adds a ".md" extension to an internal link target (given without its
 * leading "/"), placing it before any "?query" or "#fragment" suffix.
 * Targets whose path part is empty or already ends in a file extension are
 * returned unchanged.
 */
function withMarkdownExtension(target: string): string {
  // Find where the path part ends: at the first "?" or "#", if any.
  let suffixStart = target.length;
  for (const marker of ["?", "#"]) {
    const at = target.indexOf(marker);
    if (at !== -1 && at < suffixStart) {
      suffixStart = at;
    }
  }

  const pagePart = target.slice(0, suffixStart);
  if (pagePart === "" || HAS_EXTENSION_REGEX.test(pagePart)) {
    return target;
  }
  return `${pagePart}.md${target.slice(suffixStart)}`;
}

/**
 * Compiles MDX content to clean markdown by:
 * - Preserving frontmatter
 * - Removing import and export statements
 * - Converting GuideOverview sections to headings and Image components to
 *   markdown images
 * - Replacing remaining JSX components with their text content
 * - Removing leftover `{…}` JSX expressions outside fenced code blocks
 * - Rewriting internal links to point at the ".md" variant of the page
 * - Providing fallback content for component-only pages
 *
 * NOTE(review): only the `{…}` expression removal is shielded from fenced
 * code blocks; the import/export, JSX-tag and link passes run over the whole
 * document, code samples included — confirm that is intended.
 *
 * @param content - Raw MDX source of the page.
 * @param pagePath - URL path of the page; used to build the link in the
 *   fallback text.
 * @returns Markdown text (frontmatter first, if any) ending in a newline.
 */
function compileMdxToMarkdown(content: string, pagePath: string): string {
  let result = content;

  // Extract and preserve frontmatter if present
  let frontmatter = "";
  const frontmatterMatch = result.match(FRONTMATTER_REGEX);
  if (frontmatterMatch) {
    frontmatter = frontmatterMatch[0];
    result = result.slice(frontmatter.length);
  }

  // Remove import statements (various formats)
  result = result.replace(IMPORT_FROM_REGEX, "");
  result = result.replace(IMPORT_DIRECT_REGEX, "");
  result = result.replace(IMPORT_DESTRUCTURE_REGEX, "");

  // Remove export statements (like export const metadata)
  result = result.replace(EXPORT_REGEX, "");

  // Convert GuideOverview components to markdown headers before the generic
  // JSX stripping below would discard the section names.
  result = result.replace(
    GUIDE_OVERVIEW_OUTCOMES_REGEX,
    (_match, inner: string) => `## Outcomes\n\n${dedent(inner.trim())}\n`
  );
  result = result.replace(
    GUIDE_OVERVIEW_PREREQUISITES_REGEX,
    (_match, inner: string) => `## Prerequisites\n\n${dedent(inner.trim())}\n`
  );
  result = result.replace(
    GUIDE_OVERVIEW_YOU_WILL_LEARN_REGEX,
    (_match, inner: string) => `## You Will Learn\n\n${dedent(inner.trim())}\n`
  );

  // Convert Image components to markdown image syntax.
  // alt/src may be written as plain quoted strings or as {"…"} expressions.
  result = result.replace(IMAGE_COMPONENT_REGEX, (match) => {
    const altMatch = match.match(IMAGE_ALT_REGEX);
    const srcMatch = match.match(IMAGE_SRC_REGEX);
    // Extract from whichever capture group matched (quoted or JSX expression)
    const alt = altMatch?.[1] || altMatch?.[2];
    const src = srcMatch?.[1] || srcMatch?.[2];
    if (!(alt && src)) {
      // Without both pieces there is nothing useful to emit.
      return "";
    }
    // Make src absolute if it starts with /
    const fullSrc = src.startsWith("/") ? `https://docs.arcade.dev${src}` : src;
    return `![${alt}](${fullSrc})`;
  });

  // Remove self-closing JSX components (e.g., <Component /> or
  // <Component prop="value" />), including dotted names like
  // <GuideOverview.Item />.
  result = result.replace(SELF_CLOSING_JSX_REGEX, "");

  // Unwrap JSX components with children, keeping only their inner content.
  // Each pass peels the components it can match; repeat until a pass changes
  // nothing so that nested components are unwrapped as well.
  let previousResult = "";
  while (previousResult !== result) {
    previousResult = result;
    // Dedent each extracted piece to drop the indentation it had inside the tag.
    result = result.replace(
      JSX_WITH_CHILDREN_REGEX,
      (_match, _tag: string, innerContent: string) =>
        dedent(innerContent.trim())
    );
  }

  // Remove any remaining JSX expressions like {variable} or {expression}.
  // Code blocks are swapped for placeholders first so braces inside code
  // samples survive.
  const codeBlocks: string[] = [];
  result = result.replace(CODE_BLOCK_REGEX, (match) => {
    codeBlocks.push(match);
    return `__CODE_BLOCK_${codeBlocks.length - 1}__`;
  });
  result = result.replace(JSX_EXPRESSION_REGEX, "");
  // Restore code blocks (keep the placeholder text if the index is unknown,
  // e.g. when the page itself contained something placeholder-shaped).
  result = result.replace(
    CODE_BLOCK_PLACEHOLDER_REGEX,
    (match, index: string) => codeBlocks[Number.parseInt(index, 10)] ?? match
  );

  // Convert internal links to .md links for LLM consumption:
  // [text](/path/to/page) -> [text](/path/to/page.md)
  // [text](/path/to/page#section) -> [text](/path/to/page.md#section)
  result = result.replace(
    INTERNAL_LINK_REGEX,
    (_match, text: string, path: string) =>
      `[${text}](/${withMarkdownExtension(path)})`
  );

  // Normalize indentation (remove stray whitespace, preserve meaningful markdown indentation)
  result = normalizeIndentation(result);

  // Collapse runs of 3+ newlines down to a single blank line
  result = result.replace(EXCESSIVE_NEWLINES_REGEX, "\n\n");

  // Trim leading/trailing whitespace
  result = result.trim();

  // If content is essentially empty (component-only page), provide fallback
  if (!result || result.length < 10) {
    const { title, description } = extractFrontmatterMeta(frontmatter);
    const htmlUrl = `https://docs.arcade.dev${pagePath}`;
    return `${frontmatter}# ${title}\n${description}\nThis page contains interactive content. Visit the full page at: ${htmlUrl}\n`;
  }

  // Reconstruct with frontmatter
  return `${frontmatter}${result}\n`;
}
/**
 * Serves the markdown rendition of a documentation page.
 *
 * The request path (minus an optional leading "/api/markdown" and a trailing
 * ".md") is mapped to `app/<path>/page.mdx` under the current working
 * directory. The MDX file is compiled to markdown and returned as
 * `text/markdown`.
 *
 * @param request - Incoming request; only its URL is read.
 * @param _context - Route params supplied by the framework (unused).
 * @returns 200 with the compiled markdown, 404 when the MDX file is not
 *   accessible, or 500 on any other failure.
 */
export async function GET(
  request: NextRequest,
  _context: { params: Promise<{ slug?: string[] }> }
) {
  try {
    // Get the original pathname from the request
    const { pathname } = new URL(request.url);

    // Remove the /api/markdown prefix to get the original path. Only a
    // leading occurrence is stripped; the same text deeper in the path is
    // part of the page location and must be left alone.
    const routePrefix = "/api/markdown";
    const originalPath = pathname.startsWith(routePrefix)
      ? pathname.slice(routePrefix.length)
      : pathname;

    // Remove .md extension
    const pathWithoutMd = originalPath.replace(MD_EXTENSION_REGEX, "");

    // Map URL to file path
    // e.g., /en/home/quickstart -> app/en/home/quickstart/page.mdx
    const filePath = join(process.cwd(), "app", `${pathWithoutMd}/page.mdx`);

    // Check if file exists
    try {
      await access(filePath);
    } catch {
      return new NextResponse("Markdown file not found", { status: 404 });
    }

    const rawContent = await readFile(filePath, "utf-8");

    // Compile MDX to clean markdown
    const content = compileMdxToMarkdown(rawContent, pathWithoutMd);

    // Return the compiled markdown with proper headers
    return new NextResponse(content, {
      status: 200,
      headers: {
        "Content-Type": "text/markdown; charset=utf-8",
        "Content-Disposition": "inline",
      },
    });
  } catch (error) {
    // Keep the details on the server: error text can contain absolute file
    // paths and other internals that should not be sent to the client.
    console.error("Failed to serve markdown:", error);
    return new NextResponse("Internal server error", {
      status: 500,
    });
  }
}