-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathscrape_multi_format.ts
More file actions
62 lines (52 loc) · 1.87 KB
/
scrape_multi_format.ts
File metadata and controls
62 lines (52 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import { ScrapeGraphAI } from "scrapegraph-js";
// Multi-format scrape demo: sends a single scrape request asking for six
// output formats, then prints a short report for every format that is present
// in the response.
//
// The client reads SGAI_API_KEY from the environment; to supply the key
// directly instead, call ScrapeGraphAI({ apiKey: "..." }).
const sgai = ScrapeGraphAI();

const res = await sgai.scrape({
  url: "https://example.com",
  formats: [
    { type: "markdown", mode: "reader" },
    { type: "html", mode: "prune" },
    { type: "links" },
    { type: "images" },
    { type: "summary" },
    {
      type: "screenshot",
      fullPage: false,
      width: 1440,
      height: 900,
      quality: 90,
    },
  ],
});

if (res.status !== "success") {
  // Anything other than "success": report the error and print nothing else.
  console.error("Failed:", res.error);
} else {
  const payload = res.data;
  const results = payload?.results;

  // Response-level header.
  console.log("=== Scrape Results ===\n");
  console.log("Provider:", payload?.metadata.provider);
  console.log("Content-Type:", payload?.metadata.contentType);
  console.log("Elapsed:", res.elapsedMs, "ms\n");

  // Each section below is printed only when its entry exists in the results.

  const markdown = results?.markdown;
  if (markdown) {
    const chunks = markdown.data;
    console.log("--- Markdown ---");
    // Length is measured over all chunks joined; the preview uses the first chunk only.
    console.log("Length:", chunks?.join("").length, "chars");
    console.log("Preview:", chunks?.[0]?.slice(0, 200), "...\n");
  }

  const html = results?.html;
  if (html) {
    console.log("--- HTML ---");
    console.log("Length:", html.data?.join("").length, "chars\n");
  }

  const links = results?.links;
  if (links) {
    console.log("--- Links ---");
    console.log("Count:", links.metadata?.count);
    console.log("Sample:", links.data?.slice(0, 5), "\n");
  }

  const images = results?.images;
  if (images) {
    console.log("--- Images ---");
    console.log("Count:", images.metadata?.count);
    console.log("Sample:", images.data?.slice(0, 3), "\n");
  }

  const summary = results?.summary;
  if (summary) {
    console.log("--- Summary ---");
    console.log(summary.data, "\n");
  }

  const screenshot = results?.screenshot;
  if (screenshot) {
    const shot = screenshot.data;
    console.log("--- Screenshot ---");
    console.log("URL:", shot.url);
    console.log("Dimensions:", shot.width, "x", shot.height);
    console.log("Format:", screenshot.metadata?.contentType, "\n");
  }
}