
Commit f996715

Merge pull request #88 from ScrapeGraphAI/feat/ergonomic-api
feat!: kwargs API — replace *Request objects on public methods
2 parents cad09de + d85a759 commit f996715

41 files changed

Lines changed: 803 additions & 432 deletions
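The commit message above summarizes the breaking change: public methods now take plain arguments and keyword arguments instead of `*Request` objects. A minimal before/after sketch of the `scrape` call, based on the README Quick Start diff below (the old form appears only in comments, since its import is removed by this commit):

```python
from scrapegraph_py import ScrapeGraphAI

sgai = ScrapeGraphAI()

# Before this commit (style shown in the removed README lines):
#     from scrapegraph_py import ScrapeRequest
#     result = sgai.scrape(ScrapeRequest(url="https://example.com"))

# After this commit: the URL is passed directly; options become kwargs.
result = sgai.scrape("https://example.com")

if result.status == "success":
    print(result.data["results"]["markdown"]["data"])
```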


.gitignore

Lines changed: 1 addition & 0 deletions
@@ -45,3 +45,4 @@ htmlcov/
 
 # Misc
 .bfg-report/
+playground.py

README.md

Lines changed: 39 additions & 51 deletions
@@ -22,14 +22,12 @@ uv add scrapegraph-py
 ## Quick Start
 
 ```python
-from scrapegraph_py import ScrapeGraphAI, ScrapeRequest
+from scrapegraph_py import ScrapeGraphAI
 
 # reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI(api_key="...")
 sgai = ScrapeGraphAI()
 
-result = sgai.scrape(ScrapeRequest(
-    url="https://example.com",
-))
+result = sgai.scrape("https://example.com")
 
 if result.status == "success":
     print(result.data["results"]["markdown"]["data"])
@@ -56,14 +54,14 @@ Scrape a webpage in multiple formats (markdown, html, screenshot, json, etc).
 
 ```python
 from scrapegraph_py import (
-    ScrapeGraphAI, ScrapeRequest, FetchConfig,
+    ScrapeGraphAI, FetchConfig,
     MarkdownFormatConfig, ScreenshotFormatConfig, JsonFormatConfig
 )
 
 sgai = ScrapeGraphAI()
 
-res = sgai.scrape(ScrapeRequest(
-    url="https://example.com",
+res = sgai.scrape(
+    "https://example.com",
     formats=[
         MarkdownFormatConfig(mode="reader"),
         ScreenshotFormatConfig(full_page=True, width=1440, height=900),
@@ -80,7 +78,7 @@ res = sgai.scrape(ScrapeRequest(
         cookies={"session": "abc"},
         country="us",
     ),
-))
+)
 ```
 
 **Formats:**
@@ -98,61 +96,58 @@ res = sgai.scrape(ScrapeRequest(
 Extract structured data from a URL, HTML, or markdown using AI.
 
 ```python
-from scrapegraph_py import ScrapeGraphAI, ExtractRequest
+from scrapegraph_py import ScrapeGraphAI
 
 sgai = ScrapeGraphAI()
 
-res = sgai.extract(ExtractRequest(
-    url="https://example.com",
+res = sgai.extract(
     prompt="Extract product names and prices",
+    url="https://example.com",
     schema={"type": "object", "properties": {...}},  # optional
     mode="reader",  # optional
-    fetch_config=FetchConfig(...),  # optional
-))
-# Or pass html/markdown directly instead of url
+    # Or pass html/markdown directly instead of url
+)
 ```
 
 ### search
 
 Search the web and optionally extract structured data.
 
 ```python
-from scrapegraph_py import ScrapeGraphAI, SearchRequest
+from scrapegraph_py import ScrapeGraphAI
 
 sgai = ScrapeGraphAI()
 
-res = sgai.search(SearchRequest(
-    query="best programming languages 2024",
+res = sgai.search(
+    "best programming languages 2024",
     num_results=5,  # 1-20, default 3
     format="markdown",  # "markdown" | "html"
     prompt="Extract key points",  # optional, for AI extraction
     schema={...},  # optional
     time_range="past_week",  # optional
     location_geo_code="us",  # optional
-    fetch_config=FetchConfig(...),  # optional
-))
+)
 ```
 
 ### crawl
 
 Crawl a website and its linked pages.
 
 ```python
-from scrapegraph_py import ScrapeGraphAI, CrawlRequest, MarkdownFormatConfig
+from scrapegraph_py import ScrapeGraphAI, MarkdownFormatConfig
 
 sgai = ScrapeGraphAI()
 
 # Start a crawl
-start = sgai.crawl.start(CrawlRequest(
-    url="https://example.com",
+start = sgai.crawl.start(
+    "https://example.com",
     formats=[MarkdownFormatConfig()],
     max_pages=50,
     max_depth=2,
     max_links_per_page=10,
     include_patterns=["/blog/*"],
     exclude_patterns=["/admin/*"],
-    fetch_config=FetchConfig(...),
-))
+)
 
 # Check status
 status = sgai.crawl.get(start.data["id"])
@@ -168,24 +163,23 @@ sgai.crawl.delete(crawl_id)
 Monitor a webpage for changes on a schedule.
 
 ```python
-from scrapegraph_py import ScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig
+from scrapegraph_py import ScrapeGraphAI, MarkdownFormatConfig
 
 sgai = ScrapeGraphAI()
 
 # Create a monitor
-mon = sgai.monitor.create(MonitorCreateRequest(
-    url="https://example.com",
+mon = sgai.monitor.create(
+    "https://example.com",
+    "0 * * * *",  # cron expression
     name="Price Monitor",
-    interval="0 * * * *",  # cron expression
     formats=[MarkdownFormatConfig()],
     webhook_url="https://...",  # optional
-    fetch_config=FetchConfig(...),
-))
+)
 
 # Manage monitors
 sgai.monitor.list()
 sgai.monitor.get(cron_id)
-sgai.monitor.update(cron_id, MonitorUpdateRequest(interval="0 */6 * * *"))
+sgai.monitor.update(cron_id, interval="0 */6 * * *")
 sgai.monitor.pause(cron_id)
 sgai.monitor.resume(cron_id)
 sgai.monitor.delete(cron_id)
@@ -196,15 +190,15 @@ sgai.monitor.delete(cron_id)
 Fetch request history.
 
 ```python
-from scrapegraph_py import ScrapeGraphAI, HistoryFilter
+from scrapegraph_py import ScrapeGraphAI
 
 sgai = ScrapeGraphAI()
 
-history = sgai.history.list(HistoryFilter(
+history = sgai.history.list(
     service="scrape",  # optional filter
     page=1,
     limit=20,
-))
+)
 
 entry = sgai.history.get("request-id")
 ```
@@ -229,11 +223,11 @@ All methods have async equivalents via `AsyncScrapeGraphAI`:
 
 ```python
 import asyncio
-from scrapegraph_py import AsyncScrapeGraphAI, ScrapeRequest
+from scrapegraph_py import AsyncScrapeGraphAI
 
 async def main():
     async with AsyncScrapeGraphAI() as sgai:
-        result = await sgai.scrape(ScrapeRequest(url="https://example.com"))
+        result = await sgai.scrape("https://example.com")
         if result.status == "success":
             print(result.data["results"]["markdown"]["data"])
         else:
@@ -246,42 +240,36 @@ asyncio.run(main())
 
 ```python
 async with AsyncScrapeGraphAI() as sgai:
-    res = await sgai.extract(ExtractRequest(
-        url="https://example.com",
+    res = await sgai.extract(
         prompt="Extract product names and prices",
-    ))
+        url="https://example.com",
+    )
 ```
 
 ### Async Search
 
 ```python
 async with AsyncScrapeGraphAI() as sgai:
-    res = await sgai.search(SearchRequest(
-        query="best programming languages 2024",
-        num_results=5,
-    ))
+    res = await sgai.search("best programming languages 2024", num_results=5)
 ```
 
 ### Async Crawl
 
 ```python
 async with AsyncScrapeGraphAI() as sgai:
-    start = await sgai.crawl.start(CrawlRequest(
-        url="https://example.com",
-        max_pages=50,
-    ))
+    start = await sgai.crawl.start("https://example.com", max_pages=50)
     status = await sgai.crawl.get(start.data["id"])
 ```
 
 ### Async Monitor
 
 ```python
 async with AsyncScrapeGraphAI() as sgai:
-    mon = await sgai.monitor.create(MonitorCreateRequest(
-        url="https://example.com",
+    mon = await sgai.monitor.create(
+        "https://example.com",
+        "0 * * * *",
         name="Price Monitor",
-        interval="0 * * * *",
-    ))
+    )
 ```
 
 ## Examples
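For readers skimming the diff, this is the structural shape of the change: each public method accepts plain kwargs and builds any request model internally. The sketch below is illustrative only; the class names, fields, and `_send` helper are hypothetical and are not taken from this repository.

```python
from dataclasses import dataclass
from typing import Any, Optional


@dataclass
class _ScrapeRequest:
    # Hypothetical internal model; callers no longer import anything like it.
    url: str
    formats: Optional[list] = None
    fetch_config: Optional[Any] = None


class _Client:
    def scrape(self, url: str, *, formats: Optional[list] = None,
               fetch_config: Optional[Any] = None) -> Any:
        # The public method takes plain arguments and constructs the request
        # object internally, which is the shape of the API change in this commit.
        req = _ScrapeRequest(url=url, formats=formats, fetch_config=fetch_config)
        return self._send(req)

    def _send(self, req: _ScrapeRequest) -> Any:
        raise NotImplementedError  # transport details omitted
```

Keeping the URL positional keeps the common one-argument call short, while the optional knobs stay discoverable as keyword arguments.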

examples/crawl/crawl_basic.py

Lines changed: 6 additions & 4 deletions
@@ -1,16 +1,18 @@
 from dotenv import load_dotenv
+
 load_dotenv()
 
 import time
-from scrapegraph_py import ScrapeGraphAI, CrawlRequest
+
+from scrapegraph_py import ScrapeGraphAI
 
 sgai = ScrapeGraphAI()
 
-start_res = sgai.crawl.start(CrawlRequest(
-    url="https://scrapegraphai.com/",
+start_res = sgai.crawl.start(
+    "https://scrapegraphai.com/",
     max_pages=5,
     max_depth=2,
-))
+)
 
 if start_res.status != "success" or not start_res.data:
     print("Failed to start:", start_res.error)
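The rest of this example is hidden in this view. A possible continuation that polls the started crawl, based on the `crawl.get(start.data["id"])` call shown in the README diff; the terminal status values and the `status` field name are assumptions:

```python
import time

from scrapegraph_py import ScrapeGraphAI

sgai = ScrapeGraphAI()

start_res = sgai.crawl.start("https://scrapegraphai.com/", max_pages=5, max_depth=2)

if start_res.status == "success" and start_res.data:
    crawl_id = start_res.data["id"]
    # Poll until the crawl reaches a terminal state (assumed values).
    while True:
        status_res = sgai.crawl.get(crawl_id)
        state = getattr(status_res.data, "status", None)
        if state in ("completed", "failed"):
            break
        time.sleep(5)
    print("Crawl finished with state:", state)
```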

examples/crawl/crawl_basic_async.py

Lines changed: 8 additions & 4 deletions
@@ -1,16 +1,19 @@
 from dotenv import load_dotenv
+
 load_dotenv()
 
 import asyncio
-from scrapegraph_py import AsyncScrapeGraphAI, CrawlRequest
+
+from scrapegraph_py import AsyncScrapeGraphAI
+
 
 async def main():
     async with AsyncScrapeGraphAI() as sgai:
-        start_res = await sgai.crawl.start(CrawlRequest(
-            url="https://scrapegraphai.com/",
+        start_res = await sgai.crawl.start(
+            "https://scrapegraphai.com/",
             max_pages=5,
             max_depth=2,
-        ))
+        )
 
         if start_res.status != "success" or not start_res.data:
             print("Failed to start:", start_res.error)
@@ -33,4 +36,5 @@ async def main():
         for page in get_res.data.pages:
             print(f"  {page.url} - {page.status}")
 
+
 asyncio.run(main())
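Since the async client exposes the same kwargs API, several crawls can be started concurrently. A sketch using `asyncio.gather`; the URLs are placeholders:

```python
import asyncio

from scrapegraph_py import AsyncScrapeGraphAI


async def start_many():
    async with AsyncScrapeGraphAI() as sgai:
        # Kick off two crawls at once and collect both responses.
        results = await asyncio.gather(
            sgai.crawl.start("https://scrapegraphai.com/", max_pages=5, max_depth=2),
            sgai.crawl.start("https://example.com", max_pages=5, max_depth=2),
        )
        for res in results:
            if res.status == "success" and res.data:
                print("started crawl:", res.data["id"])
            else:
                print("failed to start:", res.error)


asyncio.run(start_many())
```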

examples/crawl/crawl_with_formats.py

Lines changed: 7 additions & 6 deletions
@@ -1,25 +1,26 @@
 from dotenv import load_dotenv
+
 load_dotenv()
 
 import time
+
 from scrapegraph_py import (
-    ScrapeGraphAI,
-    CrawlRequest,
-    MarkdownFormatConfig,
     LinksFormatConfig,
+    MarkdownFormatConfig,
+    ScrapeGraphAI,
 )
 
 sgai = ScrapeGraphAI()
 
-start_res = sgai.crawl.start(CrawlRequest(
-    url="https://scrapegraphai.com/",
+start_res = sgai.crawl.start(
+    "https://scrapegraphai.com/",
     max_pages=3,
     max_depth=1,
     formats=[
         MarkdownFormatConfig(),
         LinksFormatConfig(),
     ],
-))
+)
 
 if start_res.status != "success" or not start_res.data:
     print("Failed to start:", start_res.error)

examples/crawl/crawl_with_formats_async.py

Lines changed: 8 additions & 5 deletions
@@ -1,25 +1,27 @@
 from dotenv import load_dotenv
+
 load_dotenv()
 
 import asyncio
+
 from scrapegraph_py import (
     AsyncScrapeGraphAI,
-    CrawlRequest,
-    MarkdownFormatConfig,
     LinksFormatConfig,
+    MarkdownFormatConfig,
 )
 
+
 async def main():
     async with AsyncScrapeGraphAI() as sgai:
-        start_res = await sgai.crawl.start(CrawlRequest(
-            url="https://scrapegraphai.com/",
+        start_res = await sgai.crawl.start(
+            "https://scrapegraphai.com/",
             max_pages=3,
             max_depth=1,
             formats=[
                 MarkdownFormatConfig(),
                 LinksFormatConfig(),
             ],
-        ))
+        )
 
         if start_res.status != "success" or not start_res.data:
             print("Failed to start:", start_res.error)
@@ -44,4 +46,5 @@ async def main():
             print(f"  Status: {page.status}")
             print(f"  Depth: {page.depth}")
 
+
 asyncio.run(main())

examples/extract/extract_basic.py

Lines changed: 6 additions & 4 deletions
@@ -1,15 +1,17 @@
 from dotenv import load_dotenv
+
 load_dotenv()
 
 import json
-from scrapegraph_py import ScrapeGraphAI, ExtractRequest
+
+from scrapegraph_py import ScrapeGraphAI
 
 sgai = ScrapeGraphAI()
 
-res = sgai.extract(ExtractRequest(
+res = sgai.extract(
+    "What is this page about? Extract the main heading and description.",
     url="https://example.com",
-    prompt="What is this page about? Extract the main heading and description.",
-))
+)
 
 if res.status == "success":
     print("Extracted:", json.dumps(res.data.json_data, indent=2))
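A variant of the same example with an explicit JSON schema, using the optional `schema` keyword shown in the README diff; the schema itself and the error handling are only illustrative:

```python
import json

from scrapegraph_py import ScrapeGraphAI

sgai = ScrapeGraphAI()

# Ask for a specific shape of output by supplying a JSON schema.
res = sgai.extract(
    "Extract the main heading and description.",
    url="https://example.com",
    schema={
        "type": "object",
        "properties": {
            "heading": {"type": "string"},
            "description": {"type": "string"},
        },
    },
)

if res.status == "success":
    print("Extracted:", json.dumps(res.data.json_data, indent=2))
else:
    print("Extract failed:", res.error)
```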
