Skip to content

Commit 42c99cd

Browse files
feat!: replace *Request objects with inline kwargs on public methods
BREAKING CHANGE: Public client methods now accept keyword arguments directly instead of Pydantic *Request objects. This matches the JS SDK and conventional Python SDK ergonomics (OpenAI, Anthropic, Stripe). Before: sgai.scrape(ScrapeRequest(url="https://example.com", formats=[...])) After: sgai.scrape("https://example.com", formats=[...]) The *Request classes remain exported for users who want to build payloads manually, but are no longer accepted by the client methods. Structured types (FetchConfig, format configs, schema dicts) still passed as-is. - Refactored ScrapeGraphAI + AsyncScrapeGraphAI sync/async methods - Added _compact helper to strip None kwargs so Pydantic defaults apply - Updated all 30 examples and README - Updated unit + integration tests Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent cad09de commit 42c99cd

38 files changed

Lines changed: 751 additions & 396 deletions

README.md

Lines changed: 39 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,12 @@ uv add scrapegraph-py
2222
## Quick Start
2323

2424
```python
25-
from scrapegraph_py import ScrapeGraphAI, ScrapeRequest
25+
from scrapegraph_py import ScrapeGraphAI
2626

2727
# reads SGAI_API_KEY from env, or pass explicitly: ScrapeGraphAI(api_key="...")
2828
sgai = ScrapeGraphAI()
2929

30-
result = sgai.scrape(ScrapeRequest(
31-
url="https://example.com",
32-
))
30+
result = sgai.scrape("https://example.com")
3331

3432
if result.status == "success":
3533
print(result.data["results"]["markdown"]["data"])
@@ -56,14 +54,14 @@ Scrape a webpage in multiple formats (markdown, html, screenshot, json, etc).
5654

5755
```python
5856
from scrapegraph_py import (
59-
ScrapeGraphAI, ScrapeRequest, FetchConfig,
57+
ScrapeGraphAI, FetchConfig,
6058
MarkdownFormatConfig, ScreenshotFormatConfig, JsonFormatConfig
6159
)
6260

6361
sgai = ScrapeGraphAI()
6462

65-
res = sgai.scrape(ScrapeRequest(
66-
url="https://example.com",
63+
res = sgai.scrape(
64+
"https://example.com",
6765
formats=[
6866
MarkdownFormatConfig(mode="reader"),
6967
ScreenshotFormatConfig(full_page=True, width=1440, height=900),
@@ -80,7 +78,7 @@ res = sgai.scrape(ScrapeRequest(
8078
cookies={"session": "abc"},
8179
country="us",
8280
),
83-
))
81+
)
8482
```
8583

8684
**Formats:**
@@ -98,61 +96,58 @@ res = sgai.scrape(ScrapeRequest(
9896
Extract structured data from a URL, HTML, or markdown using AI.
9997

10098
```python
101-
from scrapegraph_py import ScrapeGraphAI, ExtractRequest
99+
from scrapegraph_py import ScrapeGraphAI
102100

103101
sgai = ScrapeGraphAI()
104102

105-
res = sgai.extract(ExtractRequest(
106-
url="https://example.com",
103+
res = sgai.extract(
107104
prompt="Extract product names and prices",
105+
url="https://example.com",
108106
schema={"type": "object", "properties": {...}}, # optional
109107
mode="reader", # optional
110-
fetch_config=FetchConfig(...), # optional
111-
))
112-
# Or pass html/markdown directly instead of url
108+
# Or pass html/markdown directly instead of url
109+
)
113110
```
114111

115112
### search
116113

117114
Search the web and optionally extract structured data.
118115

119116
```python
120-
from scrapegraph_py import ScrapeGraphAI, SearchRequest
117+
from scrapegraph_py import ScrapeGraphAI
121118

122119
sgai = ScrapeGraphAI()
123120

124-
res = sgai.search(SearchRequest(
125-
query="best programming languages 2024",
121+
res = sgai.search(
122+
"best programming languages 2024",
126123
num_results=5, # 1-20, default 3
127124
format="markdown", # "markdown" | "html"
128125
prompt="Extract key points", # optional, for AI extraction
129126
schema={...}, # optional
130127
time_range="past_week", # optional
131128
location_geo_code="us", # optional
132-
fetch_config=FetchConfig(...), # optional
133-
))
129+
)
134130
```
135131

136132
### crawl
137133

138134
Crawl a website and its linked pages.
139135

140136
```python
141-
from scrapegraph_py import ScrapeGraphAI, CrawlRequest, MarkdownFormatConfig
137+
from scrapegraph_py import ScrapeGraphAI, MarkdownFormatConfig
142138

143139
sgai = ScrapeGraphAI()
144140

145141
# Start a crawl
146-
start = sgai.crawl.start(CrawlRequest(
147-
url="https://example.com",
142+
start = sgai.crawl.start(
143+
"https://example.com",
148144
formats=[MarkdownFormatConfig()],
149145
max_pages=50,
150146
max_depth=2,
151147
max_links_per_page=10,
152148
include_patterns=["/blog/*"],
153149
exclude_patterns=["/admin/*"],
154-
fetch_config=FetchConfig(...),
155-
))
150+
)
156151

157152
# Check status
158153
status = sgai.crawl.get(start.data["id"])
@@ -168,24 +163,23 @@ sgai.crawl.delete(crawl_id)
168163
Monitor a webpage for changes on a schedule.
169164

170165
```python
171-
from scrapegraph_py import ScrapeGraphAI, MonitorCreateRequest, MarkdownFormatConfig
166+
from scrapegraph_py import ScrapeGraphAI, MarkdownFormatConfig
172167

173168
sgai = ScrapeGraphAI()
174169

175170
# Create a monitor
176-
mon = sgai.monitor.create(MonitorCreateRequest(
177-
url="https://example.com",
171+
mon = sgai.monitor.create(
172+
"https://example.com",
173+
"0 * * * *", # cron expression
178174
name="Price Monitor",
179-
interval="0 * * * *", # cron expression
180175
formats=[MarkdownFormatConfig()],
181176
webhook_url="https://...", # optional
182-
fetch_config=FetchConfig(...),
183-
))
177+
)
184178

185179
# Manage monitors
186180
sgai.monitor.list()
187181
sgai.monitor.get(cron_id)
188-
sgai.monitor.update(cron_id, MonitorUpdateRequest(interval="0 */6 * * *"))
182+
sgai.monitor.update(cron_id, interval="0 */6 * * *")
189183
sgai.monitor.pause(cron_id)
190184
sgai.monitor.resume(cron_id)
191185
sgai.monitor.delete(cron_id)
@@ -196,15 +190,15 @@ sgai.monitor.delete(cron_id)
196190
Fetch request history.
197191

198192
```python
199-
from scrapegraph_py import ScrapeGraphAI, HistoryFilter
193+
from scrapegraph_py import ScrapeGraphAI
200194

201195
sgai = ScrapeGraphAI()
202196

203-
history = sgai.history.list(HistoryFilter(
197+
history = sgai.history.list(
204198
service="scrape", # optional filter
205199
page=1,
206200
limit=20,
207-
))
201+
)
208202

209203
entry = sgai.history.get("request-id")
210204
```
@@ -229,11 +223,11 @@ All methods have async equivalents via `AsyncScrapeGraphAI`:
229223

230224
```python
231225
import asyncio
232-
from scrapegraph_py import AsyncScrapeGraphAI, ScrapeRequest
226+
from scrapegraph_py import AsyncScrapeGraphAI
233227

234228
async def main():
235229
async with AsyncScrapeGraphAI() as sgai:
236-
result = await sgai.scrape(ScrapeRequest(url="https://example.com"))
230+
result = await sgai.scrape("https://example.com")
237231
if result.status == "success":
238232
print(result.data["results"]["markdown"]["data"])
239233
else:
@@ -246,42 +240,36 @@ asyncio.run(main())
246240

247241
```python
248242
async with AsyncScrapeGraphAI() as sgai:
249-
res = await sgai.extract(ExtractRequest(
250-
url="https://example.com",
243+
res = await sgai.extract(
251244
prompt="Extract product names and prices",
252-
))
245+
url="https://example.com",
246+
)
253247
```
254248

255249
### Async Search
256250

257251
```python
258252
async with AsyncScrapeGraphAI() as sgai:
259-
res = await sgai.search(SearchRequest(
260-
query="best programming languages 2024",
261-
num_results=5,
262-
))
253+
res = await sgai.search("best programming languages 2024", num_results=5)
263254
```
264255

265256
### Async Crawl
266257

267258
```python
268259
async with AsyncScrapeGraphAI() as sgai:
269-
start = await sgai.crawl.start(CrawlRequest(
270-
url="https://example.com",
271-
max_pages=50,
272-
))
260+
start = await sgai.crawl.start("https://example.com", max_pages=50)
273261
status = await sgai.crawl.get(start.data["id"])
274262
```
275263

276264
### Async Monitor
277265

278266
```python
279267
async with AsyncScrapeGraphAI() as sgai:
280-
mon = await sgai.monitor.create(MonitorCreateRequest(
281-
url="https://example.com",
268+
mon = await sgai.monitor.create(
269+
"https://example.com",
270+
"0 * * * *",
282271
name="Price Monitor",
283-
interval="0 * * * *",
284-
))
272+
)
285273
```
286274

287275
## Examples

examples/crawl/crawl_basic.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
from dotenv import load_dotenv
2+
23
load_dotenv()
34

45
import time
5-
from scrapegraph_py import ScrapeGraphAI, CrawlRequest
6+
7+
from scrapegraph_py import ScrapeGraphAI
68

79
sgai = ScrapeGraphAI()
810

9-
start_res = sgai.crawl.start(CrawlRequest(
10-
url="https://scrapegraphai.com/",
11+
start_res = sgai.crawl.start(
12+
"https://scrapegraphai.com/",
1113
max_pages=5,
1214
max_depth=2,
13-
))
15+
)
1416

1517
if start_res.status != "success" or not start_res.data:
1618
print("Failed to start:", start_res.error)

examples/crawl/crawl_basic_async.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
11
from dotenv import load_dotenv
2+
23
load_dotenv()
34

45
import asyncio
5-
from scrapegraph_py import AsyncScrapeGraphAI, CrawlRequest
6+
7+
from scrapegraph_py import AsyncScrapeGraphAI
8+
69

710
async def main():
811
async with AsyncScrapeGraphAI() as sgai:
9-
start_res = await sgai.crawl.start(CrawlRequest(
10-
url="https://scrapegraphai.com/",
12+
start_res = await sgai.crawl.start(
13+
"https://scrapegraphai.com/",
1114
max_pages=5,
1215
max_depth=2,
13-
))
16+
)
1417

1518
if start_res.status != "success" or not start_res.data:
1619
print("Failed to start:", start_res.error)
@@ -33,4 +36,5 @@ async def main():
3336
for page in get_res.data.pages:
3437
print(f" {page.url} - {page.status}")
3538

39+
3640
asyncio.run(main())

examples/crawl/crawl_with_formats.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,26 @@
11
from dotenv import load_dotenv
2+
23
load_dotenv()
34

45
import time
6+
57
from scrapegraph_py import (
6-
ScrapeGraphAI,
7-
CrawlRequest,
8-
MarkdownFormatConfig,
98
LinksFormatConfig,
9+
MarkdownFormatConfig,
10+
ScrapeGraphAI,
1011
)
1112

1213
sgai = ScrapeGraphAI()
1314

14-
start_res = sgai.crawl.start(CrawlRequest(
15-
url="https://scrapegraphai.com/",
15+
start_res = sgai.crawl.start(
16+
"https://scrapegraphai.com/",
1617
max_pages=3,
1718
max_depth=1,
1819
formats=[
1920
MarkdownFormatConfig(),
2021
LinksFormatConfig(),
2122
],
22-
))
23+
)
2324

2425
if start_res.status != "success" or not start_res.data:
2526
print("Failed to start:", start_res.error)

examples/crawl/crawl_with_formats_async.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,27 @@
11
from dotenv import load_dotenv
2+
23
load_dotenv()
34

45
import asyncio
6+
57
from scrapegraph_py import (
68
AsyncScrapeGraphAI,
7-
CrawlRequest,
8-
MarkdownFormatConfig,
99
LinksFormatConfig,
10+
MarkdownFormatConfig,
1011
)
1112

13+
1214
async def main():
1315
async with AsyncScrapeGraphAI() as sgai:
14-
start_res = await sgai.crawl.start(CrawlRequest(
15-
url="https://scrapegraphai.com/",
16+
start_res = await sgai.crawl.start(
17+
"https://scrapegraphai.com/",
1618
max_pages=3,
1719
max_depth=1,
1820
formats=[
1921
MarkdownFormatConfig(),
2022
LinksFormatConfig(),
2123
],
22-
))
24+
)
2325

2426
if start_res.status != "success" or not start_res.data:
2527
print("Failed to start:", start_res.error)
@@ -44,4 +46,5 @@ async def main():
4446
print(f" Status: {page.status}")
4547
print(f" Depth: {page.depth}")
4648

49+
4750
asyncio.run(main())

examples/extract/extract_basic.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
from dotenv import load_dotenv
2+
23
load_dotenv()
34

45
import json
5-
from scrapegraph_py import ScrapeGraphAI, ExtractRequest
6+
7+
from scrapegraph_py import ScrapeGraphAI
68

79
sgai = ScrapeGraphAI()
810

9-
res = sgai.extract(ExtractRequest(
11+
res = sgai.extract(
12+
"What is this page about? Extract the main heading and description.",
1013
url="https://example.com",
11-
prompt="What is this page about? Extract the main heading and description.",
12-
))
14+
)
1315

1416
if res.status == "success":
1517
print("Extracted:", json.dumps(res.data.json_data, indent=2))

0 commit comments

Comments (0)