Skip to content

Commit c0f5fd5

Browse files
VinciGit00 and claude committed
feat: align with scrapegraph-py v2 API surface from PR #82
- Pass output_schema to extract() so Pydantic schemas are forwarded to the v2 API
- Use context manager pattern (with Client(...) as client) for proper resource cleanup
- Simplify examples to match the v2 SDK style from scrapegraph-py
- Remove unused sgai_logger import (v2 client handles its own logging)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent fd23bb0 commit c0f5fd5

File tree

4 files changed

+43
-106
lines changed

4 files changed

+43
-106
lines changed
Lines changed: 9 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,20 @@
11
"""
2-
Example script demonstrating the scrape functionality (v2 API - replaces markdownify)
2+
Scrape a webpage as clean markdown using scrapegraph-py v2 API.
3+
Replaces the old markdownify() call with scrape().
34
"""
45

56
import json
67
import os
8+
79
from dotenv import load_dotenv
810
from scrapegraph_py import Client
9-
from scrapegraph_py.logger import sgai_logger
10-
11-
def main():
12-
# Load environment variables
13-
load_dotenv()
1411

15-
# Set up logging
16-
sgai_logger.set_logging(level="INFO")
12+
load_dotenv()
1713

18-
# Initialize the client
19-
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
20-
if not api_key:
21-
raise ValueError("SCRAPEGRAPH_API_KEY environment variable not found")
22-
sgai_client = Client(api_key=api_key)
14+
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
15+
if not api_key:
16+
raise ValueError("SCRAPEGRAPH_API_KEY environment variable not found")
2317

24-
# Scrape a website as markdown (v2 API - replaces markdownify)
25-
print("Scraping website as Markdown")
26-
print("-" * 50)
27-
response = sgai_client.scrape(
28-
url="https://example.com"
29-
)
18+
with Client(api_key=api_key) as client:
19+
response = client.scrape(url="https://example.com")
3020
print(json.dumps(response, indent=2))
31-
32-
if __name__ == "__main__":
33-
main()
Lines changed: 10 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,20 @@
11
"""
2-
Example implementation of search-based scraping using Scrapegraph AI v2 API.
3-
This example demonstrates how to use the search endpoint to extract information from the web.
2+
Search the web and extract AI-structured results using scrapegraph-py v2 API.
3+
Replaces the old searchscraper() call with search().
44
"""
55

66
import json
77
import os
8+
89
from dotenv import load_dotenv
910
from scrapegraph_py import Client
10-
from scrapegraph_py.logger import sgai_logger
11-
12-
def main():
13-
# Load environment variables
14-
load_dotenv()
15-
16-
# Get API key
17-
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
18-
if not api_key:
19-
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")
20-
21-
# Configure logging
22-
sgai_logger.set_logging(level="INFO")
23-
24-
# Initialize client
25-
sgai_client = Client(api_key=api_key)
26-
27-
try:
28-
# Search request (v2 API - replaces searchscraper)
29-
print("\nSearching for information...")
3011

31-
search_response = sgai_client.search(
32-
query="Extract webpage information"
33-
)
34-
print(json.dumps(search_response, indent=2))
12+
load_dotenv()
3513

36-
except Exception as e:
37-
print(f"\nError occurred: {str(e)}")
38-
finally:
39-
# Always close the client
40-
sgai_client.close()
14+
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
15+
if not api_key:
16+
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")
4117

42-
if __name__ == "__main__":
43-
main()
18+
with Client(api_key=api_key) as client:
19+
response = client.search(query="Extract webpage information")
20+
print(json.dumps(response, indent=2))
Lines changed: 13 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,23 @@
11
"""
2-
Example implementation using scrapegraph-py v2 client directly.
2+
Extract structured data from a webpage using scrapegraph-py v2 API.
3+
Replaces the old smartscraper() call with extract().
34
"""
45

56
import json
67
import os
8+
79
from dotenv import load_dotenv
810
from scrapegraph_py import Client
9-
from scrapegraph_py.logger import sgai_logger
10-
11-
def main():
12-
# Load environment variables from .env file
13-
load_dotenv()
14-
15-
# Get API key from environment variables
16-
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
17-
if not api_key:
18-
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")
19-
20-
# Set up logging
21-
sgai_logger.set_logging(level="INFO")
22-
23-
# Initialize the client with API key from environment
24-
sgai_client = Client(api_key=api_key)
25-
26-
try:
27-
# Extract request (v2 API - replaces smartscraper)
28-
response = sgai_client.extract(
29-
url="https://scrapegraphai.com",
30-
prompt="Extract the founders' informations"
31-
)
3211

33-
# Print the response
34-
print(json.dumps(response, indent=2))
12+
load_dotenv()
3513

36-
except Exception as e:
37-
print(f"Error occurred: {str(e)}")
38-
finally:
39-
# Always close the client
40-
sgai_client.close()
14+
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
15+
if not api_key:
16+
raise ValueError("SCRAPEGRAPH_API_KEY not found in environment variables")
4117

42-
if __name__ == "__main__":
43-
main()
18+
with Client(api_key=api_key) as client:
19+
response = client.extract(
20+
url="https://scrapegraphai.com",
21+
prompt="Extract the founders' informations",
22+
)
23+
print(json.dumps(response, indent=2))

scrapegraphai/graphs/smart_scraper_graph.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -79,30 +79,23 @@ def _create_graph(self) -> BaseGraph:
7979
if self.llm_model == "scrapegraphai/smart-scraper":
8080
try:
8181
from scrapegraph_py import Client
82-
from scrapegraph_py.logger import sgai_logger
8382
except ImportError:
8483
raise ImportError(
8584
"scrapegraph_py is not installed. Please install it using 'pip install scrapegraph-py'."
8685
)
8786

88-
sgai_logger.set_logging(level="INFO")
89-
90-
# Initialize the client with explicit API key
91-
sgai_client = Client(api_key=self.config.get("api_key"))
92-
93-
# Extract request (v2 API)
94-
response = sgai_client.extract(
95-
url=self.source,
96-
prompt=self.prompt,
97-
)
98-
99-
# Use logging instead of print for better production practices
100-
if "id" in response:
101-
logger.info(f"Request ID: {response['id']}")
102-
if "data" in response:
103-
logger.info(f"Result: {response['data']}")
87+
with Client(api_key=self.config.get("api_key")) as sgai_client:
88+
# Extract request (v2 API)
89+
response = sgai_client.extract(
90+
url=self.source,
91+
prompt=self.prompt,
92+
output_schema=self.schema,
93+
)
10494

105-
sgai_client.close()
95+
if "id" in response:
96+
logger.info(f"Request ID: {response['id']}")
97+
if "data" in response:
98+
logger.info(f"Result: {response['data']}")
10699

107100
return response
108101

0 commit comments

Comments
 (0)