Skip to content

Commit bed2ac4

Browse files
authored
Merge pull request #114 from brightdata/feat/add-prompts
feat: add prompts
2 parents 8e2cf35 + 4b632b0 commit bed2ac4

File tree

2 files changed

+91
-0
lines changed

2 files changed

+91
-0
lines changed

prompts.js

Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,88 @@
1+
// LICENSE_CODE ZON
2+
'use strict'; /*jslint node:true es9:true*/
3+
4+
// Prompt definition: teaches the model the tiered tool-selection order
// (dataset web_data_* tools -> Web Unlocker -> Browser API) before any
// scraping session begins. Registered with the MCP server in server.js.
const web_scraping_strategy = {
    name: 'web_scraping_strategy',
    description: 'Decision tree for picking the right Bright Data tool. '
        +'Invoke at the start of any scraping session to learn the correct '
        +'tool selection order (dataset tools -> Web Unlocker -> Browser API).',
    arguments: [],
    // load() returns the full instruction text. One array element per
    // output line keeps the decision tree readable; join('\n') produces
    // exactly the same string as before (empty elements = blank lines).
    load: ()=>[
        'You have access to Bright Data tools at three tiers of cost and capability.',
        'Always follow this order -- do not skip ahead:',
        '',
        'STEP 1 -- Check for a dedicated dataset tool (fastest, cheapest):',
        ' Look at the URL. If it matches a known platform, use the corresponding web_data_* tool:',
        ' - Amazon product page (/dp/) -> web_data_amazon_product',
        ' - Amazon search results -> web_data_amazon_product_search',
        ' - LinkedIn profile -> web_data_linkedin_person_profile',
        ' - LinkedIn company -> web_data_linkedin_company_profile',
        ' - Instagram profile/post/reel -> web_data_instagram_profiles / _posts / _reels',
        ' - TikTok profile/post -> web_data_tiktok_profiles / _posts',
        ' - YouTube video/channel -> web_data_youtube_videos / _profiles',
        ' - Reddit post -> web_data_reddit_posts',
        ' - X (Twitter) post -> web_data_x_posts',
        ' - Zillow listing -> web_data_zillow_properties_listing',
        ' - Booking.com hotel -> web_data_booking_hotel_listings',
        ' - GitHub file -> web_data_github_repository_file',
        ' - Google Maps reviews -> web_data_google_maps_reviews',
        ' - Google Shopping -> web_data_google_shopping',
        ' - (and more -- check all web_data_* tools before proceeding)',
        '',
        'STEP 2 -- If no dataset tool matches, use scrape_as_markdown (default):',
        ' This handles anti-bot protection and CAPTCHA automatically.',
        ' Retry once if the first attempt returns empty or blocked content.',
        '',
        'STEP 3 -- If scrape_as_markdown fails twice, escalate to scraping_browser_navigate:',
        ' Use ONLY when the page requires JavaScript execution, user interaction',
        ' (clicking, form submission), or dynamic content loading.',
        ' This is slower and more expensive -- do not use as a first attempt.',
        '',
        'NEVER use the browser tools for sites scrape_as_markdown can handle.',
        'NEVER use scrape_as_markdown when a web_data_* tool matches the URL pattern.',
    ].join('\n'),
};
58+
59+
// Prompt definition: a two-step probe (Web Unlocker, then Browser API)
// that tells the user which Bright Data product fits an unknown site.
// Registered with the MCP server in server.js.
const diagnose_scraping_approach = {
    name: 'diagnose_scraping_approach',
    description: 'Run a two-step diagnostic to discover the correct '
        +'Bright Data product for a new website. Tries Web Unlocker first, '
        +'then Browser API, then reports which succeeded.',
    arguments: [],
    // load() returns the diagnostic script. One array element per output
    // line; join('\n') yields the identical string to the concatenated
    // original (empty elements = blank lines).
    load: ()=>[
        'To discover the correct Bright Data product for a new website, run this diagnostic:',
        '',
        '1. Try scrape_as_markdown on the target URL.',
        ' - If it returns useful content -> Web Unlocker is the correct integration. Stop.',
        ' - If it returns empty, blocked, or low-quality content -> continue to step 2.',
        '',
        '2. Try scraping_browser_navigate + scraping_browser_snapshot on the same URL.',
        ' - If it returns useful content -> Browser API is the correct integration. Stop.',
        ' - If both fail -> report to the user that the target may require a specialized',
        ' Bright Data product (SERP API, specific dataset tool, or custom configuration).',
        '',
        'Report which approach succeeded and recommend it as the integration method.',
        'Do not proceed with data extraction until the diagnostic is complete.',
    ].join('\n'),
};
85+
86+
// Ordered registry of every prompt this module provides; server.js
// passes this array straight to server.addPrompts().
const prompts = [
    web_scraping_strategy,
    diagnose_scraping_approach,
];

export default prompts;

server.js

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import {FastMCP} from 'fastmcp';
44
import {z} from 'zod';
55
import axios from 'axios';
66
import {tools as browser_tools} from './browser_tools.js';
7+
import prompts from './prompts.js';
78
import {GROUPS} from './tool_groups.js';
89
import {createRequire} from 'node:module';
910
import {remark} from 'remark';
@@ -988,6 +989,8 @@ for (let {dataset_id, id, description, inputs, defaults = {},
988989
});
989990
}
990991

992+
server.addPrompts(prompts);
993+
991994
for (let tool of browser_tools)
992995
addTool(tool);
993996

0 commit comments

Comments
 (0)