diff --git a/README.md b/README.md index 8d37495..9768bc5 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,7 @@ https://mcp.brightdata.com/mcp?token=YOUR_API_TOKEN_HERE

✅ Web Search
✅ Scraping with Web unlocker
+ ✅ AI-ranked Discover search
❌ Browser Automation
❌ Web data tools


@@ -212,7 +213,7 @@ https://mcp.brightdata.com/mcp?token=YOUR_API_TOKEN_HERE - Mode priority: `PRO_MODE=true` (all tools) → `GROUPS` / `TOOLS` (whitelist) → default rapid mode (base toolkit). - Base tools always enabled: `search_engine`, `search_engine_batch`, - `scrape_as_markdown`, `scrape_batch`. + `scrape_as_markdown`, `scrape_batch`, `discover`. - Group ID `custom` is reserved; use `TOOLS` for bespoke picks. @@ -394,6 +395,7 @@ https://github.com/user-attachments/assets/61ab0bee-fdfa-4d50-b0de-5fab96b4b91d |------|-------------|----------| | 🔍 `search_engine` | Web search with AI-optimized results | Research, fact-checking, current events | | 📄 `scrape_as_markdown` | Convert any webpage to clean markdown | Content extraction, documentation | +| 🎯 `discover` | AI-ranked web search with intent-based relevance scoring | Deep research, RAG pipelines, competitive analysis | ### 💎 Pro Mode Tools (60+ Tools) diff --git a/assets/Tools.md b/assets/Tools.md index b3839d7..391c38c 100644 --- a/assets/Tools.md +++ b/assets/Tools.md @@ -6,6 +6,7 @@ |scrape_batch|Scrape up to 10 webpages in one request and return an array of URL/content pairs in Markdown format.| |scrape_as_html|Scrape a single webpage with advanced extraction and return the HTML response body. Handles sites protected by bot detection or CAPTCHA.| |extract|Scrape a webpage as Markdown and convert it to structured JSON using AI sampling, with an optional custom extraction prompt.| +|discover|Search the web and rank results by AI-driven relevance. Returns scored results with title, description, URL, and relevance score. Supports intent-based ranking, geo-targeting, date filtering, and keyword filtering.| |session_stats|Report how many times each tool has been called during the current MCP session.| |web_data_amazon_product|Quickly read structured Amazon product data. Requires a valid product URL containing /dp/. 
Often faster and more reliable than scraping.| |web_data_amazon_product_reviews|Quickly read structured Amazon product review data. Requires a valid product URL containing /dp/. Often faster and more reliable than scraping.| diff --git a/manifest.json b/manifest.json index 6d09d81..7f00947 100644 --- a/manifest.json +++ b/manifest.json @@ -40,6 +40,7 @@ {"name": "scrape_batch", "description": "Scrape multiple webpage URLs with advanced options for content extraction and get back the results in Markdown. This tool can unlock any webpage even if it uses bot detection or CAPTCHA. Processes up to 10 URLs."}, {"name": "scrape_as_html", "description": "Scrape a single webpage URL with advanced options for content extraction and get back the results in HTML. This tool can unlock any webpage even if it uses bot detection or CAPTCHA."}, {"name": "extract", "description": "Scrape a webpage and extract structured data as JSON. First scrapes the page as markdown, then uses AI sampling to convert it to structured JSON format. This tool can unlock any webpage even if it uses bot detection or CAPTCHA."}, + {"name": "discover", "description": "Search the web and rank results by AI-driven relevance. Returns scored results with title, description, and URL. Supports intent-based ranking, geo-targeting, date filtering, and keyword filtering."}, {"name": "session_stats", "description": "Tell the user about the tool usage during this session"}, {"name": "web_data_amazon_product", "description": "Quickly read structured amazon product data. Requires a valid product URL with /dp/ in it. This can be a cache lookup, so it can be more reliable than scraping."}, {"name": "web_data_amazon_product_reviews", "description": "Quickly read structured amazon product review data. Requires a valid product URL with /dp/ in it. 
This can be a cache lookup, so it can be more reliable than scraping."}, diff --git a/package-lock.json b/package-lock.json index e63d52a..a921cb3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@brightdata/mcp", - "version": "2.9.3", + "version": "2.9.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@brightdata/mcp", - "version": "2.9.3", + "version": "2.9.4", "license": "MIT", "dependencies": { "@modelcontextprotocol/sdk": "1.21.2", diff --git a/package.json b/package.json index eac2cac..4dd4f6d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@brightdata/mcp", - "version": "2.9.3", + "version": "2.9.4", "description": "An MCP interface into the Bright Data toolset", "type": "module", "main": "./server.js", diff --git a/server.js b/server.js index 4fcaf14..dd00e57 100644 --- a/server.js +++ b/server.js @@ -21,7 +21,7 @@ const base_timeout = process.env.BASE_TIMEOUT const base_max_retries = Math.min( parseInt(process.env.BASE_MAX_RETRIES || '0', 10), 3); const pro_mode_tools = ['search_engine', 'scrape_as_markdown', - 'search_engine_batch', 'scrape_batch']; + 'search_engine_batch', 'scrape_batch', 'discover']; const tool_groups = process.env.GROUPS ? process.env.GROUPS.split(',').map(g=>g.trim().toLowerCase()) .filter(Boolean) : []; @@ -487,6 +487,125 @@ addTool({ }), }); +addTool({ + name: 'discover', + description: 'Search the web and rank results by AI-driven relevance. ' + +'Returns scored results with title, description, and URL. Supports ' + +'intent-based ranking, geo-targeting, date filtering, and keyword ' + +'filtering.', + annotations: { + title: 'Discover', + readOnlyHint: true, + openWorldHint: true, + }, + parameters: z.object({ + query: z.string().describe('The search query'), + intent: z.string().optional().describe('Describes the specific goal ' + +'of the search to help the AI evaluate and rank result relevance.' 
+        +' If not provided, the query string is used as the intent'),
+    country: z.string().length(2).optional()
+        .describe('2-letter ISO country code for localized results '
+        +'(e.g., "US", "GB", "DE")'),
+    city: z.string().optional()
+        .describe('City for localized results (e.g., "New York", '
+        +'"Berlin")'),
+    language: z.string().optional()
+        .describe('Language code (e.g., "en", "es", "fr")'),
+    num_results: z.number().int().optional()
+        .describe('Exact number of search results to return'),
+    filter_keywords: z.array(z.string()).optional()
+        .describe('Keywords that must appear in search results'),
+    remove_duplicates: z.boolean().optional()
+        .describe('Remove duplicate results (default: true)'),
+    start_date: z.string().optional()
+        .describe('Only content updated from this date (YYYY-MM-DD)'),
+    end_date: z.string().optional()
+        .describe('Only content updated until this date (YYYY-MM-DD)'),
+    }),
+    execute: tool_fn('discover', async(data, ctx)=>{
+        let body = {query: data.query, format: 'json'};
+        if (data.intent)
+            body.intent = data.intent;
+        if (data.country)
+            body.country = data.country;
+        if (data.city)
+            body.city = data.city;
+        if (data.language)
+            body.language = data.language;
+        if (data.num_results)
+            body.num_results = data.num_results;
+        if (data.filter_keywords)
+            body.filter_keywords = data.filter_keywords;
+        if (data.remove_duplicates===false)
+            body.remove_duplicates = false;
+        if (data.start_date)
+            body.start_date = data.start_date;
+        if (data.end_date)
+            body.end_date = data.end_date;
+        let trigger_response = await axios({
+            url: 'https://api.brightdata.com/discover',
+            method: 'POST',
+            data: body,
+            headers: {
+                ...api_headers(ctx.clientName, 'discover'),
+                'Content-Type': 'application/json',
+            },
+        });
+        let task_id = trigger_response.data?.task_id;
+        if (!task_id)
+            throw new Error('No task_id returned from discover request');
+        console.error(`[discover] triggered with task ID: ${task_id}`);
+        let max_attempts = polling_timeout;
+        let attempts = 0;
+        while (attempts<max_attempts)
+        {
+            try {
+                // NOTE(review): this poll request was reconstructed from a
+                // garbled patch — confirm the endpoint URL and the pending
+                // status value against the upstream commit
+                let poll_response = await axios({
+                    url: 'https://api.brightdata.com/discover/'+task_id,
+                    method: 'GET',
+                    headers: api_headers(ctx.clientName, 'discover'),
+                });
+                if (poll_response.data?.status==='pending')
+                {
+                    attempts++;
+                    await new Promise(resolve=>setTimeout(resolve, 1000));
+                    continue;
+                }
+                console.error(`[discover] results received after `
+                    +`${attempts+1} attempts`);
+                let results = poll_response.data?.results || [];
+                results = results.map(r=>({
+                    link: r.link,
+                    title: r.title,
+                    description: r.description,
+                    relevance_score: r.relevance_score,
+                }));
+                return JSON.stringify(results);
+            } catch(e){
+                console.error(`[discover] polling error: ${e.message}`);
+                if (e.response?.status===400)
+                    throw e;
+                attempts++;
+                await new Promise(resolve=>setTimeout(resolve, 1000));
+            }
+        }
+        throw new Error(`Timeout after ${max_attempts} seconds waiting `
+            +`for discover results`);
+    }),
+});
+
 addTool({
     name: 'session_stats',
     description: 'Tell the user about the tool usage during this session',
diff --git a/tool_groups.js b/tool_groups.js
index 9e1dd56..a9027fd 100644
--- a/tool_groups.js
+++ b/tool_groups.js
@@ -1,6 +1,6 @@
 'use strict';
 /*jslint node:true es9:true*/
-const base_tools = ['search_engine', 'scrape_as_markdown'];
+const base_tools = ['search_engine', 'scrape_as_markdown', 'discover'];
 export const GROUPS = {
     ECOMMERCE: {