Skip to content

Commit d08b944

Browse files
fix(rate-limit): ignore hidden recaptcha (#244)
2 parents 0ff206f + 8619232 commit d08b944

2 files changed

Lines changed: 2 additions & 56 deletions

File tree

linkedin_mcp_server/core/utils.py

Lines changed: 1 addition & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,7 @@ async def detect_rate_limit(page: Page) -> None:
1515
1616
Checks (in order):
1717
1. URL contains /checkpoint or /authwall (security challenge)
18-
2. Page contains CAPTCHA iframe (bot detection)
19-
3. Body text contains rate-limit phrases on error-shaped pages (throttling)
18+
2. Body text contains rate-limit phrases on error-shaped pages (throttling)
2019
2120
The body-text heuristic only runs on pages without a ``<main>`` element
2221
and with short body text (<2000 chars), since real rate-limit pages are
@@ -35,23 +34,6 @@ async def detect_rate_limit(page: Page) -> None:
3534
suggested_wait_time=30,
3635
)
3736

38-
# Check for CAPTCHA
39-
try:
40-
captcha = await page.locator(
41-
'iframe[title*="captcha" i], iframe[src*="captcha" i]'
42-
).count()
43-
if captcha > 0:
44-
raise RateLimitError(
45-
"CAPTCHA challenge detected. Manual intervention required.",
46-
suggested_wait_time=30,
47-
)
48-
except RateLimitError:
49-
raise
50-
except PlaywrightTimeoutError:
51-
pass
52-
except Exception as e:
53-
logger.debug("Error checking for CAPTCHA: %s", e)
54-
5537
# Check for rate limit messages — only on error-shaped pages.
5638
# Real rate-limit pages have no <main> element and short body text.
5739
# Normal LinkedIn pages (profiles, jobs) have <main> and long content

tests/test_core_utils.py

Lines changed: 1 addition & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -32,39 +32,18 @@ async def test_authwall_url_raises(self, mock_page):
3232
with pytest.raises(RateLimitError, match="security checkpoint"):
3333
await detect_rate_limit(mock_page)
3434

35-
async def test_captcha_iframe_raises(self, mock_page):
36-
captcha_locator = MagicMock()
37-
captcha_locator.count = AsyncMock(return_value=1)
38-
39-
main_locator = MagicMock()
40-
main_locator.count = AsyncMock(return_value=0)
41-
42-
def locator_side_effect(selector):
43-
if "captcha" in selector:
44-
return captcha_locator
45-
return main_locator
46-
47-
mock_page.locator = MagicMock(side_effect=locator_side_effect)
48-
with pytest.raises(RateLimitError, match="CAPTCHA"):
49-
await detect_rate_limit(mock_page)
50-
5135
async def test_normal_page_with_main_skips_body_heuristic(self, mock_page):
5236
"""A normal page with <main> should NOT trigger body text checks."""
5337
main_locator = MagicMock()
5438
main_locator.count = AsyncMock(return_value=1)
5539

56-
captcha_locator = MagicMock()
57-
captcha_locator.count = AsyncMock(return_value=0)
58-
5940
body_locator = MagicMock()
6041
# Body contains a phrase that would false-positive
6142
body_locator.inner_text = AsyncMock(
6243
return_value="Helping SaaS teams slow down churn with data-driven retention"
6344
)
6445

6546
def locator_side_effect(selector):
66-
if "captcha" in selector:
67-
return captcha_locator
6847
if selector == "main":
6948
return main_locator
7049
if selector == "body":
@@ -80,17 +59,12 @@ async def test_error_page_without_main_triggers_heuristic(self, mock_page):
8059
main_locator = MagicMock()
8160
main_locator.count = AsyncMock(return_value=0)
8261

83-
captcha_locator = MagicMock()
84-
captcha_locator.count = AsyncMock(return_value=0)
85-
8662
body_locator = MagicMock()
8763
body_locator.inner_text = AsyncMock(
8864
return_value="Too many requests. Slow down."
8965
)
9066

9167
def locator_side_effect(selector):
92-
if "captcha" in selector:
93-
return captcha_locator
9468
if selector == "main":
9569
return main_locator
9670
if selector == "body":
@@ -106,18 +80,13 @@ async def test_long_body_without_main_does_not_trigger(self, mock_page):
10680
main_locator = MagicMock()
10781
main_locator.count = AsyncMock(return_value=0)
10882

109-
captcha_locator = MagicMock()
110-
captcha_locator.count = AsyncMock(return_value=0)
111-
11283
body_locator = MagicMock()
11384
# Long body with a matching phrase buried in content
11485
body_locator.inner_text = AsyncMock(
11586
return_value="x" * 2000 + " try again later"
11687
)
11788

11889
def locator_side_effect(selector):
119-
if "captcha" in selector:
120-
return captcha_locator
12190
if selector == "main":
12291
return main_locator
12392
if selector == "body":
@@ -128,17 +97,12 @@ def locator_side_effect(selector):
12897
# Should NOT raise — body is too long to be an error page
12998
await detect_rate_limit(mock_page)
13099

131-
async def test_normal_url_no_captcha_no_error_passes(self, mock_page):
100+
async def test_normal_url_no_error_passes(self, mock_page):
132101
"""A clean normal page passes all checks without raising."""
133102
main_locator = MagicMock()
134103
main_locator.count = AsyncMock(return_value=1)
135104

136-
captcha_locator = MagicMock()
137-
captcha_locator.count = AsyncMock(return_value=0)
138-
139105
def locator_side_effect(selector):
140-
if "captcha" in selector:
141-
return captcha_locator
142106
if selector == "main":
143107
return main_locator
144108
return MagicMock(count=AsyncMock(return_value=0))

0 commit comments

Comments (0)