Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/fetch/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
]
dependencies = [
"httpx>=0.27",
"httpx[socks]>=0.27",
"markdownify>=0.13.1",
"mcp>=1.1.3",
"protego>=0.3.1",
Expand Down
36 changes: 34 additions & 2 deletions src/fetch/src/mcp_server_fetch/server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from typing import Annotated, Tuple
from urllib.parse import urlparse, urlunparse

Expand Down Expand Up @@ -63,6 +64,37 @@ def get_robots_txt_url(url: str) -> str:
return robots_url


def normalize_proxy_url(proxy_url: str | None) -> str | None:
if proxy_url is None:
return None

if proxy_url.lower().startswith("socks://"):
return f"socks5://{proxy_url[len('socks://'):]}"

return proxy_url


def proxy_url_for_request(url: str, proxy_url: str | None = None) -> str | None:
    """Choose the proxy URL to pass explicitly to the HTTP client for ``url``.

    An explicit ``proxy_url`` always wins and is normalized with
    ``normalize_proxy_url``. Otherwise the proxy environment variables that
    apply to the URL's scheme are inspected, in precedence order: the
    scheme-specific pair first, then ``ALL_PROXY`` / ``all_proxy``.

    This function only takes over when at least one applicable variable uses
    the ``socks://`` alias that must be rewritten. In that case the
    highest-precedence variable that is set is returned (normalized), so a
    valid ``HTTPS_PROXY`` is not overridden by a lower-precedence
    ``ALL_PROXY=socks://...``. When nothing needs rewriting, ``None`` is
    returned and no explicit proxy is handed to the client.

    Args:
        url: The URL about to be requested; only its scheme is used.
        proxy_url: Explicitly configured proxy URL, if any.

    Returns:
        The proxy URL to pass to the client, or ``None`` when no explicit
        proxy should be passed.
    """
    if proxy_url:
        return normalize_proxy_url(proxy_url)

    scheme = urlparse(url).scheme.lower()
    proxy_keys = []
    if scheme == "https":
        proxy_keys.extend(("HTTPS_PROXY", "https_proxy"))
    elif scheme == "http":
        proxy_keys.extend(("HTTP_PROXY", "http_proxy"))
    proxy_keys.extend(("ALL_PROXY", "all_proxy"))
    # NOTE(review): urllib documents lowercase variables as preferred when both
    # cases are set and disagree; the upper-then-lower order here is kept from
    # the original implementation -- confirm which precedence is intended.

    # Unset and empty variables are ignored; order reflects precedence.
    configured = [value for key in proxy_keys if (value := os.environ.get(key))]

    # Nothing uses the socks:// alias: do not hand an explicit proxy to the client.
    if all(normalize_proxy_url(value) == value for value in configured):
        return None

    # At least one variable needs rewriting, so the proxy has to be supplied
    # explicitly. Honour precedence by using the first variable that is set,
    # not merely the first one that needed rewriting.
    return normalize_proxy_url(configured[0])


async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
"""
Check if the URL can be fetched by the user agent according to the robots.txt file.
Expand All @@ -72,7 +104,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url:

robot_txt_url = get_robots_txt_url(url)

async with AsyncClient(proxy=proxy_url) as client:
async with AsyncClient(proxy=proxy_url_for_request(robot_txt_url, proxy_url)) as client:
try:
response = await client.get(
robot_txt_url,
Expand Down Expand Up @@ -116,7 +148,7 @@ async def fetch_url(
"""
from httpx import AsyncClient, HTTPError

async with AsyncClient(proxy=proxy_url) as client:
async with AsyncClient(proxy=proxy_url_for_request(url, proxy_url)) as client:
try:
response = await client.get(
url,
Expand Down
33 changes: 33 additions & 0 deletions src/fetch/tests/test_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
check_may_autonomously_fetch_url,
fetch_url,
DEFAULT_USER_AGENT_AUTONOMOUS,
proxy_url_for_request,
)


Expand Down Expand Up @@ -324,3 +325,35 @@ async def test_fetch_with_proxy(self):

# Verify AsyncClient was called with proxy
mock_client_class.assert_called_once_with(proxy="http://proxy.example.com:8080")

@pytest.mark.asyncio
async def test_fetch_accepts_socks_proxy_alias(self):
    """A ``socks://`` proxy given to ``fetch_url`` reaches AsyncClient as ``socks5://``."""
    fake_response = MagicMock(
        status_code=200,
        text='{"data": "test"}',
        headers={"content-type": "application/json"},
    )

    with patch("httpx.AsyncClient") as client_cls:
        fake_client = AsyncMock()
        fake_client.get = AsyncMock(return_value=fake_response)

        # Make `async with AsyncClient(...) as client` yield the fake client.
        client_context = client_cls.return_value
        client_context.__aenter__ = AsyncMock(return_value=fake_client)
        client_context.__aexit__ = AsyncMock(return_value=None)

        await fetch_url(
            "https://example.com/data",
            DEFAULT_USER_AGENT_AUTONOMOUS,
            proxy_url="socks://127.0.0.1:2080/",
        )

        # The alias must have been rewritten before the client was constructed.
        client_cls.assert_called_once_with(proxy="socks5://127.0.0.1:2080/")

def test_fetch_accepts_socks_proxy_from_environment(self, monkeypatch):
    """A ``socks://`` value in ``ALL_PROXY`` is resolved to a ``socks5://`` URL."""
    # Start from a clean slate so ambient proxy settings cannot interfere.
    for name in ("HTTPS_PROXY", "https_proxy", "ALL_PROXY", "all_proxy"):
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:2080/")

    resolved = proxy_url_for_request("https://example.com/data")

    assert resolved == "socks5://127.0.0.1:2080/"
18 changes: 16 additions & 2 deletions src/fetch/uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading