Merge remote-tracking branch 'upstream/main' into burkeholland-vscode-install-instructions

This commit is contained in:
Burke Holland
2025-04-10 09:29:23 -05:00
16 changed files with 488 additions and 84 deletions

View File

@@ -155,6 +155,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc
This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.
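For example, with an MCP client configured to launch the server via `uvx`, the override might look like the following (a minimal sketch; the surrounding `mcpServers` block is the configuration format assumed here, and `YourUserAgent` is a placeholder):

```json
{
  "mcpServers": {
    "fetch": {
      "command": "uvx",
      "args": ["mcp-server-fetch", "--user-agent=YourUserAgent"]
    }
  }
}
```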
+### Customization - Proxy
+
+The server can be configured to use a proxy by using the `--proxy-url` argument.
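Assuming the same `uvx`-based configuration, routing requests through a local HTTP proxy on port 8080 (both the launcher and the address are placeholders) might look like:

```json
{
  "mcpServers": {
    "fetch": {
      "command": "uvx",
      "args": ["mcp-server-fetch", "--proxy-url=http://localhost:8080"]
    }
  }
}
```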
## Debugging
You can use the MCP inspector to debug the server. For uvx installations:
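For uvx installations, an invocation along these lines is typical (assuming Node.js is available for `npx`):

```sh
npx @modelcontextprotocol/inspector uvx mcp-server-fetch
```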

View File

@@ -1,6 +1,6 @@
[project]
name = "mcp-server-fetch"
version = "0.6.2"
version = "0.6.3"
description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
readme = "README.md"
requires-python = ">=3.10"
@@ -16,6 +16,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
]
dependencies = [
"httpx<0.28",
"markdownify>=0.13.1",
"mcp>=1.1.3",
"protego>=0.3.1",

View File

@@ -15,9 +15,10 @@ def main():
action="store_true",
help="Ignore robots.txt restrictions",
)
+parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
args = parser.parse_args()
-asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
+asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
if __name__ == "__main__":
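With this wiring in place, the new flag can be exercised directly from the package entry point; a hypothetical local run (the proxy address is a placeholder) would be:

```sh
python -m mcp_server_fetch --proxy-url http://localhost:8080
```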

View File

@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
return robots_url
-async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
+async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
"""
Check if the URL can be fetched by the user agent according to the robots.txt file.
Raises a McpError if not.
@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
robot_txt_url = get_robots_txt_url(url)
-async with AsyncClient() as client:
+async with AsyncClient(proxies=proxy_url) as client:
try:
response = await client.get(
robot_txt_url,
@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
async def fetch_url(
-url: str, user_agent: str, force_raw: bool = False
+url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
) -> Tuple[str, str]:
"""
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
"""
from httpx import AsyncClient, HTTPError
-async with AsyncClient() as client:
+async with AsyncClient(proxies=proxy_url) as client:
try:
response = await client.get(
url,
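The `proxies=` keyword used above is the pre-0.28 httpx spelling, which is presumably why this same commit pins `httpx<0.28` in `pyproject.toml`; httpx 0.28 removed `proxies` in favor of `proxy`. A self-contained sketch of the pattern, with a hypothetical helper name:

```python
import asyncio

from httpx import AsyncClient


async def fetch_via_proxy(url: str, proxy_url: str | None = None) -> str:
    # Passing proxies=None is equivalent to no proxy, so the same code path
    # serves both configurations (httpx < 0.28 API).
    async with AsyncClient(proxies=proxy_url) as client:
        response = await client.get(url, follow_redirects=True)
        response.raise_for_status()
        return response.text


# Placeholder proxy address for illustration:
print(asyncio.run(fetch_via_proxy("https://example.com", "http://localhost:8080")))
```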
@@ -173,19 +173,22 @@ class Fetch(BaseModel):
bool,
Field(
default=False,
description="Get the actual HTML content if the requested page, without simplification.",
description="Get the actual HTML content of the requested page, without simplification.",
),
]
async def serve(
-custom_user_agent: str | None = None, ignore_robots_txt: bool = False
+custom_user_agent: str | None = None,
+ignore_robots_txt: bool = False,
+proxy_url: str | None = None,
) -> None:
"""Run the fetch MCP server.
Args:
custom_user_agent: Optional custom User-Agent string to use for requests
ignore_robots_txt: Whether to ignore robots.txt restrictions
+proxy_url: Optional proxy URL to use for requests
"""
server = Server("mcp-fetch")
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
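Given the widened signature, an embedder could launch the server with a proxy roughly as follows (a sketch assuming the package layout implied by the diff; the agent string and proxy address are placeholders):

```python
import asyncio

from mcp_server_fetch.server import serve

# serve() runs the MCP fetch server until the client disconnects; every
# outbound request will use the supplied User-Agent and proxy.
asyncio.run(
    serve(
        custom_user_agent="MyAgent/1.0",
        ignore_robots_txt=False,
        proxy_url="http://localhost:8080",
    )
)
```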
@@ -229,10 +232,10 @@ Although originally you did not have internet access, and were advised to refuse
raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))
if not ignore_robots_txt:
-await check_may_autonomously_fetch_url(url, user_agent_autonomous)
+await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)
content, prefix = await fetch_url(
-url, user_agent_autonomous, force_raw=args.raw
+url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
)
original_length = len(content)
if args.start_index >= original_length:
@@ -259,7 +262,7 @@ Although originally you did not have internet access, and were advised to refuse
url = arguments["url"]
try:
-content, prefix = await fetch_url(url, user_agent_manual)
+content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
# TODO: after SDK bug is addressed, don't catch the exception
except McpError as e:
return GetPromptResult(