mirror of
https://github.com/modelcontextprotocol/servers.git
synced 2026-04-18 00:03:23 +02:00
Merge remote-tracking branch 'upstream/main' into burkeholland-vscode-install-instructions
This commit is contained in:
@@ -155,6 +155,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc
|
||||
|
||||
This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.
|
||||
|
||||
### Customization - Proxy
|
||||
|
||||
The server can be configured to route its requests through a proxy by adding the argument `--proxy-url=YourProxyUrl` to the `args` list in the configuration.
|
||||
|
||||
## Debugging
|
||||
|
||||
You can use the MCP inspector to debug the server. For uvx installations:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "mcp-server-fetch"
|
||||
version = "0.6.2"
|
||||
version = "0.6.3"
|
||||
description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
@@ -16,6 +16,7 @@ classifiers = [
|
||||
"Programming Language :: Python :: 3.10",
|
||||
]
|
||||
dependencies = [
|
||||
"httpx<0.28",
|
||||
"markdownify>=0.13.1",
|
||||
"mcp>=1.1.3",
|
||||
"protego>=0.3.1",
|
||||
|
||||
@@ -15,9 +15,10 @@ def main():
|
||||
action="store_true",
|
||||
help="Ignore robots.txt restrictions",
|
||||
)
|
||||
parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
|
||||
|
||||
args = parser.parse_args()
|
||||
asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
|
||||
asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
|
||||
return robots_url
|
||||
|
||||
|
||||
async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
|
||||
async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
|
||||
"""
|
||||
Check if the URL can be fetched by the user agent according to the robots.txt file.
|
||||
Raises a McpError if not.
|
||||
@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
|
||||
|
||||
robot_txt_url = get_robots_txt_url(url)
|
||||
|
||||
async with AsyncClient() as client:
|
||||
async with AsyncClient(proxies=proxy_url) as client:
|
||||
try:
|
||||
response = await client.get(
|
||||
robot_txt_url,
|
||||
@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
|
||||
|
||||
|
||||
async def fetch_url(
|
||||
url: str, user_agent: str, force_raw: bool = False
|
||||
url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
|
||||
) -> Tuple[str, str]:
|
||||
"""
|
||||
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
|
||||
"""
|
||||
from httpx import AsyncClient, HTTPError
|
||||
|
||||
async with AsyncClient() as client:
|
||||
async with AsyncClient(proxies=proxy_url) as client:
|
||||
try:
|
||||
response = await client.get(
|
||||
url,
|
||||
@@ -173,19 +173,22 @@ class Fetch(BaseModel):
|
||||
bool,
|
||||
Field(
|
||||
default=False,
|
||||
description="Get the actual HTML content if the requested page, without simplification.",
|
||||
description="Get the actual HTML content of the requested page, without simplification.",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
async def serve(
|
||||
custom_user_agent: str | None = None, ignore_robots_txt: bool = False
|
||||
custom_user_agent: str | None = None,
|
||||
ignore_robots_txt: bool = False,
|
||||
proxy_url: str | None = None,
|
||||
) -> None:
|
||||
"""Run the fetch MCP server.
|
||||
|
||||
Args:
|
||||
custom_user_agent: Optional custom User-Agent string to use for requests
|
||||
ignore_robots_txt: Whether to ignore robots.txt restrictions
|
||||
proxy_url: Optional proxy URL to use for requests
|
||||
"""
|
||||
server = Server("mcp-fetch")
|
||||
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
|
||||
@@ -229,10 +232,10 @@ Although originally you did not have internet access, and were advised to refuse
|
||||
raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))
|
||||
|
||||
if not ignore_robots_txt:
|
||||
await check_may_autonomously_fetch_url(url, user_agent_autonomous)
|
||||
await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)
|
||||
|
||||
content, prefix = await fetch_url(
|
||||
url, user_agent_autonomous, force_raw=args.raw
|
||||
url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
|
||||
)
|
||||
original_length = len(content)
|
||||
if args.start_index >= original_length:
|
||||
@@ -259,7 +262,7 @@ Although originally you did not have internet access, and were advised to refuse
|
||||
url = arguments["url"]
|
||||
|
||||
try:
|
||||
content, prefix = await fetch_url(url, user_agent_manual)
|
||||
content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
|
||||
# TODO: after SDK bug is addressed, don't catch the exception
|
||||
except McpError as e:
|
||||
return GetPromptResult(
|
||||
|
||||
Reference in New Issue
Block a user