mirror of
https://github.com/modelcontextprotocol/servers.git
synced 2026-04-20 12:55:21 +02:00
add doc strings for readabilty and constrain types
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
from typing import Optional
|
from typing import Optional, Tuple
|
||||||
from urllib.parse import urlparse, urlunparse
|
from urllib.parse import urlparse, urlunparse
|
||||||
|
|
||||||
import markdownify
|
import markdownify
|
||||||
@@ -17,13 +17,21 @@ from mcp.types import (
|
|||||||
INTERNAL_ERROR,
|
INTERNAL_ERROR,
|
||||||
)
|
)
|
||||||
from protego import Protego
|
from protego import Protego
|
||||||
from pydantic import BaseModel, Field, ValidationError
|
from pydantic import BaseModel, Field, AnyUrl, conint
|
||||||
|
|
||||||
DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
|
DEFAULT_USER_AGENT_AUTONOMOUS = "ModelContextProtocol/1.0 (Autonomous; +https://github.com/modelcontextprotocol/servers)"
|
||||||
DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"
|
DEFAULT_USER_AGENT_MANUAL = "ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotocol/servers)"
|
||||||
|
|
||||||
|
|
||||||
def extract_content_from_html(html: str) -> str:
|
def extract_content_from_html(html: str) -> str:
|
||||||
|
"""Extract and convert HTML content to Markdown format.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
html: Raw HTML content to process
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Simplified markdown version of the content
|
||||||
|
"""
|
||||||
ret = readabilipy.simple_json.simple_json_from_html_string(
|
ret = readabilipy.simple_json.simple_json_from_html_string(
|
||||||
html, use_readability=True
|
html, use_readability=True
|
||||||
)
|
)
|
||||||
@@ -36,9 +44,17 @@ def extract_content_from_html(html: str) -> str:
|
|||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
def get_robots_txt_url(url: str) -> str:
|
def get_robots_txt_url(url: AnyUrl | str) -> str:
|
||||||
|
"""Get the robots.txt URL for a given website URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
url: Website URL to get robots.txt for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
URL of the robots.txt file
|
||||||
|
"""
|
||||||
# Parse the URL into components
|
# Parse the URL into components
|
||||||
parsed = urlparse(url)
|
parsed = urlparse(str(url))
|
||||||
|
|
||||||
# Reconstruct the base URL with just scheme, netloc, and /robots.txt path
|
# Reconstruct the base URL with just scheme, netloc, and /robots.txt path
|
||||||
robots_url = urlunparse((parsed.scheme, parsed.netloc, "/robots.txt", "", "", ""))
|
robots_url = urlunparse((parsed.scheme, parsed.netloc, "/robots.txt", "", "", ""))
|
||||||
@@ -46,7 +62,7 @@ def get_robots_txt_url(url: str) -> str:
|
|||||||
return robots_url
|
return robots_url
|
||||||
|
|
||||||
|
|
||||||
async def check_may_autonomously_fetch_url(url: str, user_agent: str):
|
async def check_may_autonomously_fetch_url(url: AnyUrl | str, user_agent: str) -> None:
|
||||||
"""
|
"""
|
||||||
Check if the URL can be fetched by the user agent according to the robots.txt file.
|
Check if the URL can be fetched by the user agent according to the robots.txt file.
|
||||||
Raises a McpError if not.
|
Raises a McpError if not.
|
||||||
@@ -89,7 +105,9 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
async def fetch_url(url: str, user_agent: str, force_raw: bool = False) -> (str, str):
|
async def fetch_url(
|
||||||
|
url: AnyUrl | str, user_agent: str, force_raw: bool = False
|
||||||
|
) -> Tuple[str, str]:
|
||||||
"""
|
"""
|
||||||
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
|
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
|
||||||
"""
|
"""
|
||||||
@@ -98,7 +116,7 @@ async def fetch_url(url: str, user_agent: str, force_raw: bool = False) -> (str,
|
|||||||
async with AsyncClient() as client:
|
async with AsyncClient() as client:
|
||||||
try:
|
try:
|
||||||
response = await client.get(
|
response = await client.get(
|
||||||
url,
|
str(url),
|
||||||
follow_redirects=True,
|
follow_redirects=True,
|
||||||
headers={"User-Agent": user_agent},
|
headers={"User-Agent": user_agent},
|
||||||
timeout=30,
|
timeout=30,
|
||||||
@@ -128,9 +146,13 @@ async def fetch_url(url: str, user_agent: str, force_raw: bool = False) -> (str,
|
|||||||
|
|
||||||
|
|
||||||
class Fetch(BaseModel):
|
class Fetch(BaseModel):
|
||||||
url: str = Field(..., description="URL to fetch")
|
"""Parameters for fetching a URL."""
|
||||||
max_length: int = Field(5000, description="Maximum number of characters to return.")
|
|
||||||
start_index: int = Field(
|
url: AnyUrl = Field(..., description="URL to fetch")
|
||||||
|
max_length: conint(gt=0, lt=1000000) = Field(
|
||||||
|
5000, description="Maximum number of characters to return."
|
||||||
|
)
|
||||||
|
start_index: conint(ge=0) = Field(
|
||||||
0,
|
0,
|
||||||
description="On return output starting at this character index, useful if a previous fetch was truncated and more context is required.",
|
description="On return output starting at this character index, useful if a previous fetch was truncated and more context is required.",
|
||||||
)
|
)
|
||||||
@@ -143,6 +165,12 @@ class Fetch(BaseModel):
|
|||||||
async def serve(
|
async def serve(
|
||||||
custom_user_agent: Optional[str] = None, ignore_robots_txt: bool = False
|
custom_user_agent: Optional[str] = None, ignore_robots_txt: bool = False
|
||||||
) -> None:
|
) -> None:
|
||||||
|
"""Run the fetch MCP server.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
custom_user_agent: Optional custom User-Agent string to use for requests
|
||||||
|
ignore_robots_txt: Whether to ignore robots.txt restrictions
|
||||||
|
"""
|
||||||
server = Server("mcp-fetch")
|
server = Server("mcp-fetch")
|
||||||
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
|
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
|
||||||
user_agent_manual = custom_user_agent or DEFAULT_USER_AGENT_MANUAL
|
user_agent_manual = custom_user_agent or DEFAULT_USER_AGENT_MANUAL
|
||||||
|
|||||||
Reference in New Issue
Block a user