mirror of
https://github.com/modelcontextprotocol/servers.git
synced 2026-04-17 15:53:23 +02:00
test(fetch): add unit tests for fetch MCP server
Add comprehensive test coverage for the fetch server:
- TestGetRobotsTxtUrl: 6 tests for URL parsing
- TestExtractContentFromHtml: 3 tests for HTML-to-markdown conversion
- TestCheckMayAutonomouslyFetchUrl: 5 tests for robots.txt handling
- TestFetchUrl: 6 tests for URL fetching with various scenarios

Total: 20 tests covering:
- URL parsing and robots.txt URL generation
- HTML content extraction and markdown conversion
- robots.txt permission checking (401, 403, 404, allow/disallow)
- HTTP response handling (success, errors, raw mode)
- Proxy support

Also adds pytest and pytest-asyncio as dev dependencies.
This commit is contained in:
@@ -33,4 +33,8 @@ requires = ["hatchling"]
|
|||||||
build-backend = "hatchling.build"
|
build-backend = "hatchling.build"
|
||||||
|
|
||||||
[tool.uv]
|
[tool.uv]
|
||||||
dev-dependencies = ["pyright>=1.1.389", "ruff>=0.7.3"]
|
dev-dependencies = ["pyright>=1.1.389", "ruff>=0.7.3", "pytest>=8.0.0", "pytest-asyncio>=0.21.0"]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["tests"]
|
||||||
|
asyncio_mode = "auto"
|
||||||
|
|||||||
0
src/fetch/tests/__init__.py
Normal file
0
src/fetch/tests/__init__.py
Normal file
326
src/fetch/tests/test_server.py
Normal file
326
src/fetch/tests/test_server.py
Normal file
@@ -0,0 +1,326 @@
|
|||||||
|
"""Tests for the fetch MCP server."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, patch, MagicMock
|
||||||
|
from mcp.shared.exceptions import McpError
|
||||||
|
|
||||||
|
from mcp_server_fetch.server import (
|
||||||
|
extract_content_from_html,
|
||||||
|
get_robots_txt_url,
|
||||||
|
check_may_autonomously_fetch_url,
|
||||||
|
fetch_url,
|
||||||
|
DEFAULT_USER_AGENT_AUTONOMOUS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetRobotsTxtUrl:
    """Exercise get_robots_txt_url against several URL shapes."""

    def test_simple_url(self):
        """A one-segment path is expected to map to robots.txt on the same host."""
        page_url = "https://example.com/page"
        assert get_robots_txt_url(page_url) == "https://example.com/robots.txt"

    def test_url_with_path(self):
        """A deeply nested path is expected to be dropped from the result."""
        page_url = "https://example.com/some/deep/path/page.html"
        assert get_robots_txt_url(page_url) == "https://example.com/robots.txt"

    def test_url_with_query_params(self):
        """Query parameters are expected to be dropped from the result."""
        page_url = "https://example.com/page?foo=bar&baz=qux"
        assert get_robots_txt_url(page_url) == "https://example.com/robots.txt"

    def test_url_with_port(self):
        """An explicit port is expected to be kept in the result."""
        page_url = "https://example.com:8080/page"
        assert get_robots_txt_url(page_url) == "https://example.com:8080/robots.txt"

    def test_url_with_fragment(self):
        """A fragment is expected to be dropped from the result."""
        page_url = "https://example.com/page#section"
        assert get_robots_txt_url(page_url) == "https://example.com/robots.txt"

    def test_http_url(self):
        """A plain-HTTP scheme is expected to be kept in the result."""
        page_url = "http://example.com/page"
        assert get_robots_txt_url(page_url) == "http://example.com/robots.txt"
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractContentFromHtml:
    """Checks on the text produced by extract_content_from_html."""

    def test_simple_html(self):
        """Paragraph text from a small article should appear in the output."""
        page = """
        <html>
        <head><title>Test Page</title></head>
        <body>
        <article>
        <h1>Hello World</h1>
        <p>This is a test paragraph.</p>
        </article>
        </body>
        </html>
        """
        extracted = extract_content_from_html(page)
        # readabilipy may extract different parts depending on the content,
        # so only the paragraph text is checked.
        assert "test paragraph" in extracted

    def test_html_with_links(self):
        """The anchor text of a link should be present in the output."""
        page = """
        <html>
        <body>
        <article>
        <p>Visit <a href="https://example.com">Example</a> for more.</p>
        </article>
        </body>
        </html>
        """
        extracted = extract_content_from_html(page)
        assert "Example" in extracted

    def test_empty_content_returns_error(self):
        """An empty document should yield output containing an <error> marker."""
        extracted = extract_content_from_html("")
        assert "<error>" in extracted
|
||||||
|
|
||||||
|
|
||||||
|
class TestCheckMayAutonomouslyFetchUrl:
    """Checks on check_may_autonomously_fetch_url with a mocked HTTP client."""

    @staticmethod
    def _robots_response(status_code, body=None):
        """Build a stand-in response; ``text`` is assigned only when a body is given."""
        response = MagicMock()
        response.status_code = status_code
        if body is not None:
            response.text = body
        return response

    @staticmethod
    def _wire_client(client_cls, response):
        """Make ``async with client_cls(...)`` yield a client whose ``get`` returns *response*."""
        fake_client = AsyncMock()
        fake_client.get = AsyncMock(return_value=response)
        context = client_cls.return_value
        context.__aenter__ = AsyncMock(return_value=fake_client)
        context.__aexit__ = AsyncMock(return_value=None)

    @pytest.mark.asyncio
    async def test_allows_when_robots_txt_404(self):
        """A 404 for robots.txt must not raise."""
        response = self._robots_response(404)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            # Completing without an exception is the whole assertion here.
            await check_may_autonomously_fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

    @pytest.mark.asyncio
    async def test_blocks_when_robots_txt_401(self):
        """A 401 for robots.txt must raise McpError."""
        response = self._robots_response(401)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await check_may_autonomously_fetch_url(
                    "https://example.com/page",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )

    @pytest.mark.asyncio
    async def test_blocks_when_robots_txt_403(self):
        """A 403 for robots.txt must raise McpError."""
        response = self._robots_response(403)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await check_may_autonomously_fetch_url(
                    "https://example.com/page",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )

    @pytest.mark.asyncio
    async def test_allows_when_robots_txt_allows_all(self):
        """A robots.txt that allows everything must not raise."""
        response = self._robots_response(200, "User-agent: *\nAllow: /")

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            # Completing without an exception is the whole assertion here.
            await check_may_autonomously_fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

    @pytest.mark.asyncio
    async def test_blocks_when_robots_txt_disallows_all(self):
        """A robots.txt that disallows everything must raise McpError."""
        response = self._robots_response(200, "User-agent: *\nDisallow: /")

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await check_may_autonomously_fetch_url(
                    "https://example.com/page",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )
|
||||||
|
|
||||||
|
|
||||||
|
class TestFetchUrl:
    """Checks on fetch_url with a mocked HTTP client."""

    @staticmethod
    def _http_response(status_code, body=None, content_type=None):
        """Build a stand-in response.

        ``text`` and ``headers`` are assigned only when the corresponding
        argument is supplied; otherwise they stay as MagicMock defaults.
        """
        response = MagicMock()
        response.status_code = status_code
        if body is not None:
            response.text = body
        if content_type is not None:
            response.headers = {"content-type": content_type}
        return response

    @staticmethod
    def _wire_client(client_cls, response):
        """Make ``async with client_cls(...)`` yield a client whose ``get`` returns *response*."""
        fake_client = AsyncMock()
        fake_client.get = AsyncMock(return_value=response)
        context = client_cls.return_value
        context.__aenter__ = AsyncMock(return_value=fake_client)
        context.__aexit__ = AsyncMock(return_value=None)

    @pytest.mark.asyncio
    async def test_fetch_html_page(self):
        """An HTML response should come back as a string with an empty prefix."""
        page = """
        <html>
        <body>
        <article>
        <h1>Test Page</h1>
        <p>Hello World</p>
        </article>
        </body>
        </html>
        """
        response = self._http_response(200, page, "text/html")

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            content, prefix = await fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

            # The HTML is processed, so only the type of the result is checked.
            assert isinstance(content, str)
            assert prefix == ""

    @pytest.mark.asyncio
    async def test_fetch_html_page_raw(self):
        """With force_raw=True the original HTML should be returned unchanged."""
        html_content = "<html><body><h1>Test</h1></body></html>"
        response = self._http_response(200, html_content, "text/html")

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            content, prefix = await fetch_url(
                "https://example.com/page",
                DEFAULT_USER_AGENT_AUTONOMOUS,
                force_raw=True,
            )

            assert content == html_content
            assert "cannot be simplified" in prefix

    @pytest.mark.asyncio
    async def test_fetch_json_returns_raw(self):
        """A JSON response should be returned unchanged with an explanatory prefix."""
        json_content = '{"key": "value"}'
        response = self._http_response(200, json_content, "application/json")

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            content, prefix = await fetch_url(
                "https://api.example.com/data",
                DEFAULT_USER_AGENT_AUTONOMOUS,
            )

            assert content == json_content
            assert "cannot be simplified" in prefix

    @pytest.mark.asyncio
    async def test_fetch_404_raises_error(self):
        """A 404 response must raise McpError."""
        response = self._http_response(404)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await fetch_url(
                    "https://example.com/notfound",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )

    @pytest.mark.asyncio
    async def test_fetch_500_raises_error(self):
        """A 500 response must raise McpError."""
        response = self._http_response(500)

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            with pytest.raises(McpError):
                await fetch_url(
                    "https://example.com/error",
                    DEFAULT_USER_AGENT_AUTONOMOUS,
                )

    @pytest.mark.asyncio
    async def test_fetch_with_proxy(self):
        """The proxy URL should be forwarded to the AsyncClient constructor."""
        response = self._http_response(200, '{"data": "test"}', "application/json")

        with patch("httpx.AsyncClient") as client_cls:
            self._wire_client(client_cls, response)

            await fetch_url(
                "https://example.com/data",
                DEFAULT_USER_AGENT_AUTONOMOUS,
                proxy_url="http://proxy.example.com:8080",
            )

            # The client must have been constructed exactly once, with only
            # the proxy keyword.
            client_cls.assert_called_once_with(proxies="http://proxy.example.com:8080")
|
||||||
1444
src/fetch/uv.lock
generated
1444
src/fetch/uv.lock
generated
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user