From 64a654744a414e76fa007063d6ea5425d17d574d Mon Sep 17 00:00:00 2001 From: shiquda Date: Thu, 13 Mar 2025 00:20:48 +0800 Subject: [PATCH 1/6] feat(fetch): add support for using proxy for requests --- src/fetch/README.md | 4 ++++ src/fetch/src/mcp_server_fetch/__init__.py | 3 ++- src/fetch/src/mcp_server_fetch/server.py | 19 +++++++++++-------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/fetch/README.md b/src/fetch/README.md index 0e58b3de..01d99cac 100644 --- a/src/fetch/README.md +++ b/src/fetch/README.md @@ -107,6 +107,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration. +### Customization - Proxy + +The server can be configured to use a proxy by using the `--proxy-url` argument. + ## Debugging You can use the MCP inspector to debug the server. For uvx installations: diff --git a/src/fetch/src/mcp_server_fetch/__init__.py b/src/fetch/src/mcp_server_fetch/__init__.py index e3a35d54..09744ce3 100644 --- a/src/fetch/src/mcp_server_fetch/__init__.py +++ b/src/fetch/src/mcp_server_fetch/__init__.py @@ -15,9 +15,10 @@ def main(): action="store_true", help="Ignore robots.txt restrictions", ) + parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests") args = parser.parse_args() - asyncio.run(serve(args.user_agent, args.ignore_robots_txt)) + asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url)) if __name__ == "__main__": diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index 775d79c8..24c6a875 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str: return robots_url -async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None: +async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None: """ Check if the URL can be fetched by the user agent according to the robots.txt file. Raises a McpError if not. @@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None: robot_txt_url = get_robots_txt_url(url) - async with AsyncClient() as client: + async with AsyncClient(proxies=proxy_url) as client: try: response = await client.get( robot_txt_url, @@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None: async def fetch_url( - url: str, user_agent: str, force_raw: bool = False + url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None ) -> Tuple[str, str]: """ Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information. """ from httpx import AsyncClient, HTTPError - async with AsyncClient() as client: + async with AsyncClient(proxies=proxy_url) as client: try: response = await client.get( url, @@ -179,13 +179,16 @@ class Fetch(BaseModel): async def serve( - custom_user_agent: str | None = None, ignore_robots_txt: bool = False + custom_user_agent: str | None = None, + ignore_robots_txt: bool = False, + proxy_url: str | None = None, ) -> None: """Run the fetch MCP server. Args: custom_user_agent: Optional custom User-Agent string to use for requests ignore_robots_txt: Whether to ignore robots.txt restrictions + proxy_url: Optional proxy URL to use for requests """ server = Server("mcp-fetch") user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS @@ -229,10 +232,10 @@ Although originally you did not have internet access, and were advised to refuse raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required")) if not ignore_robots_txt: - await check_may_autonomously_fetch_url(url, user_agent_autonomous) + await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url) content, prefix = await fetch_url( - url, user_agent_autonomous, force_raw=args.raw + url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url ) original_length = len(content) if args.start_index >= original_length: @@ -259,7 +262,7 @@ Although originally you did not have internet access, and were advised to refuse url = arguments["url"] try: - content, prefix = await fetch_url(url, user_agent_manual) + content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url) # TODO: after SDK bug is addressed, don't catch the exception except McpError as e: return GetPromptResult( From 0490e08253541371d83dab33c8ee93e7c0c5cab1 Mon Sep 17 00:00:00 2001 From: Wilhelm Klopp Date: Wed, 26 Mar 2025 18:52:33 +0000 Subject: [PATCH 2/6] Fix typo in mcp_server_fetch --- src/fetch/src/mcp_server_fetch/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fetch/src/mcp_server_fetch/server.py b/src/fetch/src/mcp_server_fetch/server.py index 775d79c8..b09a347c 100644 --- a/src/fetch/src/mcp_server_fetch/server.py +++ b/src/fetch/src/mcp_server_fetch/server.py @@ -173,7 +173,7 @@ class Fetch(BaseModel): bool, Field( default=False, - description="Get the actual HTML content if the requested page, without simplification.", + description="Get the actual HTML content of the requested page, without simplification.", ), ] From e37940b7bc541b1427ac7ec908e31f1bfaa19159 Mon Sep 17 00:00:00 2001 From: Dennis Zhuang Date: Fri, 28 Mar 2025 14:12:34 +0800 Subject: [PATCH 3/6] Add GreptimeDB mcp server --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b0cc79dc..1ebd7d52 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ Official integrations are maintained by companies building production ready MCP - gotoHuman Logo **[gotoHuman](https://github.com/gotohuman/gotohuman-mcp-server)** - Human-in-the-loop platform - Allow AI agents and automations to send requests for approval to your [gotoHuman](https://www.gotohuman.com) inbox. - Grafana Logo **[Grafana](https://github.com/grafana/mcp-grafana)** - Search dashboards, investigate incidents and query datasources in your Grafana instance - Graphlit Logo **[Graphlit](https://github.com/graphlit/graphlit-mcp-server)** - Ingest anything from Slack to Gmail to podcast feeds, in addition to web crawling, into a searchable [Graphlit](https://www.graphlit.com) project. +- Greptime Logo **[GreptimeDB](https://github.com/GreptimeTeam/greptimedb-mcp-server)** - Provides AI assistants with a secure and structured way to explore and analyze data in [GreptimeDB](https://github.com/GreptimeTeam/greptimedb). - Hologres Logo **[Hologres](https://github.com/aliyun/alibabacloud-hologres-mcp-server)** - Connect to a [Hologres](https://www.alibabacloud.com/en/product/hologres) instance, get table metadata, query and analyze data. - Hyperbrowsers23 Logo **[Hyperbrowser](https://github.com/hyperbrowserai/mcp)** - [Hyperbrowser](https://www.hyperbrowser.ai/) is the next-generation platform empowering AI agents and enabling effortless, scalable browser automation. - **[IBM wxflows](https://github.com/IBM/wxflows/tree/main/examples/mcp/javascript)** - Tool platform by IBM to build, test and deploy tools for any data source From 4f93f82009d1e2c0d8e852b34f3295fee84bd97e Mon Sep 17 00:00:00 2001 From: AB498 Date: Sun, 30 Mar 2025 04:13:59 +0600 Subject: [PATCH 4/6] reviewed: fix: detached frame error & feature: Puppeteer launch arguments support --- src/puppeteer/README.md | 41 +++++++++++++++++- src/puppeteer/index.ts | 92 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 120 insertions(+), 13 deletions(-) diff --git a/src/puppeteer/README.md b/src/puppeteer/README.md index 7c7e8160..794cfdf1 100644 --- a/src/puppeteer/README.md +++ b/src/puppeteer/README.md @@ -8,7 +8,10 @@ A Model Context Protocol server that provides browser automation capabilities us - **puppeteer_navigate** - Navigate to any URL in the browser - - Input: `url` (string) + - Inputs: + - `url` (string, required): URL to navigate to + - `launchOptions` (object, optional): PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: `{ headless: true, args: ['--user-data-dir="C:/Data"'] }` + - `allowDangerous` (boolean, optional): Allow dangerous LaunchOptions that reduce security. When false, dangerous args like `--no-sandbox`, `--disable-web-security` will throw errors. Default false. - **puppeteer_screenshot** - Capture screenshots of the entire page or specific elements @@ -61,6 +64,7 @@ The server provides access to two types of resources: - Screenshot capabilities - JavaScript execution - Basic web interaction (navigation, clicking, form filling) +- Customizable Puppeteer launch options ## Configuration to use Puppeteer Server Here's the Claude Desktop configuration to use the Puppeter server: @@ -93,6 +97,39 @@ Here's the Claude Desktop configuration to use the Puppeter server: } ``` +### Launch Options + +You can customize Puppeteer's browser behavior in two ways: + +1. **Environment Variable**: Set `PUPPETEER_LAUNCH_OPTIONS` with a JSON-encoded string in the MCP configuration's `env` parameter: + + ```json + { + "mcpServers": { + "mcp-puppeteer": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-puppeteer"] + "env": { + "PUPPETEER_LAUNCH_OPTIONS": "{ \"headless\": false, \"executablePath\": \"C:/Program Files/Google/Chrome/Application/chrome.exe\", \"args\": [] }", + "ALLOW_DANGEROUS": "true" + } + } + } + } + ``` + +2. **Tool Call Arguments**: Pass `launchOptions` and `allowDangerous` parameters to the `puppeteer_navigate` tool: + + ```json + { + "url": "https://example.com", + "launchOptions": { + "headless": false, + "defaultViewport": {"width": 1280, "height": 720} + } + } + ``` + ## Build Docker build: @@ -103,4 +140,4 @@ docker build -t mcp/puppeteer -f src/puppeteer/Dockerfile . ## License -This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository. +This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository. \ No newline at end of file diff --git a/src/puppeteer/index.ts b/src/puppeteer/index.ts index fda97164..1bee5220 100644 --- a/src/puppeteer/index.ts +++ b/src/puppeteer/index.ts @@ -22,7 +22,9 @@ const TOOLS: Tool[] = [ inputSchema: { type: "object", properties: { - url: { type: "string" }, + url: { type: "string", description: "URL to navigate to" }, + launchOptions: { type: "object", description: "PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: { headless: true, args: ['--no-sandbox'] }" }, + allowDangerous: { type: "boolean", description: "Allow dangerous LaunchOptions that reduce security. When false, dangerous args like --no-sandbox will throw errors. Default false." }, }, required: ["url"], }, @@ -101,16 +103,65 @@ const TOOLS: Tool[] = [ ]; // Global state -let browser: Browser | undefined; -let page: Page | undefined; +let browser: Browser | null; +let page: Page | null; const consoleLogs: string[] = []; const screenshots = new Map(); +let previousLaunchOptions: any = null; + +async function ensureBrowser({ launchOptions, allowDangerous }: any) { + + const DANGEROUS_ARGS = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--single-process', + '--disable-web-security', + '--ignore-certificate-errors', + '--disable-features=IsolateOrigins', + '--disable-site-isolation-trials', + '--allow-running-insecure-content' + ]; + + // Parse environment config safely + let envConfig = {}; + try { + envConfig = JSON.parse(process.env.PUPPETEER_LAUNCH_OPTIONS || '{}'); + } catch (error: any) { + console.warn('Failed to parse PUPPETEER_LAUNCH_OPTIONS:', error?.message || error); + } + + // Deep merge environment config with user-provided options + const mergedConfig = deepMerge(envConfig, launchOptions || {}); + + // Security validation for merged config + if (mergedConfig?.args) { + const dangerousArgs = mergedConfig.args?.filter?.((arg: string) => DANGEROUS_ARGS.some((dangerousArg: string) => arg.startsWith(dangerousArg))); + if (dangerousArgs?.length > 0 && !(allowDangerous || (process.env.ALLOW_DANGEROUS === 'true'))) { + throw new Error(`Dangerous browser arguments detected: ${dangerousArgs.join(', ')}. Fround from environment variable and tool call argument. ` + + 'Set allowDangerous: true in the tool call arguments to override.'); + } + } + + try { + if ((browser && !browser.connected) || + (launchOptions && (JSON.stringify(launchOptions) != JSON.stringify(previousLaunchOptions)))) { + await browser?.close(); + browser = null; + } + } + catch (error) { + browser = null; + } + + previousLaunchOptions = launchOptions; -async function ensureBrowser() { if (!browser) { const npx_args = { headless: false } const docker_args = { headless: true, args: ["--no-sandbox", "--single-process", "--no-zygote"] } - browser = await puppeteer.launch(process.env.DOCKER_CONTAINER ? docker_args : npx_args); + browser = await puppeteer.launch(deepMerge( + process.env.DOCKER_CONTAINER ? docker_args : npx_args, + mergedConfig + )); const pages = await browser.pages(); page = pages[0]; @@ -126,6 +177,25 @@ async function ensureBrowser() { return page!; } +// Deep merge utility function +function deepMerge(target: any, source: any): any { + const output = Object.assign({}, target); + if (typeof target !== 'object' || typeof source !== 'object') return source; + + for (const key of Object.keys(source)) { + const targetVal = target[key]; + const sourceVal = source[key]; + if (Array.isArray(targetVal) && Array.isArray(sourceVal)) { + output[key] = [...targetVal, ...sourceVal]; + } else if (sourceVal instanceof Object && key in target) { + output[key] = deepMerge(targetVal, sourceVal); + } else { + output[key] = sourceVal; + } + } + return output; +} + declare global { interface Window { mcpHelper: { @@ -136,7 +206,7 @@ declare global { } async function handleToolCall(name: string, args: any): Promise { - const page = await ensureBrowser(); + const page = await ensureBrowser(args); switch (name) { case "puppeteer_navigate": @@ -285,15 +355,15 @@ async function handleToolCall(name: string, args: any): Promise window.mcpHelper.logs.push(`[${method}] ${args.join(' ')}`); (window.mcpHelper.originalConsole as any)[method](...args); }; - } ); - } ); + }); + }); - const result = await page.evaluate( args.script ); + const result = await page.evaluate(args.script); const logs = await page.evaluate(() => { Object.assign(console, window.mcpHelper.originalConsole); const logs = window.mcpHelper.logs; - delete ( window as any).mcpHelper; + delete (window as any).mcpHelper; return logs; }); @@ -405,4 +475,4 @@ runServer().catch(console.error); process.stdin.on("close", () => { console.error("Puppeteer MCP Server closed"); server.close(); -}); +}); \ No newline at end of file From 94029e6252fe8c8f78480fb90811fd2a24fd52e2 Mon Sep 17 00:00:00 2001 From: AB498 Date: Sun, 30 Mar 2025 13:24:43 +0600 Subject: [PATCH 5/6] puppeteer server: deduplication of launch option args --- src/puppeteer/index.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/puppeteer/index.ts b/src/puppeteer/index.ts index 1bee5220..1849c783 100644 --- a/src/puppeteer/index.ts +++ b/src/puppeteer/index.ts @@ -186,7 +186,13 @@ function deepMerge(target: any, source: any): any { const targetVal = target[key]; const sourceVal = source[key]; if (Array.isArray(targetVal) && Array.isArray(sourceVal)) { - output[key] = [...targetVal, ...sourceVal]; + // Deduplicate args/ignoreDefaultArgs, prefer source values + output[key] = [...new Set([ + ...(key === 'args' || key === 'ignoreDefaultArgs' ? + targetVal.filter((arg: string) => !sourceVal.some((launchArg: string) => arg.startsWith('--') && launchArg.startsWith(arg.split('=')[0]))) : + targetVal), + ...sourceVal + ])]; } else if (sourceVal instanceof Object && key in target) { output[key] = deepMerge(targetVal, sourceVal); } else { From 9a4d513724b4fbe638ca24f6f86e6fc3ca373e89 Mon Sep 17 00:00:00 2001 From: shiquda Date: Sun, 30 Mar 2025 22:33:51 +0800 Subject: [PATCH 6/6] fix(fetch): specify httpx<0.28 to resolve proxy problem - AsyncClient.__init__() got an unexpected keyword argument 'proxies' --- src/fetch/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fetch/pyproject.toml b/src/fetch/pyproject.toml index ed76fdcd..bbee516a 100644 --- a/src/fetch/pyproject.toml +++ b/src/fetch/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-server-fetch" -version = "0.6.2" +version = "0.6.3" description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs" readme = "README.md" requires-python = ">=3.10" @@ -16,6 +16,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", ] dependencies = [ + "httpx<0.28", "markdownify>=0.13.1", "mcp>=1.1.3", "protego>=0.3.1",