Merge branch 'main' into add_mcp_server_my_apple_remembers

This commit is contained in:
Bary Huang
2025-03-31 21:41:09 -07:00
committed by GitHub
7 changed files with 147 additions and 24 deletions

View File

@@ -73,6 +73,7 @@ Official integrations are maintained by companies building production ready MCP
- <img height="12" width="12" src="https://cdn.prod.website-files.com/6605a2979ff17b2cd1939cd4/6605a460de47e7596ed84f06_icon256.png" alt="gotoHuman Logo" /> **[gotoHuman](https://github.com/gotohuman/gotohuman-mcp-server)** - Human-in-the-loop platform - Allow AI agents and automations to send requests for approval to your [gotoHuman](https://www.gotohuman.com) inbox.
- <img height="12" width="12" src="https://grafana.com/favicon.ico" alt="Grafana Logo" /> **[Grafana](https://github.com/grafana/mcp-grafana)** - Search dashboards, investigate incidents and query datasources in your Grafana instance
- <img height="12" width="12" src="https://framerusercontent.com/images/KCOWBYLKunDff1Dr452y6EfjiU.png" alt="Graphlit Logo" /> **[Graphlit](https://github.com/graphlit/graphlit-mcp-server)** - Ingest anything from Slack to Gmail to podcast feeds, in addition to web crawling, into a searchable [Graphlit](https://www.graphlit.com) project.
- <img height="12" width="12" src="https://greptime.com/favicon.ico" alt="Greptime Logo" /> **[GreptimeDB](https://github.com/GreptimeTeam/greptimedb-mcp-server)** - Provides AI assistants with a secure and structured way to explore and analyze data in [GreptimeDB](https://github.com/GreptimeTeam/greptimedb).
- <img height="12" width="12" src="https://img.alicdn.com/imgextra/i3/O1CN01d9qrry1i6lTNa2BRa_!!6000000004364-2-tps-218-200.png" alt="Hologres Logo" /> **[Hologres](https://github.com/aliyun/alibabacloud-hologres-mcp-server)** - Connect to a [Hologres](https://www.alibabacloud.com/en/product/hologres) instance, get table metadata, query and analyze data.
- <img height="12" width="12" src="https://hyperbrowser-assets-bucket.s3.us-east-1.amazonaws.com/Hyperbrowser-logo.png" alt="Hyperbrowsers23 Logo" /> **[Hyperbrowser](https://github.com/hyperbrowserai/mcp)** - [Hyperbrowser](https://www.hyperbrowser.ai/) is the next-generation platform empowering AI agents and enabling effortless, scalable browser automation.
- **[IBM wxflows](https://github.com/IBM/wxflows/tree/main/examples/mcp/javascript)** - Tool platform by IBM to build, test and deploy tools for any data source

View File

@@ -107,6 +107,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc
This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.
### Customization - Proxy
The server can be configured to use a proxy by using the `--proxy-url` argument.
## Debugging
You can use the MCP inspector to debug the server. For uvx installations:

View File

@@ -1,6 +1,6 @@
[project]
name = "mcp-server-fetch"
version = "0.6.2"
version = "0.6.3"
description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
readme = "README.md"
requires-python = ">=3.10"
@@ -16,6 +16,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
]
dependencies = [
"httpx<0.28",
"markdownify>=0.13.1",
"mcp>=1.1.3",
"protego>=0.3.1",

View File

@@ -15,9 +15,10 @@ def main():
action="store_true",
help="Ignore robots.txt restrictions",
)
parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
args = parser.parse_args()
asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
if __name__ == "__main__":

View File

@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
return robots_url
async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
"""
Check if the URL can be fetched by the user agent according to the robots.txt file.
Raises a McpError if not.
@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
robot_txt_url = get_robots_txt_url(url)
async with AsyncClient() as client:
async with AsyncClient(proxies=proxy_url) as client:
try:
response = await client.get(
robot_txt_url,
@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
async def fetch_url(
url: str, user_agent: str, force_raw: bool = False
url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
) -> Tuple[str, str]:
"""
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
"""
from httpx import AsyncClient, HTTPError
async with AsyncClient() as client:
async with AsyncClient(proxies=proxy_url) as client:
try:
response = await client.get(
url,
@@ -173,19 +173,22 @@ class Fetch(BaseModel):
bool,
Field(
default=False,
description="Get the actual HTML content if the requested page, without simplification.",
description="Get the actual HTML content of the requested page, without simplification.",
),
]
async def serve(
custom_user_agent: str | None = None, ignore_robots_txt: bool = False
custom_user_agent: str | None = None,
ignore_robots_txt: bool = False,
proxy_url: str | None = None,
) -> None:
"""Run the fetch MCP server.
Args:
custom_user_agent: Optional custom User-Agent string to use for requests
ignore_robots_txt: Whether to ignore robots.txt restrictions
proxy_url: Optional proxy URL to use for requests
"""
server = Server("mcp-fetch")
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
@@ -229,10 +232,10 @@ Although originally you did not have internet access, and were advised to refuse
raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))
if not ignore_robots_txt:
await check_may_autonomously_fetch_url(url, user_agent_autonomous)
await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)
content, prefix = await fetch_url(
url, user_agent_autonomous, force_raw=args.raw
url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
)
original_length = len(content)
if args.start_index >= original_length:
@@ -259,7 +262,7 @@ Although originally you did not have internet access, and were advised to refuse
url = arguments["url"]
try:
content, prefix = await fetch_url(url, user_agent_manual)
content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
# TODO: after SDK bug is addressed, don't catch the exception
except McpError as e:
return GetPromptResult(

View File

@@ -8,7 +8,10 @@ A Model Context Protocol server that provides browser automation capabilities us
- **puppeteer_navigate**
- Navigate to any URL in the browser
- Input: `url` (string)
- Inputs:
- `url` (string, required): URL to navigate to
- `launchOptions` (object, optional): PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: `{ headless: true, args: ['--user-data-dir="C:/Data"'] }`
- `allowDangerous` (boolean, optional): Allow dangerous LaunchOptions that reduce security. When false, dangerous args like `--no-sandbox`, `--disable-web-security` will throw errors. Default false.
- **puppeteer_screenshot**
- Capture screenshots of the entire page or specific elements
@@ -61,6 +64,7 @@ The server provides access to two types of resources:
- Screenshot capabilities
- JavaScript execution
- Basic web interaction (navigation, clicking, form filling)
- Customizable Puppeteer launch options
## Configuration to use Puppeteer Server
Here's the Claude Desktop configuration to use the Puppeter server:
@@ -93,6 +97,39 @@ Here's the Claude Desktop configuration to use the Puppeter server:
}
```
### Launch Options
You can customize Puppeteer's browser behavior in two ways:
1. **Environment Variable**: Set `PUPPETEER_LAUNCH_OPTIONS` with a JSON-encoded string in the MCP configuration's `env` parameter:
```json
{
"mcpServers": {
"mcp-puppeteer": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-puppeteer"]
"env": {
"PUPPETEER_LAUNCH_OPTIONS": "{ \"headless\": false, \"executablePath\": \"C:/Program Files/Google/Chrome/Application/chrome.exe\", \"args\": [] }",
"ALLOW_DANGEROUS": "true"
}
}
}
}
```
2. **Tool Call Arguments**: Pass `launchOptions` and `allowDangerous` parameters to the `puppeteer_navigate` tool:
```json
{
"url": "https://example.com",
"launchOptions": {
"headless": false,
"defaultViewport": {"width": 1280, "height": 720}
}
}
```
## Build
Docker build:
@@ -103,4 +140,4 @@ docker build -t mcp/puppeteer -f src/puppeteer/Dockerfile .
## License
This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.

View File

@@ -22,7 +22,9 @@ const TOOLS: Tool[] = [
inputSchema: {
type: "object",
properties: {
url: { type: "string" },
url: { type: "string", description: "URL to navigate to" },
launchOptions: { type: "object", description: "PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: { headless: true, args: ['--no-sandbox'] }" },
allowDangerous: { type: "boolean", description: "Allow dangerous LaunchOptions that reduce security. When false, dangerous args like --no-sandbox will throw errors. Default false." },
},
required: ["url"],
},
@@ -101,16 +103,65 @@ const TOOLS: Tool[] = [
];
// Global state
let browser: Browser | undefined;
let page: Page | undefined;
let browser: Browser | null;
let page: Page | null;
const consoleLogs: string[] = [];
const screenshots = new Map<string, string>();
let previousLaunchOptions: any = null;
async function ensureBrowser({ launchOptions, allowDangerous }: any) {
const DANGEROUS_ARGS = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--single-process',
'--disable-web-security',
'--ignore-certificate-errors',
'--disable-features=IsolateOrigins',
'--disable-site-isolation-trials',
'--allow-running-insecure-content'
];
// Parse environment config safely
let envConfig = {};
try {
envConfig = JSON.parse(process.env.PUPPETEER_LAUNCH_OPTIONS || '{}');
} catch (error: any) {
console.warn('Failed to parse PUPPETEER_LAUNCH_OPTIONS:', error?.message || error);
}
// Deep merge environment config with user-provided options
const mergedConfig = deepMerge(envConfig, launchOptions || {});
// Security validation for merged config
if (mergedConfig?.args) {
const dangerousArgs = mergedConfig.args?.filter?.((arg: string) => DANGEROUS_ARGS.some((dangerousArg: string) => arg.startsWith(dangerousArg)));
if (dangerousArgs?.length > 0 && !(allowDangerous || (process.env.ALLOW_DANGEROUS === 'true'))) {
throw new Error(`Dangerous browser arguments detected: ${dangerousArgs.join(', ')}. Fround from environment variable and tool call argument. ` +
'Set allowDangerous: true in the tool call arguments to override.');
}
}
try {
if ((browser && !browser.connected) ||
(launchOptions && (JSON.stringify(launchOptions) != JSON.stringify(previousLaunchOptions)))) {
await browser?.close();
browser = null;
}
}
catch (error) {
browser = null;
}
previousLaunchOptions = launchOptions;
async function ensureBrowser() {
if (!browser) {
const npx_args = { headless: false }
const docker_args = { headless: true, args: ["--no-sandbox", "--single-process", "--no-zygote"] }
browser = await puppeteer.launch(process.env.DOCKER_CONTAINER ? docker_args : npx_args);
browser = await puppeteer.launch(deepMerge(
process.env.DOCKER_CONTAINER ? docker_args : npx_args,
mergedConfig
));
const pages = await browser.pages();
page = pages[0];
@@ -126,6 +177,31 @@ async function ensureBrowser() {
return page!;
}
// Deep merge utility function
function deepMerge(target: any, source: any): any {
const output = Object.assign({}, target);
if (typeof target !== 'object' || typeof source !== 'object') return source;
for (const key of Object.keys(source)) {
const targetVal = target[key];
const sourceVal = source[key];
if (Array.isArray(targetVal) && Array.isArray(sourceVal)) {
// Deduplicate args/ignoreDefaultArgs, prefer source values
output[key] = [...new Set([
...(key === 'args' || key === 'ignoreDefaultArgs' ?
targetVal.filter((arg: string) => !sourceVal.some((launchArg: string) => arg.startsWith('--') && launchArg.startsWith(arg.split('=')[0]))) :
targetVal),
...sourceVal
])];
} else if (sourceVal instanceof Object && key in target) {
output[key] = deepMerge(targetVal, sourceVal);
} else {
output[key] = sourceVal;
}
}
return output;
}
declare global {
interface Window {
mcpHelper: {
@@ -136,7 +212,7 @@ declare global {
}
async function handleToolCall(name: string, args: any): Promise<CallToolResult> {
const page = await ensureBrowser();
const page = await ensureBrowser(args);
switch (name) {
case "puppeteer_navigate":
@@ -285,15 +361,15 @@ async function handleToolCall(name: string, args: any): Promise<CallToolResult>
window.mcpHelper.logs.push(`[${method}] ${args.join(' ')}`);
(window.mcpHelper.originalConsole as any)[method](...args);
};
} );
} );
});
});
const result = await page.evaluate( args.script );
const result = await page.evaluate(args.script);
const logs = await page.evaluate(() => {
Object.assign(console, window.mcpHelper.originalConsole);
const logs = window.mcpHelper.logs;
delete ( window as any).mcpHelper;
delete (window as any).mcpHelper;
return logs;
});
@@ -405,4 +481,4 @@ runServer().catch(console.error);
process.stdin.on("close", () => {
console.error("Puppeteer MCP Server closed");
server.close();
});
});