Merge branch 'main' into add_baidu_ai_search

This commit is contained in:
wolvever
2025-04-01 09:25:16 +08:00
committed by GitHub
9 changed files with 190 additions and 32 deletions

View File

@@ -73,6 +73,7 @@ Official integrations are maintained by companies building production ready MCP
- <img height="12" width="12" src="https://cdn.prod.website-files.com/6605a2979ff17b2cd1939cd4/6605a460de47e7596ed84f06_icon256.png" alt="gotoHuman Logo" /> **[gotoHuman](https://github.com/gotohuman/gotohuman-mcp-server)** - Human-in-the-loop platform - Allow AI agents and automations to send requests for approval to your [gotoHuman](https://www.gotohuman.com) inbox.
- <img height="12" width="12" src="https://grafana.com/favicon.ico" alt="Grafana Logo" /> **[Grafana](https://github.com/grafana/mcp-grafana)** - Search dashboards, investigate incidents and query datasources in your Grafana instance
- <img height="12" width="12" src="https://framerusercontent.com/images/KCOWBYLKunDff1Dr452y6EfjiU.png" alt="Graphlit Logo" /> **[Graphlit](https://github.com/graphlit/graphlit-mcp-server)** - Ingest anything from Slack to Gmail to podcast feeds, in addition to web crawling, into a searchable [Graphlit](https://www.graphlit.com) project.
- <img height="12" width="12" src="https://greptime.com/favicon.ico" alt="Greptime Logo" /> **[GreptimeDB](https://github.com/GreptimeTeam/greptimedb-mcp-server)** - Provides AI assistants with a secure and structured way to explore and analyze data in [GreptimeDB](https://github.com/GreptimeTeam/greptimedb).
- <img height="12" width="12" src="https://img.alicdn.com/imgextra/i3/O1CN01d9qrry1i6lTNa2BRa_!!6000000004364-2-tps-218-200.png" alt="Hologres Logo" /> **[Hologres](https://github.com/aliyun/alibabacloud-hologres-mcp-server)** - Connect to a [Hologres](https://www.alibabacloud.com/en/product/hologres) instance, get table metadata, query and analyze data.
- <img height="12" width="12" src="https://hyperbrowser-assets-bucket.s3.us-east-1.amazonaws.com/Hyperbrowser-logo.png" alt="Hyperbrowser Logo" /> **[Hyperbrowser](https://github.com/hyperbrowserai/mcp)** - [Hyperbrowser](https://www.hyperbrowser.ai/) is the next-generation platform empowering AI agents and enabling effortless, scalable browser automation.
- **[IBM wxflows](https://github.com/IBM/wxflows/tree/main/examples/mcp/javascript)** - Tool platform by IBM to build, test and deploy tools for any data source

View File

@@ -107,6 +107,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc
This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.
### Customization - Proxy
The server can be configured to use a proxy by using the `--proxy-url` argument.
## Debugging
You can use the MCP inspector to debug the server. For uvx installations:

View File

@@ -1,6 +1,6 @@
[project]
name = "mcp-server-fetch"
version = "0.6.2"
version = "0.6.3"
description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
readme = "README.md"
requires-python = ">=3.10"
@@ -16,6 +16,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
]
dependencies = [
"httpx<0.28",
"markdownify>=0.13.1",
"mcp>=1.1.3",
"protego>=0.3.1",

View File

@@ -15,9 +15,10 @@ def main():
action="store_true",
help="Ignore robots.txt restrictions",
)
parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
args = parser.parse_args()
asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
if __name__ == "__main__":

View File

@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
return robots_url
async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
"""
Check if the URL can be fetched by the user agent according to the robots.txt file.
Raises a McpError if not.
@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
robot_txt_url = get_robots_txt_url(url)
async with AsyncClient() as client:
async with AsyncClient(proxies=proxy_url) as client:
try:
response = await client.get(
robot_txt_url,
@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
async def fetch_url(
url: str, user_agent: str, force_raw: bool = False
url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
) -> Tuple[str, str]:
"""
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
"""
from httpx import AsyncClient, HTTPError
async with AsyncClient() as client:
async with AsyncClient(proxies=proxy_url) as client:
try:
response = await client.get(
url,
@@ -173,19 +173,22 @@ class Fetch(BaseModel):
bool,
Field(
default=False,
description="Get the actual HTML content if the requested page, without simplification.",
description="Get the actual HTML content of the requested page, without simplification.",
),
]
async def serve(
custom_user_agent: str | None = None, ignore_robots_txt: bool = False
custom_user_agent: str | None = None,
ignore_robots_txt: bool = False,
proxy_url: str | None = None,
) -> None:
"""Run the fetch MCP server.
Args:
custom_user_agent: Optional custom User-Agent string to use for requests
ignore_robots_txt: Whether to ignore robots.txt restrictions
proxy_url: Optional proxy URL to use for requests
"""
server = Server("mcp-fetch")
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
@@ -229,10 +232,10 @@ Although originally you did not have internet access, and were advised to refuse
raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))
if not ignore_robots_txt:
await check_may_autonomously_fetch_url(url, user_agent_autonomous)
await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)
content, prefix = await fetch_url(
url, user_agent_autonomous, force_raw=args.raw
url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
)
original_length = len(content)
if args.start_index >= original_length:
@@ -259,7 +262,7 @@ Although originally you did not have internet access, and were advised to refuse
url = arguments["url"]
try:
content, prefix = await fetch_url(url, user_agent_manual)
content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
# TODO: after SDK bug is addressed, don't catch the exception
except McpError as e:
return GetPromptResult(

View File

@@ -7,6 +7,7 @@ import {
} from "@modelcontextprotocol/sdk/types.js";
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import fetch, { Request, Response } from 'node-fetch';
import * as repository from './operations/repository.js';
import * as files from './operations/files.js';
@@ -27,6 +28,11 @@ import {
} from './common/errors.js';
import { VERSION } from "./common/version.js";
// If fetch doesn't exist in global scope, add it
if (!globalThis.fetch) {
globalThis.fetch = fetch as unknown as typeof global.fetch;
}
const server = new Server(
{
name: "github-mcp-server",
@@ -293,10 +299,39 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
case "create_issue": {
const args = issues.CreateIssueSchema.parse(request.params.arguments);
const { owner, repo, ...options } = args;
const issue = await issues.createIssue(owner, repo, options);
return {
content: [{ type: "text", text: JSON.stringify(issue, null, 2) }],
};
try {
console.error(`[DEBUG] Attempting to create issue in ${owner}/${repo}`);
console.error(`[DEBUG] Issue options:`, JSON.stringify(options, null, 2));
const issue = await issues.createIssue(owner, repo, options);
console.error(`[DEBUG] Issue created successfully`);
return {
content: [{ type: "text", text: JSON.stringify(issue, null, 2) }],
};
} catch (err) {
// Type guard for Error objects
const error = err instanceof Error ? err : new Error(String(err));
console.error(`[ERROR] Failed to create issue:`, error);
if (error instanceof GitHubResourceNotFoundError) {
throw new Error(
`Repository '${owner}/${repo}' not found. Please verify:\n` +
`1. The repository exists\n` +
`2. You have correct access permissions\n` +
`3. The owner and repository names are spelled correctly`
);
}
// Safely access error properties
throw new Error(
`Failed to create issue: ${error.message}${
error.stack ? `\nStack: ${error.stack}` : ''
}`
);
}
}
case "create_pull_request": {

View File

@@ -22,10 +22,10 @@ export const GitLabRepositorySchema = z.object({
name: z.string(),
path_with_namespace: z.string(), // Changed from full_name to match GitLab API
visibility: z.string(), // Changed from private to match GitLab API
owner: GitLabOwnerSchema,
owner: GitLabOwnerSchema.optional(),
web_url: z.string(), // Changed from html_url to match GitLab API
description: z.string().nullable(),
fork: z.boolean(),
fork: z.boolean().optional(),
ssh_url_to_repo: z.string(), // Changed from ssh_url to match GitLab API
http_url_to_repo: z.string(), // Changed from clone_url to match GitLab API
created_at: z.string(),
@@ -218,12 +218,12 @@ export const GitLabMergeRequestSchema = z.object({
title: z.string(),
description: z.string(), // Changed from body to match GitLab API
state: z.string(),
merged: z.boolean(),
merged: z.boolean().optional(),
author: GitLabUserSchema,
assignees: z.array(GitLabUserSchema),
source_branch: z.string(), // Changed from head to match GitLab API
target_branch: z.string(), // Changed from base to match GitLab API
diff_refs: GitLabMergeRequestDiffRefSchema,
diff_refs: GitLabMergeRequestDiffRefSchema.nullable(),
web_url: z.string(), // Changed from html_url to match GitLab API
created_at: z.string(),
updated_at: z.string(),

View File

@@ -8,7 +8,10 @@ A Model Context Protocol server that provides browser automation capabilities us
- **puppeteer_navigate**
- Navigate to any URL in the browser
- Input: `url` (string)
- Inputs:
- `url` (string, required): URL to navigate to
- `launchOptions` (object, optional): PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: `{ headless: true, args: ['--user-data-dir="C:/Data"'] }`
- `allowDangerous` (boolean, optional): Allow dangerous LaunchOptions that reduce security. When false, dangerous args like `--no-sandbox`, `--disable-web-security` will throw errors. Default false.
- **puppeteer_screenshot**
- Capture screenshots of the entire page or specific elements
@@ -61,6 +64,7 @@ The server provides access to two types of resources:
- Screenshot capabilities
- JavaScript execution
- Basic web interaction (navigation, clicking, form filling)
- Customizable Puppeteer launch options
## Configuration to use Puppeteer Server
Here's the Claude Desktop configuration to use the Puppeteer server:
@@ -93,6 +97,39 @@ Here's the Claude Desktop configuration to use the Puppeter server:
}
```
### Launch Options
You can customize Puppeteer's browser behavior in two ways:
1. **Environment Variable**: Set `PUPPETEER_LAUNCH_OPTIONS` with a JSON-encoded string in the MCP configuration's `env` parameter:
```json
{
"mcpServers": {
"mcp-puppeteer": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-puppeteer"],
"env": {
"PUPPETEER_LAUNCH_OPTIONS": "{ \"headless\": false, \"executablePath\": \"C:/Program Files/Google/Chrome/Application/chrome.exe\", \"args\": [] }",
"ALLOW_DANGEROUS": "true"
}
}
}
}
```
2. **Tool Call Arguments**: Pass `launchOptions` and `allowDangerous` parameters to the `puppeteer_navigate` tool:
```json
{
"url": "https://example.com",
"launchOptions": {
"headless": false,
"defaultViewport": {"width": 1280, "height": 720}
}
}
```
## Build
Docker build:
@@ -103,4 +140,4 @@ docker build -t mcp/puppeteer -f src/puppeteer/Dockerfile .
## License
This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.

View File

@@ -22,7 +22,9 @@ const TOOLS: Tool[] = [
inputSchema: {
type: "object",
properties: {
url: { type: "string" },
url: { type: "string", description: "URL to navigate to" },
launchOptions: { type: "object", description: "PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: { headless: true, args: ['--no-sandbox'] }" },
allowDangerous: { type: "boolean", description: "Allow dangerous LaunchOptions that reduce security. When false, dangerous args like --no-sandbox will throw errors. Default false." },
},
required: ["url"],
},
@@ -101,16 +103,65 @@ const TOOLS: Tool[] = [
];
// Global state
let browser: Browser | undefined;
let page: Page | undefined;
let browser: Browser | null;
let page: Page | null;
const consoleLogs: string[] = [];
const screenshots = new Map<string, string>();
let previousLaunchOptions: any = null;
async function ensureBrowser({ launchOptions, allowDangerous }: any) {
const DANGEROUS_ARGS = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--single-process',
'--disable-web-security',
'--ignore-certificate-errors',
'--disable-features=IsolateOrigins',
'--disable-site-isolation-trials',
'--allow-running-insecure-content'
];
// Parse environment config safely
let envConfig = {};
try {
envConfig = JSON.parse(process.env.PUPPETEER_LAUNCH_OPTIONS || '{}');
} catch (error: any) {
console.warn('Failed to parse PUPPETEER_LAUNCH_OPTIONS:', error?.message || error);
}
// Deep merge environment config with user-provided options
const mergedConfig = deepMerge(envConfig, launchOptions || {});
// Security validation for merged config
if (mergedConfig?.args) {
const dangerousArgs = mergedConfig.args?.filter?.((arg: string) => DANGEROUS_ARGS.some((dangerousArg: string) => arg.startsWith(dangerousArg)));
if (dangerousArgs?.length > 0 && !(allowDangerous || (process.env.ALLOW_DANGEROUS === 'true'))) {
throw new Error(`Dangerous browser arguments detected: ${dangerousArgs.join(', ')}. Fround from environment variable and tool call argument. ` +
'Set allowDangerous: true in the tool call arguments to override.');
}
}
try {
if ((browser && !browser.connected) ||
(launchOptions && (JSON.stringify(launchOptions) != JSON.stringify(previousLaunchOptions)))) {
await browser?.close();
browser = null;
}
}
catch (error) {
browser = null;
}
previousLaunchOptions = launchOptions;
async function ensureBrowser() {
if (!browser) {
const npx_args = { headless: false }
const docker_args = { headless: true, args: ["--no-sandbox", "--single-process", "--no-zygote"] }
browser = await puppeteer.launch(process.env.DOCKER_CONTAINER ? docker_args : npx_args);
browser = await puppeteer.launch(deepMerge(
process.env.DOCKER_CONTAINER ? docker_args : npx_args,
mergedConfig
));
const pages = await browser.pages();
page = pages[0];
@@ -126,6 +177,31 @@ async function ensureBrowser() {
return page!;
}
/** Returns true for non-null, non-array objects: the only values `deepMerge` recurses into. */
function isMergeableObject(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Extracts the flag name from a command-line argument, e.g. `--window-size=1,2` -> `--window-size`. */
function launchFlagName(arg: unknown): string {
  return String(arg).split('=')[0];
}

/**
 * Deep merge utility function.
 *
 * Produces a new object containing the keys of `target` overlaid with the keys
 * of `source`; neither input is mutated.
 *
 * Merge rules, applied per key of `source`:
 * - Both values are arrays: the result is the de-duplicated concatenation
 *   `[...target, ...source]`. For the keys `args` and `ignoreDefaultArgs`,
 *   a target entry starting with `--` is dropped when `source` carries an entry
 *   with exactly the same flag name (the text before the first `=`), so the
 *   source value of a flag wins.
 * - Both values are non-null, non-array objects: they are merged recursively.
 * - Anything else (primitives, `null`, array/object mismatch): the source
 *   value replaces the target value.
 *
 * @param target Base value. If it is not a non-null, non-array object, `source` is returned as-is.
 * @param source Overriding value. If it is not a non-null, non-array object, it is returned as-is.
 * @returns The merged object, or `source` itself when either side cannot be merged.
 */
function deepMerge(target: any, source: any): any {
  // `typeof null === 'object'`, so null must be excluded explicitly; otherwise
  // reading `target[key]` or `Object.keys(source)` below would throw.
  if (!isMergeableObject(target) || !isMergeableObject(source)) return source;

  const output: Record<string, unknown> = { ...target };

  for (const key of Object.keys(source)) {
    // A JSON-parsed payload can carry an own "__proto__" key; assigning it
    // would replace the prototype of the result instead of setting a property.
    if (key === '__proto__') continue;

    const targetVal = target[key];
    const sourceVal = source[key];

    if (Array.isArray(targetVal) && Array.isArray(sourceVal)) {
      let kept: unknown[] = targetVal;
      if (key === 'args' || key === 'ignoreDefaultArgs') {
        // Compare whole flag names: a prefix test would wrongly drop
        // `--disable-gpu` when the source contains `--disable-gpu-sandbox`.
        const overridden = new Set(sourceVal.map(launchFlagName));
        kept = targetVal.filter(
          (arg: unknown) =>
            !(typeof arg === 'string' && arg.startsWith('--') && overridden.has(launchFlagName(arg)))
        );
      }
      output[key] = [...new Set([...kept, ...sourceVal])];
    } else if (isMergeableObject(targetVal) && isMergeableObject(sourceVal)) {
      output[key] = deepMerge(targetVal, sourceVal);
    } else {
      // Primitive, null, or mismatched container types: source wins outright.
      output[key] = sourceVal;
    }
  }

  return output;
}
declare global {
interface Window {
mcpHelper: {
@@ -136,7 +212,7 @@ declare global {
}
async function handleToolCall(name: string, args: any): Promise<CallToolResult> {
const page = await ensureBrowser();
const page = await ensureBrowser(args);
switch (name) {
case "puppeteer_navigate":
@@ -285,15 +361,15 @@ async function handleToolCall(name: string, args: any): Promise<CallToolResult>
window.mcpHelper.logs.push(`[${method}] ${args.join(' ')}`);
(window.mcpHelper.originalConsole as any)[method](...args);
};
} );
} );
});
});
const result = await page.evaluate( args.script );
const result = await page.evaluate(args.script);
const logs = await page.evaluate(() => {
Object.assign(console, window.mcpHelper.originalConsole);
const logs = window.mcpHelper.logs;
delete ( window as any).mcpHelper;
delete (window as any).mcpHelper;
return logs;
});
@@ -405,4 +481,4 @@ runServer().catch(console.error);
process.stdin.on("close", () => {
console.error("Puppeteer MCP Server closed");
server.close();
});
});