Merge branch 'main' into add_baidu_ai_search

This commit is contained in:
wolvever
2025-04-01 09:25:16 +08:00
committed by GitHub
9 changed files with 190 additions and 32 deletions

View File

@@ -73,6 +73,7 @@ Official integrations are maintained by companies building production ready MCP
- <img height="12" width="12" src="https://cdn.prod.website-files.com/6605a2979ff17b2cd1939cd4/6605a460de47e7596ed84f06_icon256.png" alt="gotoHuman Logo" /> **[gotoHuman](https://github.com/gotohuman/gotohuman-mcp-server)** - Human-in-the-loop platform - Allow AI agents and automations to send requests for approval to your [gotoHuman](https://www.gotohuman.com) inbox.
- <img height="12" width="12" src="https://grafana.com/favicon.ico" alt="Grafana Logo" /> **[Grafana](https://github.com/grafana/mcp-grafana)** - Search dashboards, investigate incidents and query datasources in your Grafana instance
- <img height="12" width="12" src="https://framerusercontent.com/images/KCOWBYLKunDff1Dr452y6EfjiU.png" alt="Graphlit Logo" /> **[Graphlit](https://github.com/graphlit/graphlit-mcp-server)** - Ingest anything from Slack to Gmail to podcast feeds, in addition to web crawling, into a searchable [Graphlit](https://www.graphlit.com) project.
- <img height="12" width="12" src="https://greptime.com/favicon.ico" alt="Greptime Logo" /> **[GreptimeDB](https://github.com/GreptimeTeam/greptimedb-mcp-server)** - Provides AI assistants with a secure and structured way to explore and analyze data in [GreptimeDB](https://github.com/GreptimeTeam/greptimedb).
- <img height="12" width="12" src="https://img.alicdn.com/imgextra/i3/O1CN01d9qrry1i6lTNa2BRa_!!6000000004364-2-tps-218-200.png" alt="Hologres Logo" /> **[Hologres](https://github.com/aliyun/alibabacloud-hologres-mcp-server)** - Connect to a [Hologres](https://www.alibabacloud.com/en/product/hologres) instance, get table metadata, query and analyze data.
- <img height="12" width="12" src="https://hyperbrowser-assets-bucket.s3.us-east-1.amazonaws.com/Hyperbrowser-logo.png" alt="Hyperbrowsers23 Logo" /> **[Hyperbrowser](https://github.com/hyperbrowserai/mcp)** - [Hyperbrowser](https://www.hyperbrowser.ai/) is the next-generation platform empowering AI agents and enabling effortless, scalable browser automation.
- **[IBM wxflows](https://github.com/IBM/wxflows/tree/main/examples/mcp/javascript)** - Tool platform by IBM to build, test and deploy tools for any data source

View File

@@ -107,6 +107,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc
This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.
### Customization - Proxy
The server can be configured to use a proxy by using the `--proxy-url` argument.
## Debugging
You can use the MCP inspector to debug the server. For uvx installations:

View File

@@ -1,6 +1,6 @@
[project]
name = "mcp-server-fetch"
version = "0.6.2"
version = "0.6.3"
description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
readme = "README.md"
requires-python = ">=3.10"
@@ -16,6 +16,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
]
dependencies = [
"httpx<0.28",
"markdownify>=0.13.1",
"mcp>=1.1.3",
"protego>=0.3.1",

View File

@@ -15,9 +15,10 @@ def main():
action="store_true",
help="Ignore robots.txt restrictions",
)
parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")
args = parser.parse_args()
asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))
if __name__ == "__main__":

View File

@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
return robots_url
async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
"""
Check if the URL can be fetched by the user agent according to the robots.txt file.
Raises a McpError if not.
@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
robot_txt_url = get_robots_txt_url(url)
async with AsyncClient() as client:
async with AsyncClient(proxies=proxy_url) as client:
try:
response = await client.get(
robot_txt_url,
@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
async def fetch_url(
url: str, user_agent: str, force_raw: bool = False
url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
) -> Tuple[str, str]:
"""
Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
"""
from httpx import AsyncClient, HTTPError
async with AsyncClient() as client:
async with AsyncClient(proxies=proxy_url) as client:
try:
response = await client.get(
url,
@@ -173,19 +173,22 @@ class Fetch(BaseModel):
bool,
Field(
default=False,
description="Get the actual HTML content if the requested page, without simplification.",
description="Get the actual HTML content of the requested page, without simplification.",
),
]
async def serve(
custom_user_agent: str | None = None, ignore_robots_txt: bool = False
custom_user_agent: str | None = None,
ignore_robots_txt: bool = False,
proxy_url: str | None = None,
) -> None:
"""Run the fetch MCP server.
Args:
custom_user_agent: Optional custom User-Agent string to use for requests
ignore_robots_txt: Whether to ignore robots.txt restrictions
proxy_url: Optional proxy URL to use for requests
"""
server = Server("mcp-fetch")
user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
@@ -229,10 +232,10 @@ Although originally you did not have internet access, and were advised to refuse
raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))
if not ignore_robots_txt:
await check_may_autonomously_fetch_url(url, user_agent_autonomous)
await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)
content, prefix = await fetch_url(
url, user_agent_autonomous, force_raw=args.raw
url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
)
original_length = len(content)
if args.start_index >= original_length:
@@ -259,7 +262,7 @@ Although originally you did not have internet access, and were advised to refuse
url = arguments["url"]
try:
content, prefix = await fetch_url(url, user_agent_manual)
content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
# TODO: after SDK bug is addressed, don't catch the exception
except McpError as e:
return GetPromptResult(

View File

@@ -7,6 +7,7 @@ import {
} from "@modelcontextprotocol/sdk/types.js";
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import fetch, { Request, Response } from 'node-fetch';
import * as repository from './operations/repository.js';
import * as files from './operations/files.js';
@@ -27,6 +28,11 @@ import {
} from './common/errors.js';
import { VERSION } from "./common/version.js";
// If fetch doesn't exist in global scope, add it
if (!globalThis.fetch) {
globalThis.fetch = fetch as unknown as typeof global.fetch;
}
const server = new Server(
{
name: "github-mcp-server",
@@ -293,10 +299,39 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
case "create_issue": {
const args = issues.CreateIssueSchema.parse(request.params.arguments);
const { owner, repo, ...options } = args;
const issue = await issues.createIssue(owner, repo, options);
return {
content: [{ type: "text", text: JSON.stringify(issue, null, 2) }],
};
try {
console.error(`[DEBUG] Attempting to create issue in ${owner}/${repo}`);
console.error(`[DEBUG] Issue options:`, JSON.stringify(options, null, 2));
const issue = await issues.createIssue(owner, repo, options);
console.error(`[DEBUG] Issue created successfully`);
return {
content: [{ type: "text", text: JSON.stringify(issue, null, 2) }],
};
} catch (err) {
// Type guard for Error objects
const error = err instanceof Error ? err : new Error(String(err));
console.error(`[ERROR] Failed to create issue:`, error);
if (error instanceof GitHubResourceNotFoundError) {
throw new Error(
`Repository '${owner}/${repo}' not found. Please verify:\n` +
`1. The repository exists\n` +
`2. You have correct access permissions\n` +
`3. The owner and repository names are spelled correctly`
);
}
// Safely access error properties
throw new Error(
`Failed to create issue: ${error.message}${
error.stack ? `\nStack: ${error.stack}` : ''
}`
);
}
}
case "create_pull_request": {

View File

@@ -22,10 +22,10 @@ export const GitLabRepositorySchema = z.object({
name: z.string(),
path_with_namespace: z.string(), // Changed from full_name to match GitLab API
visibility: z.string(), // Changed from private to match GitLab API
owner: GitLabOwnerSchema,
owner: GitLabOwnerSchema.optional(),
web_url: z.string(), // Changed from html_url to match GitLab API
description: z.string().nullable(),
fork: z.boolean(),
fork: z.boolean().optional(),
ssh_url_to_repo: z.string(), // Changed from ssh_url to match GitLab API
http_url_to_repo: z.string(), // Changed from clone_url to match GitLab API
created_at: z.string(),
@@ -218,12 +218,12 @@ export const GitLabMergeRequestSchema = z.object({
title: z.string(),
description: z.string(), // Changed from body to match GitLab API
state: z.string(),
merged: z.boolean(),
merged: z.boolean().optional(),
author: GitLabUserSchema,
assignees: z.array(GitLabUserSchema),
source_branch: z.string(), // Changed from head to match GitLab API
target_branch: z.string(), // Changed from base to match GitLab API
diff_refs: GitLabMergeRequestDiffRefSchema,
diff_refs: GitLabMergeRequestDiffRefSchema.nullable(),
web_url: z.string(), // Changed from html_url to match GitLab API
created_at: z.string(),
updated_at: z.string(),

View File

@@ -8,7 +8,10 @@ A Model Context Protocol server that provides browser automation capabilities us
- **puppeteer_navigate**
- Navigate to any URL in the browser
- Input: `url` (string)
- Inputs:
- `url` (string, required): URL to navigate to
- `launchOptions` (object, optional): PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: `{ headless: true, args: ['--user-data-dir="C:/Data"'] }`
- `allowDangerous` (boolean, optional): Allow dangerous LaunchOptions that reduce security. When false, dangerous args like `--no-sandbox`, `--disable-web-security` will throw errors. Default false.
- **puppeteer_screenshot**
- Capture screenshots of the entire page or specific elements
@@ -61,6 +64,7 @@ The server provides access to two types of resources:
- Screenshot capabilities
- JavaScript execution
- Basic web interaction (navigation, clicking, form filling)
- Customizable Puppeteer launch options
## Configuration to use Puppeteer Server
Here's the Claude Desktop configuration to use the Puppeter server:
@@ -93,6 +97,39 @@ Here's the Claude Desktop configuration to use the Puppeter server:
}
```
### Launch Options
You can customize Puppeteer's browser behavior in two ways:
1. **Environment Variable**: Set `PUPPETEER_LAUNCH_OPTIONS` with a JSON-encoded string in the MCP configuration's `env` parameter:
```json
{
"mcpServers": {
"mcp-puppeteer": {
"command": "npx",
"args": ["-y", "@modelcontextprotocol/server-puppeteer"]
"env": {
"PUPPETEER_LAUNCH_OPTIONS": "{ \"headless\": false, \"executablePath\": \"C:/Program Files/Google/Chrome/Application/chrome.exe\", \"args\": [] }",
"ALLOW_DANGEROUS": "true"
}
}
}
}
```
2. **Tool Call Arguments**: Pass `launchOptions` and `allowDangerous` parameters to the `puppeteer_navigate` tool:
```json
{
"url": "https://example.com",
"launchOptions": {
"headless": false,
"defaultViewport": {"width": 1280, "height": 720}
}
}
```
## Build
Docker build:

View File

@@ -22,7 +22,9 @@ const TOOLS: Tool[] = [
inputSchema: {
type: "object",
properties: {
url: { type: "string" },
url: { type: "string", description: "URL to navigate to" },
launchOptions: { type: "object", description: "PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: { headless: true, args: ['--no-sandbox'] }" },
allowDangerous: { type: "boolean", description: "Allow dangerous LaunchOptions that reduce security. When false, dangerous args like --no-sandbox will throw errors. Default false." },
},
required: ["url"],
},
@@ -101,16 +103,65 @@ const TOOLS: Tool[] = [
];
// Global state
let browser: Browser | undefined;
let page: Page | undefined;
let browser: Browser | null;
let page: Page | null;
const consoleLogs: string[] = [];
const screenshots = new Map<string, string>();
let previousLaunchOptions: any = null;
async function ensureBrowser({ launchOptions, allowDangerous }: any) {
const DANGEROUS_ARGS = [
'--no-sandbox',
'--disable-setuid-sandbox',
'--single-process',
'--disable-web-security',
'--ignore-certificate-errors',
'--disable-features=IsolateOrigins',
'--disable-site-isolation-trials',
'--allow-running-insecure-content'
];
// Parse environment config safely
let envConfig = {};
try {
envConfig = JSON.parse(process.env.PUPPETEER_LAUNCH_OPTIONS || '{}');
} catch (error: any) {
console.warn('Failed to parse PUPPETEER_LAUNCH_OPTIONS:', error?.message || error);
}
// Deep merge environment config with user-provided options
const mergedConfig = deepMerge(envConfig, launchOptions || {});
// Security validation for merged config
if (mergedConfig?.args) {
const dangerousArgs = mergedConfig.args?.filter?.((arg: string) => DANGEROUS_ARGS.some((dangerousArg: string) => arg.startsWith(dangerousArg)));
if (dangerousArgs?.length > 0 && !(allowDangerous || (process.env.ALLOW_DANGEROUS === 'true'))) {
throw new Error(`Dangerous browser arguments detected: ${dangerousArgs.join(', ')}. Fround from environment variable and tool call argument. ` +
'Set allowDangerous: true in the tool call arguments to override.');
}
}
try {
if ((browser && !browser.connected) ||
(launchOptions && (JSON.stringify(launchOptions) != JSON.stringify(previousLaunchOptions)))) {
await browser?.close();
browser = null;
}
}
catch (error) {
browser = null;
}
previousLaunchOptions = launchOptions;
async function ensureBrowser() {
if (!browser) {
const npx_args = { headless: false }
const docker_args = { headless: true, args: ["--no-sandbox", "--single-process", "--no-zygote"] }
browser = await puppeteer.launch(process.env.DOCKER_CONTAINER ? docker_args : npx_args);
browser = await puppeteer.launch(deepMerge(
process.env.DOCKER_CONTAINER ? docker_args : npx_args,
mergedConfig
));
const pages = await browser.pages();
page = pages[0];
@@ -126,6 +177,31 @@ async function ensureBrowser() {
return page!;
}
// Deep merge utility function
function deepMerge(target: any, source: any): any {
const output = Object.assign({}, target);
if (typeof target !== 'object' || typeof source !== 'object') return source;
for (const key of Object.keys(source)) {
const targetVal = target[key];
const sourceVal = source[key];
if (Array.isArray(targetVal) && Array.isArray(sourceVal)) {
// Deduplicate args/ignoreDefaultArgs, prefer source values
output[key] = [...new Set([
...(key === 'args' || key === 'ignoreDefaultArgs' ?
targetVal.filter((arg: string) => !sourceVal.some((launchArg: string) => arg.startsWith('--') && launchArg.startsWith(arg.split('=')[0]))) :
targetVal),
...sourceVal
])];
} else if (sourceVal instanceof Object && key in target) {
output[key] = deepMerge(targetVal, sourceVal);
} else {
output[key] = sourceVal;
}
}
return output;
}
declare global {
interface Window {
mcpHelper: {
@@ -136,7 +212,7 @@ declare global {
}
async function handleToolCall(name: string, args: any): Promise<CallToolResult> {
const page = await ensureBrowser();
const page = await ensureBrowser(args);
switch (name) {
case "puppeteer_navigate":
@@ -285,15 +361,15 @@ async function handleToolCall(name: string, args: any): Promise<CallToolResult>
window.mcpHelper.logs.push(`[${method}] ${args.join(' ')}`);
(window.mcpHelper.originalConsole as any)[method](...args);
};
} );
} );
});
});
const result = await page.evaluate( args.script );
const result = await page.evaluate(args.script);
const logs = await page.evaluate(() => {
Object.assign(console, window.mcpHelper.originalConsole);
const logs = window.mcpHelper.logs;
delete ( window as any).mcpHelper;
delete (window as any).mcpHelper;
return logs;
});