Merge branch 'main' into add_baidu_ai_search

2026-04-21 13:25:15 +02:00 · 2025-04-01 09:25:16 +08:00
parent 2b801b6353 e181222154
commit df6eef233b
9 changed files with 190 additions and 32 deletions
--- a/README.md
+++ b/README.md
@@ -73,6 +73,7 @@ Official integrations are maintained by companies building production ready MCP
 - <img height="12" width="12" src="https://cdn.prod.website-files.com/6605a2979ff17b2cd1939cd4/6605a460de47e7596ed84f06_icon256.png" alt="gotoHuman Logo" /> **[gotoHuman](https://github.com/gotohuman/gotohuman-mcp-server)** - Human-in-the-loop platform - Allow AI agents and automations to send requests for approval to your [gotoHuman](https://www.gotohuman.com) inbox.
 - <img height="12" width="12" src="https://grafana.com/favicon.ico" alt="Grafana Logo" /> **[Grafana](https://github.com/grafana/mcp-grafana)** - Search dashboards, investigate incidents and query datasources in your Grafana instance
 - <img height="12" width="12" src="https://framerusercontent.com/images/KCOWBYLKunDff1Dr452y6EfjiU.png" alt="Graphlit Logo" /> **[Graphlit](https://github.com/graphlit/graphlit-mcp-server)** - Ingest anything from Slack to Gmail to podcast feeds, in addition to web crawling, into a searchable [Graphlit](https://www.graphlit.com) project.
+- <img height="12" width="12" src="https://greptime.com/favicon.ico" alt="Greptime Logo" /> **[GreptimeDB](https://github.com/GreptimeTeam/greptimedb-mcp-server)** - Provides AI assistants with a secure and structured way to explore and analyze data in [GreptimeDB](https://github.com/GreptimeTeam/greptimedb).
 - <img height="12" width="12" src="https://img.alicdn.com/imgextra/i3/O1CN01d9qrry1i6lTNa2BRa_!!6000000004364-2-tps-218-200.png" alt="Hologres Logo" /> **[Hologres](https://github.com/aliyun/alibabacloud-hologres-mcp-server)** - Connect to a [Hologres](https://www.alibabacloud.com/en/product/hologres) instance, get table metadata, query and analyze data.
 - <img height="12" width="12" src="https://hyperbrowser-assets-bucket.s3.us-east-1.amazonaws.com/Hyperbrowser-logo.png" alt="Hyperbrowsers23 Logo" /> **[Hyperbrowser](https://github.com/hyperbrowserai/mcp)** - [Hyperbrowser](https://www.hyperbrowser.ai/) is the next-generation platform empowering AI agents and enabling effortless, scalable browser automation.
 - **[IBM wxflows](https://github.com/IBM/wxflows/tree/main/examples/mcp/javascript)** - Tool platform by IBM to build, test and deploy tools for any data source
--- a/src/fetch/README.md
+++ b/src/fetch/README.md
@@ -107,6 +107,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc

 This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.

+### Customization - Proxy
+
+The server can be configured to use a proxy by using the `--proxy-url` argument.
+
 ## Debugging

 You can use the MCP inspector to debug the server. For uvx installations:
--- a/src/fetch/pyproject.toml
+++ b/src/fetch/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "mcp-server-fetch"
-version = "0.6.2"
+version = "0.6.3"
 description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -16,6 +16,7 @@ classifiers = [
    "Programming Language :: Python :: 3.10",
 ]
 dependencies = [
+    "httpx<0.28",
    "markdownify>=0.13.1",
    "mcp>=1.1.3",
    "protego>=0.3.1",
--- a/src/fetch/src/mcp_server_fetch/init.py
+++ b/src/fetch/src/mcp_server_fetch/init.py
@@ -15,9 +15,10 @@ def main():
        action="store_true",
        help="Ignore robots.txt restrictions",
    )
+    parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")

    args = parser.parse_args()
-    asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
+    asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))


 if __name__ == "__main__":
--- a/src/fetch/src/mcp_server_fetch/server.py
+++ b/src/fetch/src/mcp_server_fetch/server.py
@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
    return robots_url


-async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
+async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
    """
    Check if the URL can be fetched by the user agent according to the robots.txt file.
    Raises a McpError if not.
@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:

    robot_txt_url = get_robots_txt_url(url)

-    async with AsyncClient() as client:
+    async with AsyncClient(proxies=proxy_url) as client:
        try:
            response = await client.get(
                robot_txt_url,
@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:


 async def fetch_url(
-    url: str, user_agent: str, force_raw: bool = False
+    url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
 ) -> Tuple[str, str]:
    """
    Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
    """
    from httpx import AsyncClient, HTTPError

-    async with AsyncClient() as client:
+    async with AsyncClient(proxies=proxy_url) as client:
        try:
            response = await client.get(
                url,
@@ -173,19 +173,22 @@ class Fetch(BaseModel):
        bool,
        Field(
            default=False,
-            description="Get the actual HTML content if the requested page, without simplification.",
+            description="Get the actual HTML content of the requested page, without simplification.",
        ),
    ]


 async def serve(
-    custom_user_agent: str | None = None, ignore_robots_txt: bool = False
+    custom_user_agent: str | None = None,
+    ignore_robots_txt: bool = False,
+    proxy_url: str | None = None,
 ) -> None:
    """Run the fetch MCP server.

    Args:
        custom_user_agent: Optional custom User-Agent string to use for requests
        ignore_robots_txt: Whether to ignore robots.txt restrictions
+        proxy_url: Optional proxy URL to use for requests
    """
    server = Server("mcp-fetch")
    user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
@@ -229,10 +232,10 @@ Although originally you did not have internet access, and were advised to refuse
            raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))

        if not ignore_robots_txt:
-            await check_may_autonomously_fetch_url(url, user_agent_autonomous)
+            await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)

        content, prefix = await fetch_url(
-            url, user_agent_autonomous, force_raw=args.raw
+            url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
        )
        original_length = len(content)
        if args.start_index >= original_length:
@@ -259,7 +262,7 @@ Although originally you did not have internet access, and were advised to refuse
        url = arguments["url"]

        try:
-            content, prefix = await fetch_url(url, user_agent_manual)
+            content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
            # TODO: after SDK bug is addressed, don't catch the exception
        except McpError as e:
            return GetPromptResult(
--- a/src/github/index.ts
+++ b/src/github/index.ts
@@ -7,6 +7,7 @@ import {
 } from "@modelcontextprotocol/sdk/types.js";
 import { z } from 'zod';
 import { zodToJsonSchema } from 'zod-to-json-schema';
+import fetch, { Request, Response } from 'node-fetch';

 import * as repository from './operations/repository.js';
 import * as files from './operations/files.js';
@@ -27,6 +28,11 @@ import {
 } from './common/errors.js';
 import { VERSION } from "./common/version.js";

+// Polyfill: when the runtime exposes no global fetch, install the node-fetch import as globalThis.fetch
+if (!globalThis.fetch) {
+  globalThis.fetch = fetch as unknown as typeof global.fetch;
+}
+
 const server = new Server(
  {
    name: "github-mcp-server",
@@ -293,10 +299,39 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
      case "create_issue": {
        const args = issues.CreateIssueSchema.parse(request.params.arguments);
        const { owner, repo, ...options } = args;
-        const issue = await issues.createIssue(owner, repo, options);
-        return {
-          content: [{ type: "text", text: JSON.stringify(issue, null, 2) }],
-        };
+        
+        try {
+          console.error(`[DEBUG] Attempting to create issue in ${owner}/${repo}`);
+          console.error(`[DEBUG] Issue options:`, JSON.stringify(options, null, 2));
+          
+          const issue = await issues.createIssue(owner, repo, options);
+          
+          console.error(`[DEBUG] Issue created successfully`);
+          return {
+            content: [{ type: "text", text: JSON.stringify(issue, null, 2) }],
+          };
+        } catch (err) {
+          // Type guard for Error objects
+          const error = err instanceof Error ? err : new Error(String(err));
+          
+          console.error(`[ERROR] Failed to create issue:`, error);
+          
+          if (error instanceof GitHubResourceNotFoundError) {
+            throw new Error(
+              `Repository '${owner}/${repo}' not found. Please verify:\n` +
+              `1. The repository exists\n` +
+              `2. You have correct access permissions\n` +
+              `3. The owner and repository names are spelled correctly`
+            );
+          }
+          
+          // Safely access error properties
+          throw new Error(
+            `Failed to create issue: ${error.message}${
+              error.stack ? `\nStack: ${error.stack}` : ''
+            }`
+          );
+        }
      }

      case "create_pull_request": {
--- a/src/gitlab/schemas.ts
+++ b/src/gitlab/schemas.ts
@@ -22,10 +22,10 @@ export const GitLabRepositorySchema = z.object({
  name: z.string(),
  path_with_namespace: z.string(), // Changed from full_name to match GitLab API
  visibility: z.string(), // Changed from private to match GitLab API
-  owner: GitLabOwnerSchema,
+  owner: GitLabOwnerSchema.optional(),
  web_url: z.string(), // Changed from html_url to match GitLab API
  description: z.string().nullable(),
-  fork: z.boolean(),
+  fork: z.boolean().optional(),
  ssh_url_to_repo: z.string(), // Changed from ssh_url to match GitLab API
  http_url_to_repo: z.string(), // Changed from clone_url to match GitLab API
  created_at: z.string(),
@@ -218,12 +218,12 @@ export const GitLabMergeRequestSchema = z.object({
  title: z.string(),
  description: z.string(), // Changed from body to match GitLab API
  state: z.string(),
-  merged: z.boolean(),
+  merged: z.boolean().optional(),
  author: GitLabUserSchema,
  assignees: z.array(GitLabUserSchema),
  source_branch: z.string(), // Changed from head to match GitLab API
  target_branch: z.string(), // Changed from base to match GitLab API
-  diff_refs: GitLabMergeRequestDiffRefSchema,
+  diff_refs: GitLabMergeRequestDiffRefSchema.nullable(),
  web_url: z.string(), // Changed from html_url to match GitLab API
  created_at: z.string(),
  updated_at: z.string(),
--- a/src/puppeteer/README.md
+++ b/src/puppeteer/README.md
@@ -8,7 +8,10 @@ A Model Context Protocol server that provides browser automation capabilities us

 - **puppeteer_navigate**
  - Navigate to any URL in the browser
-  - Input: `url` (string)
+  - Inputs:
+    - `url` (string, required): URL to navigate to
+    - `launchOptions` (object, optional): PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: `{ headless: true, args: ['--user-data-dir="C:/Data"'] }`
+    - `allowDangerous` (boolean, optional): Allow dangerous LaunchOptions that reduce security. When false, dangerous args such as `--no-sandbox` or `--disable-web-security` are rejected with an error. Default false.

 - **puppeteer_screenshot**
  - Capture screenshots of the entire page or specific elements
@@ -61,6 +64,7 @@ The server provides access to two types of resources:
 - Screenshot capabilities
 - JavaScript execution
 - Basic web interaction (navigation, clicking, form filling)
+- Customizable Puppeteer launch options

 ## Configuration to use Puppeteer Server
 Here's the Claude Desktop configuration to use the Puppeter server:
@@ -93,6 +97,39 @@ Here's the Claude Desktop configuration to use the Puppeter server:
 }
 ```

+### Launch Options
+
+You can customize Puppeteer's browser behavior in two ways:
+
+1. **Environment Variable**: Set `PUPPETEER_LAUNCH_OPTIONS` with a JSON-encoded string in the MCP configuration's `env` parameter:
+
+    ```json
+    {
+      "mcpServers": {
+        "mcp-puppeteer": {
+          "command": "npx",
+          "args": ["-y", "@modelcontextprotocol/server-puppeteer"]
+          "args": ["-y", "@modelcontextprotocol/server-puppeteer"],
+            "PUPPETEER_LAUNCH_OPTIONS": "{ \"headless\": false, \"executablePath\": \"C:/Program Files/Google/Chrome/Application/chrome.exe\", \"args\": [] }",
+            "ALLOW_DANGEROUS": "true"
+          }
+        }
+      }
+    }
+    ```
+
+2. **Tool Call Arguments**: Pass `launchOptions` and `allowDangerous` parameters to the `puppeteer_navigate` tool:
+
+   ```json
+   {
+     "url": "https://example.com",
+     "launchOptions": {
+       "headless": false,
+       "defaultViewport": {"width": 1280, "height": 720}
+     }
+   }
+   ```
+
 ## Build

 Docker build:
--- a/src/puppeteer/index.ts
+++ b/src/puppeteer/index.ts
@@ -22,7 +22,9 @@ const TOOLS: Tool[] = [
    inputSchema: {
      type: "object",
      properties: {
-        url: { type: "string" },
+        url: { type: "string", description: "URL to navigate to" },
+        launchOptions: { type: "object", description: "PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: { headless: true, args: ['--no-sandbox'] }" },
+        allowDangerous: { type: "boolean", description: "Allow dangerous LaunchOptions that reduce security. When false, dangerous args like --no-sandbox will throw errors. Default false." },
      },
      required: ["url"],
    },
@@ -101,16 +103,65 @@ const TOOLS: Tool[] = [
 ];

 // Global state
-let browser: Browser | undefined;
-let page: Page | undefined;
+let browser: Browser | null;
+let page: Page | null;
 const consoleLogs: string[] = [];
 const screenshots = new Map<string, string>();
+let previousLaunchOptions: any = null;
+
+async function ensureBrowser({ launchOptions, allowDangerous }: any) {
+
+  const DANGEROUS_ARGS = [
+    '--no-sandbox',
+    '--disable-setuid-sandbox',
+    '--single-process',
+    '--disable-web-security',
+    '--ignore-certificate-errors',
+    '--disable-features=IsolateOrigins',
+    '--disable-site-isolation-trials',
+    '--allow-running-insecure-content'
+  ];
+
+  // Parse environment config safely
+  let envConfig = {};
+  try {
+    envConfig = JSON.parse(process.env.PUPPETEER_LAUNCH_OPTIONS || '{}');
+  } catch (error: any) {
+    console.warn('Failed to parse PUPPETEER_LAUNCH_OPTIONS:', error?.message || error);
+  }
+
+  // Deep merge environment config with user-provided options
+  const mergedConfig = deepMerge(envConfig, launchOptions || {});
+
+  // Security validation for merged config
+  if (mergedConfig?.args) {
+    const dangerousArgs = mergedConfig.args?.filter?.((arg: string) => DANGEROUS_ARGS.some((dangerousArg: string) => arg.startsWith(dangerousArg)));
+    if (dangerousArgs?.length > 0 && !(allowDangerous || (process.env.ALLOW_DANGEROUS === 'true'))) {
+      throw new Error(`Dangerous browser arguments detected: ${dangerousArgs.join(', ')}. Fround from environment variable and tool call argument. ` +
+        'Set allowDangerous: true in the tool call arguments to override.');
+    }
+  }
+
+  try {
+    if ((browser && !browser.connected) ||
+      (launchOptions && (JSON.stringify(launchOptions) != JSON.stringify(previousLaunchOptions)))) {
+      await browser?.close();
+      browser = null;
+    }
+  }
+  catch (error) {
+    browser = null;
+  }
+
+  previousLaunchOptions = launchOptions;

-async function ensureBrowser() {
  if (!browser) {
    const npx_args = { headless: false }
    const docker_args = { headless: true, args: ["--no-sandbox", "--single-process", "--no-zygote"] }
-    browser = await puppeteer.launch(process.env.DOCKER_CONTAINER ? docker_args : npx_args);
+    browser = await puppeteer.launch(deepMerge(
+      process.env.DOCKER_CONTAINER ? docker_args : npx_args,
+      mergedConfig
+    ));
    const pages = await browser.pages();
    page = pages[0];

@@ -126,6 +177,31 @@ async function ensureBrowser() {
  return page!;
 }

+// Deep merge utility: returns a new object combining target and source (source wins on conflicts); nested objects merge recursively, arrays present on both sides are concatenated and de-duplicated. If either argument is not an object, source is returned as-is.
+function deepMerge(target: any, source: any): any {
+  const output = Object.assign({}, target);
+  if (typeof target !== 'object' || typeof source !== 'object') return source;
+
+  for (const key of Object.keys(source)) {
+    const targetVal = target[key];
+    const sourceVal = source[key];
+    if (Array.isArray(targetVal) && Array.isArray(sourceVal)) {
+      // For args/ignoreDefaultArgs: drop a target '--flag' (or '--flag=value') when some source entry starts with the same flag name, so the source value wins; the Set then removes exact duplicates
+      output[key] = [...new Set([
+        ...(key === 'args' || key === 'ignoreDefaultArgs' ?
+          targetVal.filter((arg: string) => !sourceVal.some((launchArg: string) => arg.startsWith('--') && launchArg.startsWith(arg.split('=')[0]))) :
+          targetVal),
+        ...sourceVal
+      ])];
+    } else if (sourceVal instanceof Object && key in target) {
+      output[key] = deepMerge(targetVal, sourceVal);
+    } else {
+      output[key] = sourceVal;
+    }
+  }
+  return output;
+}
+
 declare global {
  interface Window {
    mcpHelper: {
@@ -136,7 +212,7 @@ declare global {
 }

 async function handleToolCall(name: string, args: any): Promise<CallToolResult> {
-  const page = await ensureBrowser();
+  const page = await ensureBrowser(args);

  switch (name) {
    case "puppeteer_navigate":
@@ -285,15 +361,15 @@ async function handleToolCall(name: string, args: any): Promise<CallToolResult>
              window.mcpHelper.logs.push(`[${method}] ${args.join(' ')}`);
              (window.mcpHelper.originalConsole as any)[method](...args);
            };
-          } );
-        } );
+          });
+        });

-        const result = await page.evaluate( args.script );
+        const result = await page.evaluate(args.script);

        const logs = await page.evaluate(() => {
          Object.assign(console, window.mcpHelper.originalConsole);
          const logs = window.mcpHelper.logs;
-          delete ( window as any).mcpHelper;
+          delete (window as any).mcpHelper;
          return logs;
        });