Merge branch 'main' into add_baidu_ai_search

2026-04-17 23:53:24 +02:00 · 2025-04-01 09:25:16 +08:00
parent 2b801b6353 e181222154
commit df6eef233b
9 changed files with 190 additions and 32 deletions
--- a/README.md
+++ b/README.md
@@ -73,6 +73,7 @@ Official integrations are maintained by companies building production ready MCP
 - <img height="12" width="12" src="https://cdn.prod.website-files.com/6605a2979ff17b2cd1939cd4/6605a460de47e7596ed84f06_icon256.png" alt="gotoHuman Logo" /> **[gotoHuman](https://github.com/gotohuman/gotohuman-mcp-server)** - Human-in-the-loop platform - Allow AI agents and automations to send requests for approval to your [gotoHuman](https://www.gotohuman.com) inbox.
 - <img height="12" width="12" src="https://grafana.com/favicon.ico" alt="Grafana Logo" /> **[Grafana](https://github.com/grafana/mcp-grafana)** - Search dashboards, investigate incidents and query datasources in your Grafana instance
 - <img height="12" width="12" src="https://framerusercontent.com/images/KCOWBYLKunDff1Dr452y6EfjiU.png" alt="Graphlit Logo" /> **[Graphlit](https://github.com/graphlit/graphlit-mcp-server)** - Ingest anything from Slack to Gmail to podcast feeds, in addition to web crawling, into a searchable [Graphlit](https://www.graphlit.com) project.
+- <img height="12" width="12" src="https://greptime.com/favicon.ico" alt="Greptime Logo" /> **[GreptimeDB](https://github.com/GreptimeTeam/greptimedb-mcp-server)** - Provides AI assistants with a secure and structured way to explore and analyze data in [GreptimeDB](https://github.com/GreptimeTeam/greptimedb).
 - <img height="12" width="12" src="https://img.alicdn.com/imgextra/i3/O1CN01d9qrry1i6lTNa2BRa_!!6000000004364-2-tps-218-200.png" alt="Hologres Logo" /> **[Hologres](https://github.com/aliyun/alibabacloud-hologres-mcp-server)** - Connect to a [Hologres](https://www.alibabacloud.com/en/product/hologres) instance, get table metadata, query and analyze data.
 - <img height="12" width="12" src="https://hyperbrowser-assets-bucket.s3.us-east-1.amazonaws.com/Hyperbrowser-logo.png" alt="Hyperbrowsers23 Logo" /> **[Hyperbrowser](https://github.com/hyperbrowserai/mcp)** - [Hyperbrowser](https://www.hyperbrowser.ai/) is the next-generation platform empowering AI agents and enabling effortless, scalable browser automation.
 - **[IBM wxflows](https://github.com/IBM/wxflows/tree/main/examples/mcp/javascript)** - Tool platform by IBM to build, test and deploy tools for any data source
--- a/src/fetch/README.md
+++ b/src/fetch/README.md
@@ -107,6 +107,10 @@ ModelContextProtocol/1.0 (User-Specified; +https://github.com/modelcontextprotoc

 This can be customized by adding the argument `--user-agent=YourUserAgent` to the `args` list in the configuration.

+### Customization - Proxy
+
+The server can be configured to use a proxy by using the `--proxy-url` argument.
+
 ## Debugging

 You can use the MCP inspector to debug the server. For uvx installations:
--- a/src/fetch/pyproject.toml
+++ b/src/fetch/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "mcp-server-fetch"
-version = "0.6.2"
+version = "0.6.3"
 description = "A Model Context Protocol server providing tools to fetch and convert web content for usage by LLMs"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -16,6 +16,7 @@ classifiers = [
    "Programming Language :: Python :: 3.10",
 ]
 dependencies = [
+    "httpx<0.28",
    "markdownify>=0.13.1",
    "mcp>=1.1.3",
    "protego>=0.3.1",
--- a/src/fetch/src/mcp_server_fetch/init.py
+++ b/src/fetch/src/mcp_server_fetch/init.py
@@ -15,9 +15,10 @@ def main():
        action="store_true",
        help="Ignore robots.txt restrictions",
    )
+    parser.add_argument("--proxy-url", type=str, help="Proxy URL to use for requests")

    args = parser.parse_args()
-    asyncio.run(serve(args.user_agent, args.ignore_robots_txt))
+    asyncio.run(serve(args.user_agent, args.ignore_robots_txt, args.proxy_url))


 if __name__ == "__main__":
--- a/src/fetch/src/mcp_server_fetch/server.py
+++ b/src/fetch/src/mcp_server_fetch/server.py
@@ -63,7 +63,7 @@ def get_robots_txt_url(url: str) -> str:
    return robots_url


-async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:
+async def check_may_autonomously_fetch_url(url: str, user_agent: str, proxy_url: str | None = None) -> None:
    """
    Check if the URL can be fetched by the user agent according to the robots.txt file.
    Raises a McpError if not.
@@ -72,7 +72,7 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:

    robot_txt_url = get_robots_txt_url(url)

-    async with AsyncClient() as client:
+    async with AsyncClient(proxies=proxy_url) as client:
        try:
            response = await client.get(
                robot_txt_url,
@@ -109,14 +109,14 @@ async def check_may_autonomously_fetch_url(url: str, user_agent: str) -> None:


 async def fetch_url(
-    url: str, user_agent: str, force_raw: bool = False
+    url: str, user_agent: str, force_raw: bool = False, proxy_url: str | None = None
 ) -> Tuple[str, str]:
    """
    Fetch the URL and return the content in a form ready for the LLM, as well as a prefix string with status information.
    """
    from httpx import AsyncClient, HTTPError

-    async with AsyncClient() as client:
+    async with AsyncClient(proxies=proxy_url) as client:
        try:
            response = await client.get(
                url,
@@ -173,19 +173,22 @@ class Fetch(BaseModel):
        bool,
        Field(
            default=False,
-            description="Get the actual HTML content if the requested page, without simplification.",
+            description="Get the actual HTML content of the requested page, without simplification.",
        ),
    ]


 async def serve(
-    custom_user_agent: str | None = None, ignore_robots_txt: bool = False
+    custom_user_agent: str | None = None,
+    ignore_robots_txt: bool = False,
+    proxy_url: str | None = None,
 ) -> None:
    """Run the fetch MCP server.

    Args:
        custom_user_agent: Optional custom User-Agent string to use for requests
        ignore_robots_txt: Whether to ignore robots.txt restrictions
+        proxy_url: Optional proxy URL to use for requests
    """
    server = Server("mcp-fetch")
    user_agent_autonomous = custom_user_agent or DEFAULT_USER_AGENT_AUTONOMOUS
@@ -229,10 +232,10 @@ Although originally you did not have internet access, and were advised to refuse
            raise McpError(ErrorData(code=INVALID_PARAMS, message="URL is required"))

        if not ignore_robots_txt:
-            await check_may_autonomously_fetch_url(url, user_agent_autonomous)
+            await check_may_autonomously_fetch_url(url, user_agent_autonomous, proxy_url)

        content, prefix = await fetch_url(
-            url, user_agent_autonomous, force_raw=args.raw
+            url, user_agent_autonomous, force_raw=args.raw, proxy_url=proxy_url
        )
        original_length = len(content)
        if args.start_index >= original_length:
@@ -259,7 +262,7 @@ Although originally you did not have internet access, and were advised to refuse
        url = arguments["url"]

        try:
-            content, prefix = await fetch_url(url, user_agent_manual)
+            content, prefix = await fetch_url(url, user_agent_manual, proxy_url=proxy_url)
            # TODO: after SDK bug is addressed, don't catch the exception
        except McpError as e:
            return GetPromptResult(
--- a/src/github/index.ts
+++ b/src/github/index.ts
@@ -7,6 +7,7 @@ import {
 } from "@modelcontextprotocol/sdk/types.js";
 import { z } from 'zod';
 import { zodToJsonSchema } from 'zod-to-json-schema';
+import fetch, { Request, Response } from 'node-fetch';

 import * as repository from './operations/repository.js';
 import * as files from './operations/files.js';
@@ -27,6 +28,11 @@ import {
 } from './common/errors.js';
 import { VERSION } from "./common/version.js";

+// If fetch doesn't exist in global scope, add it
+if (!globalThis.fetch) {
+  globalThis.fetch = fetch as unknown as typeof global.fetch;
+}
+
 const server = new Server(
  {
    name: "github-mcp-server",
@@ -293,10 +299,39 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
      case "create_issue": {
        const args = issues.CreateIssueSchema.parse(request.params.arguments);
        const { owner, repo, ...options } = args;
-        const issue = await issues.createIssue(owner, repo, options);
-        return {
-          content: [{ type: "text", text: JSON.stringify(issue, null, 2) }],
-        };
+        
+        try {
+          console.error(`[DEBUG] Attempting to create issue in ${owner}/${repo}`);
+          console.error(`[DEBUG] Issue options:`, JSON.stringify(options, null, 2));
+          
+          const issue = await issues.createIssue(owner, repo, options);
+          
+          console.error(`[DEBUG] Issue created successfully`);
+          return {
+            content: [{ type: "text", text: JSON.stringify(issue, null, 2) }],
+          };
+        } catch (err) {
+          // Type guard for Error objects
+          const error = err instanceof Error ? err : new Error(String(err));
+          
+          console.error(`[ERROR] Failed to create issue:`, error);
+          
+          if (error instanceof GitHubResourceNotFoundError) {
+            throw new Error(
+              `Repository '${owner}/${repo}' not found. Please verify:\n` +
+              `1. The repository exists\n` +
+              `2. You have correct access permissions\n` +
+              `3. The owner and repository names are spelled correctly`
+            );
+          }
+          
+          // Safely access error properties
+          throw new Error(
+            `Failed to create issue: ${error.message}${
+              error.stack ? `\nStack: ${error.stack}` : ''
+            }`
+          );
+        }
      }

      case "create_pull_request": {
--- a/src/gitlab/schemas.ts
+++ b/src/gitlab/schemas.ts
@@ -22,10 +22,10 @@ export const GitLabRepositorySchema = z.object({
  name: z.string(),
  path_with_namespace: z.string(), // Changed from full_name to match GitLab API
  visibility: z.string(), // Changed from private to match GitLab API
-  owner: GitLabOwnerSchema,
+  owner: GitLabOwnerSchema.optional(),
  web_url: z.string(), // Changed from html_url to match GitLab API
  description: z.string().nullable(),
-  fork: z.boolean(),
+  fork: z.boolean().optional(),
  ssh_url_to_repo: z.string(), // Changed from ssh_url to match GitLab API
  http_url_to_repo: z.string(), // Changed from clone_url to match GitLab API
  created_at: z.string(),
@@ -218,12 +218,12 @@ export const GitLabMergeRequestSchema = z.object({
  title: z.string(),
  description: z.string(), // Changed from body to match GitLab API
  state: z.string(),
-  merged: z.boolean(),
+  merged: z.boolean().optional(),
  author: GitLabUserSchema,
  assignees: z.array(GitLabUserSchema),
  source_branch: z.string(), // Changed from head to match GitLab API
  target_branch: z.string(), // Changed from base to match GitLab API
-  diff_refs: GitLabMergeRequestDiffRefSchema,
+  diff_refs: GitLabMergeRequestDiffRefSchema.nullable(),
  web_url: z.string(), // Changed from html_url to match GitLab API
  created_at: z.string(),
  updated_at: z.string(),
--- a/src/puppeteer/README.md
+++ b/src/puppeteer/README.md
@@ -8,7 +8,10 @@ A Model Context Protocol server that provides browser automation capabilities us

 - **puppeteer_navigate**
  - Navigate to any URL in the browser
-  - Input: `url` (string)
+  - Inputs:
+    - `url` (string, required): URL to navigate to
+    - `launchOptions` (object, optional): PuppeteerJS LaunchOptions. Defaults to null. If non-null and different from the options of the previous call, the browser is restarted. Example: `{ headless: true, args: ['--user-data-dir="C:/Data"'] }`
+    - `allowDangerous` (boolean, optional): Allow dangerous LaunchOptions that reduce security. When false, dangerous args such as `--no-sandbox` or `--disable-web-security` are rejected with an error. Default false.

 - **puppeteer_screenshot**
  - Capture screenshots of the entire page or specific elements
@@ -61,6 +64,7 @@ The server provides access to two types of resources:
 - Screenshot capabilities
 - JavaScript execution
 - Basic web interaction (navigation, clicking, form filling)
+- Customizable Puppeteer launch options

 ## Configuration to use Puppeteer Server
 Here's the Claude Desktop configuration to use the Puppeter server:
@@ -93,6 +97,39 @@ Here's the Claude Desktop configuration to use the Puppeter server:
 }
 ```

+### Launch Options
+
+You can customize Puppeteer's browser behavior in two ways:
+
+1. **Environment Variable**: Set `PUPPETEER_LAUNCH_OPTIONS` to a JSON-encoded string in the MCP configuration's `env` parameter:
+
+    ```json
+    {
+      "mcpServers": {
+        "mcp-puppeteer": {
+          "command": "npx",
+          "args": ["-y", "@modelcontextprotocol/server-puppeteer"],
+          "env": {
+            "PUPPETEER_LAUNCH_OPTIONS": "{ \"headless\": false, \"executablePath\": \"C:/Program Files/Google/Chrome/Application/chrome.exe\", \"args\": [] }",
+            "ALLOW_DANGEROUS": "true"
+          }
+        }
+      }
+    }
+    ```
+
+2. **Tool Call Arguments**: Pass `launchOptions` and `allowDangerous` parameters to the `puppeteer_navigate` tool:
+
+   ```json
+   {
+     "url": "https://example.com",
+     "launchOptions": {
+       "headless": false,
+       "defaultViewport": {"width": 1280, "height": 720}
+     }
+   }
+   ```
+
 ## Build

 Docker build:
@@ -103,4 +140,4 @@ docker build -t mcp/puppeteer -f src/puppeteer/Dockerfile .

 ## License

-This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
+This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the LICENSE file in the project repository.
--- a/src/puppeteer/index.ts
+++ b/src/puppeteer/index.ts
@@ -22,7 +22,9 @@ const TOOLS: Tool[] = [
    inputSchema: {
      type: "object",
      properties: {
-        url: { type: "string" },
+        url: { type: "string", description: "URL to navigate to" },
+        launchOptions: { type: "object", description: "PuppeteerJS LaunchOptions. Default null. If changed and not null, browser restarts. Example: { headless: true, args: ['--no-sandbox'] }" },
+        allowDangerous: { type: "boolean", description: "Allow dangerous LaunchOptions that reduce security. When false, dangerous args like --no-sandbox will throw errors. Default false." },
      },
      required: ["url"],
    },
@@ -101,16 +103,65 @@ const TOOLS: Tool[] = [
 ];

 // Global state
-let browser: Browser | undefined;
-let page: Page | undefined;
+let browser: Browser | null;
+let page: Page | null;
 const consoleLogs: string[] = [];
 const screenshots = new Map<string, string>();
+let previousLaunchOptions: any = null;
+
+async function ensureBrowser({ launchOptions, allowDangerous }: any) {
+
+  const DANGEROUS_ARGS = [
+    '--no-sandbox',
+    '--disable-setuid-sandbox',
+    '--single-process',
+    '--disable-web-security',
+    '--ignore-certificate-errors',
+    '--disable-features=IsolateOrigins',
+    '--disable-site-isolation-trials',
+    '--allow-running-insecure-content'
+  ];
+
+  // Parse environment config safely
+  let envConfig = {};
+  try {
+    envConfig = JSON.parse(process.env.PUPPETEER_LAUNCH_OPTIONS || '{}');
+  } catch (error: any) {
+    console.warn('Failed to parse PUPPETEER_LAUNCH_OPTIONS:', error?.message || error);
+  }
+
+  // Deep merge environment config with user-provided options
+  const mergedConfig = deepMerge(envConfig, launchOptions || {});
+
+  // Security validation for merged config
+  if (mergedConfig?.args) {
+    const dangerousArgs = mergedConfig.args?.filter?.((arg: string) => DANGEROUS_ARGS.some((dangerousArg: string) => arg.startsWith(dangerousArg)));
+    if (dangerousArgs?.length > 0 && !(allowDangerous || (process.env.ALLOW_DANGEROUS === 'true'))) {
+      throw new Error(`Dangerous browser arguments detected: ${dangerousArgs.join(', ')}. Fround from environment variable and tool call argument. ` +
+        'Set allowDangerous: true in the tool call arguments to override.');
+    }
+  }
+
+  try {
+    if ((browser && !browser.connected) ||
+      (launchOptions && (JSON.stringify(launchOptions) != JSON.stringify(previousLaunchOptions)))) {
+      await browser?.close();
+      browser = null;
+    }
+  }
+  catch (error) {
+    browser = null;
+  }
+
+  previousLaunchOptions = launchOptions;

-async function ensureBrowser() {
  if (!browser) {
    const npx_args = { headless: false }
    const docker_args = { headless: true, args: ["--no-sandbox", "--single-process", "--no-zygote"] }
-    browser = await puppeteer.launch(process.env.DOCKER_CONTAINER ? docker_args : npx_args);
+    browser = await puppeteer.launch(deepMerge(
+      process.env.DOCKER_CONTAINER ? docker_args : npx_args,
+      mergedConfig
+    ));
    const pages = await browser.pages();
    page = pages[0];

@@ -126,6 +177,31 @@ async function ensureBrowser() {
  return page!;
 }

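+// Deep merge utility: builds a new object in which `source` values override `target`; arrays present on both sides are concatenated and de-duplicated (non-object inputs short-circuit to `source`).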
+function deepMerge(target: any, source: any): any {
+  const output = Object.assign({}, target);
+  if (typeof target !== 'object' || typeof source !== 'object') return source;
+
+  for (const key of Object.keys(source)) {
+    const targetVal = target[key];
+    const sourceVal = source[key];
+    if (Array.isArray(targetVal) && Array.isArray(sourceVal)) {
+      // Deduplicate args/ignoreDefaultArgs, prefer source values
+      output[key] = [...new Set([
+        ...(key === 'args' || key === 'ignoreDefaultArgs' ?
+          targetVal.filter((arg: string) => !sourceVal.some((launchArg: string) => arg.startsWith('--') && launchArg.startsWith(arg.split('=')[0]))) :
+          targetVal),
+        ...sourceVal
+      ])];
+    } else if (sourceVal instanceof Object && key in target) {
+      output[key] = deepMerge(targetVal, sourceVal);
+    } else {
+      output[key] = sourceVal;
+    }
+  }
+  return output;
+}
+
 declare global {
  interface Window {
    mcpHelper: {
@@ -136,7 +212,7 @@ declare global {
 }

 async function handleToolCall(name: string, args: any): Promise<CallToolResult> {
-  const page = await ensureBrowser();
+  const page = await ensureBrowser(args);

  switch (name) {
    case "puppeteer_navigate":
@@ -285,15 +361,15 @@ async function handleToolCall(name: string, args: any): Promise<CallToolResult>
              window.mcpHelper.logs.push(`[${method}] ${args.join(' ')}`);
              (window.mcpHelper.originalConsole as any)[method](...args);
            };
-          } );
-        } );
+          });
+        });

-        const result = await page.evaluate( args.script );
+        const result = await page.evaluate(args.script);

        const logs = await page.evaluate(() => {
          Object.assign(console, window.mcpHelper.originalConsole);
          const logs = window.mcpHelper.logs;
-          delete ( window as any).mcpHelper;
+          delete (window as any).mcpHelper;
          return logs;
        });

@@ -405,4 +481,4 @@ runServer().catch(console.error);
 process.stdin.on("close", () => {
  console.error("Puppeteer MCP Server closed");
  server.close();
-});
+});