通过 MCP 执行代码：构建更高效的 AI Agent

发布于 2025 年 10 月 30 日

通过 MCP 执行代码：构建更高效的 AI Agent

来源: Anthropic Engineering Blog
作者: Anthropic Engineering Team
发布日期: 2025 年 10 月 30 日
类型: 技术架构
阅读时间: 约 12 分钟

概述

本文介绍了如何通过模型上下文协议 (MCP) 实现安全的代码执行能力，构建更高效的 AI Agent 系统。通过在隔离的沙箱环境中执行代码，AI Agent 可以获得强大的计算能力、数据处理能力和自动化能力。我们分享了代码执行架构设计、安全隔离机制、性能优化技巧和实际应用场景。

为什么需要代码执行

AI 模型的局限

纯语言模型在处理某些任务时有天然局限：

数学计算：

模型：$237 \times 492 = ?$ (可能出错)
代码：237 * 492 (准确结果)

数据处理：

模型：分析 10000 行数据 (无法完成)
代码：pandas.read_csv('data.csv').describe() (轻松完成)

精确操作：

模型：文件格式转换 (可能格式错误)
代码：json.dumps(data) (精确格式)

代码执行的优势

能力	纯模型	模型 + 代码执行
数学计算	不准确	100% 准确
大数据处理	无法处理	高效处理
精确格式	可能出错	完全准确
重复操作	结果可能不一致	始终一致
复杂逻辑	容易混乱	清晰执行

架构设计

整体架构

┌─────────────────────────────────────────────────────────────┐
│                      AI Agent                               │
│  - 理解用户意图                                             │
│  - 生成执行代码                                             │
│  - 解释执行结果                                             │
└─────────────────────────────────────────────────────────────┘
                              │
                              │ MCP 协议
                              ▼
┌─────────────────────────────────────────────────────────────┐
│                   代码执行服务器                             │
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐         │
│  │ 代码生成器   │  │ 沙箱执行器   │  │ 结果处理器   │         │
│  └─────────────┘  └─────────────┘  └─────────────┘         │
└─────────────────────────────────────────────────────────────┘
                              │
                              ▼
┌─────────────────────────────────────────────────────────────┐
│                    沙箱环境                                  │
│  - 资源限制 (CPU, 内存，磁盘)                                │
│  - 网络隔离                                                 │
│  - 文件系统隔离                                             │
└─────────────────────────────────────────────────────────────┘

MCP 服务器实现

from mcp.server import Server
from mcp.types import Tool, TextContent
import sandbox

# MCP server instance named "code-executor"; the tool handlers below register on it via decorators.
app = Server("code-executor")

@app.list_tools()
async def list_tools() -> list[Tool]:
    """Return the tools exposed by this server: a single ``execute_python`` tool."""
    # JSON Schema fragments for the two accepted arguments.
    code_property = {
        "type": "string",
        "description": "要执行的 Python 代码",
    }
    timeout_property = {
        "type": "integer",
        "description": "执行超时 (秒)",
        "default": 30,
    }

    # Only ``code`` is mandatory; ``timeout`` is optional.
    input_schema = {
        "type": "object",
        "properties": {
            "code": code_property,
            "timeout": timeout_property,
        },
        "required": ["code"],
    }

    execute_python = Tool(
        name="execute_python",
        description="执行 Python 代码并返回结果",
        inputSchema=input_schema,
    )
    return [execute_python]

@app.call_tool()
async def call_tool(name: str, args: dict) -> list[TextContent]:
    """Dispatch an MCP tool call to the sandbox.

    Args:
        name: Name of the requested tool; only ``execute_python`` is handled.
        args: Tool arguments. ``code`` is required; ``timeout`` (seconds)
            falls back to 30 when absent.

    Returns:
        A one-element list whose text holds the captured stdout and stderr.

    Raises:
        ValueError: If ``name`` is not a tool handled here.
        KeyError: If ``args`` has no ``code`` entry.
    """
    if name != "execute_python":
        # The original fell through and returned None for unknown tools,
        # which violates the declared return type; fail explicitly instead.
        raise ValueError(f"Unknown tool: {name}")

    code = args["code"]
    timeout = args.get("timeout", 30)

    # Run the code inside the sandbox
    result = await sandbox.execute(
        code=code,
        language="python",
        timeout=timeout
    )

    return [TextContent(
        type="text",
        text=f"执行结果:\n{result.stdout}\n错误:\n{result.stderr}"
    )]

安全隔离机制

沙箱设计

import docker
import resource
import subprocess

class CodeSandbox:
    """Run Python snippets inside short-lived, resource-limited Docker containers."""

    def __init__(self):
        # docker.from_env() is the SDK's client constructor
        # (``docker.from_client`` does not exist).
        self.client = docker.from_env()

    async def execute(self, code: str, timeout: int = 30) -> ExecutionResult:
        """Run ``code`` in a fresh container and collect its output.

        Args:
            code: Python source passed to ``python -c``.
            timeout: Seconds to wait for the container to finish.

        Returns:
            An ``ExecutionResult``. ``success`` is True only when the container
            exits with status 0. On any failure (including a wait timeout)
            ``stderr`` carries the exception text.

        NOTE(review): the Docker SDK calls used here appear to be synchronous,
        so this coroutine would block the event loop while the container
        runs - confirm and offload to a thread if that matters.
        """
        # Create a temporary container
        container = self.client.containers.run(
            image="python:3.11-slim",
            # argv list: the code is passed verbatim as one argument, so quotes
            # inside it can neither break nor extend the command line.
            command=["python", "-c", code],
            detach=True,
            # Removed explicitly in ``finally``; with auto-remove the container
            # can disappear before its logs are read.
            remove=False,
            # Resource limits
            mem_limit="256m",
            cpu_quota=50000,
            # Network isolation
            network_disabled=True,
            # NOTE(review): this bind-mounts the host's /tmp read-write, so the
            # container shares that directory with the host - confirm this is
            # the intended level of filesystem isolation.
            volumes={'/tmp': {'bind': '/tmp', 'mode': 'rw'}},
            # Security options
            security_opt=["no-new-privileges"],
            cap_drop=["ALL"]
        )

        try:
            # Wait for completion; raises if the timeout elapses first.
            status = container.wait(timeout=timeout)
            # Fetch the two streams separately so stderr is reported on its own.
            stdout = container.logs(stdout=True, stderr=False).decode(
                'utf-8', errors='replace'
            )
            stderr = container.logs(stdout=False, stderr=True).decode(
                'utf-8', errors='replace'
            )
            return ExecutionResult(
                success=status["StatusCode"] == 0,
                stdout=stdout,
                stderr=stderr
            )
        except Exception as e:
            # Boundary handler: every failure becomes a failed result.
            return ExecutionResult(
                success=False,
                stdout="",
                stderr=str(e)
            )
        finally:
            # force=True kills a still-running container before removing it.
            try:
                container.remove(force=True)
            except docker.errors.APIError:
                # Best-effort cleanup: the container may already be gone.
                pass

危险操作防护

# Substrings that mark a snippet as potentially dangerous.
DANGEROUS_PATTERNS = [
    # Process / shell execution
    "os.system", "subprocess",
    # Dynamic evaluation and dynamic import
    "eval(", "exec(", "__import__",
    # File access
    "open(",
    # Network access
    "socket", "requests", "urllib",
]

def check_safety(code: str) -> SafetyResult:
    """Heuristically scan ``code`` for risky constructs.

    The checks are plain substring and regex matches on the source text, so
    they can both miss real problems and flag harmless code; treat the result
    as a pre-filter, not as a security boundary.

    Args:
        code: Python source text to inspect.

    Returns:
        A ``SafetyResult`` whose ``issues`` lists every finding and whose
        ``is_safe`` is True only when that list is empty.
    """
    # Local import: ``re`` is used below but is not imported by this snippet.
    import re

    issues = [
        f"检测到危险模式：{pattern}"
        for pattern in DANGEROUS_PATTERNS
        if pattern in code
    ]

    # Possible infinite loop: a ``while True`` with no ``break`` anywhere in the text.
    if re.search(r'while\s+True', code) and 'break' not in code:
        issues.append("可能的无限循环")

    # Possible resource abuse: ``range()`` over a literal with 7 or more digits.
    if re.search(r'range\(\d{7,}\)', code):
        issues.append("可能的资源滥用")

    return SafetyResult(
        is_safe=not issues,
        issues=issues
    )

性能优化

代码预编译

class CompiledCodeCache:
    """In-memory cache of compiled code objects keyed by the SHA-256 of their source."""

    def __init__(self):
        # Maps hex digest of the source text -> result of compile().
        self.cache = {}

    def get_or_compile(self, code: str) -> "CompiledCode":
        """Return the compiled form of ``code``, compiling it only on first use.

        Args:
            code: Python source text.

        Returns:
            The object produced by ``compile(code, '<sandbox>', 'exec')``;
            repeated calls with identical source return the same object.

        Raises:
            SyntaxError: Propagated from ``compile`` for invalid source.
        """
        # Local import: ``hashlib`` is used here but not imported by this snippet.
        import hashlib

        code_hash = hashlib.sha256(code.encode()).hexdigest()

        try:
            return self.cache[code_hash]
        except KeyError:
            compiled = compile(code, '<sandbox>', 'exec')
            self.cache[code_hash] = compiled
            return compiled

结果缓存

from functools import lru_cache

@lru_cache(maxsize=1000)
def execute_cached(code_hash: str, code: str) -> ExecutionResult:
    """Execute ``code`` through the sandbox, memoised by ``lru_cache``.

    The cache key is the ``(code_hash, code)`` argument pair and at most
    1000 entries are kept.

    NOTE(review): ``sandbox.execute`` is awaited elsewhere in this document.
    If it is a coroutine function, this caches a coroutine object, which can
    be awaited only once - confirm before relying on cache hits.
    """
    outcome = sandbox.execute(code)
    return outcome

def execute_with_cache(code: str) -> ExecutionResult:
    """Execute ``code`` via ``execute_cached``, supplying its SHA-256 digest.

    Args:
        code: Python source text to run.

    Returns:
        Whatever ``execute_cached`` returns for this source.
    """
    # Local import: ``hashlib`` is used here but not imported by this snippet.
    import hashlib

    code_hash = hashlib.sha256(code.encode()).hexdigest()
    return execute_cached(code_hash, code)

并发执行

import asyncio
from concurrent.futures import ThreadPoolExecutor

class ParallelExecutor:
    """Run several code snippets concurrently on a thread pool."""

    def __init__(self, max_workers=10):
        self.executor = ThreadPoolExecutor(max_workers=max_workers)

    async def execute_many(self, code_snippets: list[str]) -> "list[ExecutionResult]":
        """Execute every snippet on the pool and return the results in input order.

        Args:
            code_snippets: Source strings, each handed to ``self._execute_single``.

        Returns:
            One result per snippet, in the same order as ``code_snippets``.

        NOTE(review): ``_execute_single`` is not defined in this class as
        shown; it must be supplied (e.g. by a subclass) before this is called.
        """
        if not code_snippets:
            return []

        # Inside a coroutine the running loop is the one to schedule on.
        loop = asyncio.get_running_loop()

        tasks = [
            loop.run_in_executor(self.executor, self._execute_single, code)
            for code in code_snippets
        ]

        # gather() preserves the order of the awaitables it was given.
        return await asyncio.gather(*tasks)

应用场景

场景 1：数据分析

# The user uploads a data file and asks for an analysis
code = """
import pandas as pd

df = pd.read_csv('data.csv')
print(df.describe())
print(df.corr())
"""
result = execute(code)
# The AI then interprets the statistics for the user
# AI 解释统计结果

场景 2：数学计算

# A more involved math problem: the snippet evaluates a definite integral numerically
code = """
import numpy as np
from scipy import integrate

# 计算定积分
result = integrate.quad(lambda x: x**2, 0, 1)
print(f"积分结果：{result[0]}")
"""

场景 3：图像处理

# 图像转换
code = """
from PIL import Image

img = Image.open('input.png')
img = img.resize((800, 600))
img.save('output.jpg')
print("转换完成")
"""

场景 4：API 集成

# 调用外部 API（在受控环境）
code = """
import requests

response = requests.get('https://api.example.com/data')
data = response.json()
print(f"获取到 {len(data)} 条记录")
"""

错误处理

超时处理

def execute_with_timeout(code: str, timeout: int) -> ExecutionResult:
    """Run ``code`` in a child Python process, giving up after ``timeout`` seconds.

    Args:
        code: Python source passed to the interpreter via ``-c``.
        timeout: Maximum run time in seconds.

    Returns:
        An ``ExecutionResult`` with the child's stdout/stderr. ``success`` is
        True only for exit status 0. On timeout, ``success`` is False and
        ``stderr`` holds a timeout message.
    """
    import sys

    # Prefer the interpreter running this code: a bare 'python' may be absent
    # from PATH or resolve to a different version. Fall back to the old name
    # when sys.executable is unavailable.
    interpreter = sys.executable or 'python'

    try:
        result = subprocess.run(
            [interpreter, '-c', code],
            capture_output=True,
            text=True,
            timeout=timeout
        )
    except subprocess.TimeoutExpired:
        return ExecutionResult(
            success=False,
            stdout="",
            stderr=f"执行超时 ({timeout}秒)"
        )

    return ExecutionResult(
        success=result.returncode == 0,
        stdout=result.stdout,
        stderr=result.stderr
    )

资源超限处理

def limit_resources(func):
    """Decorator that sets CPU-time and address-space limits before calling ``func``.

    Each call of the wrapped function first sets ``RLIMIT_CPU`` to 30 seconds
    and ``RLIMIT_AS`` to 256 MiB on the current process. The limits are not
    restored afterwards.

    Args:
        func: Callable to wrap.

    Returns:
        A wrapper with the same signature. It returns ``func``'s result, or a
        failed ``ExecutionResult`` when the call raises ``MemoryError`` or
        ``resource.error``.
    """
    import functools

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        # Set resource limits
        resource.setrlimit(resource.RLIMIT_CPU, (30, 30))
        # setrlimit needs a (soft, hard) pair; the former 1-tuple raised ValueError.
        memory_limit = 256 * 1024 * 1024
        resource.setrlimit(resource.RLIMIT_AS, (memory_limit, memory_limit))

        try:
            return func(*args, **kwargs)
        # Running into the address-space limit surfaces as MemoryError.
        except (MemoryError, resource.error) as e:
            return ExecutionResult(
                success=False,
                stdout="",
                stderr=f"资源超限：{e}"
            )
    return wrapper

监控和日志

import logging

logger = logging.getLogger("code_executor")


class ExecutionMonitor:
    """Accumulate counters and total run time for code executions."""

    def __init__(self):
        # Running totals, updated by record().
        self.metrics = {
            'total_executions': 0,
            'successful_executions': 0,
            'failed_executions': 0,
            'total_execution_time': 0
        }

    def record(self, result: "ExecutionResult", duration: float):
        """Count one finished execution and log its outcome.

        Args:
            result: Execution outcome; only its ``success`` attribute is read.
            duration: Run time in seconds, added to the running total.
        """
        outcome_key = (
            'successful_executions' if result.success else 'failed_executions'
        )
        self.metrics['total_executions'] += 1
        self.metrics[outcome_key] += 1
        self.metrics['total_execution_time'] += duration

        logger.info(f"执行完成：成功={result.success}, 耗时={duration:.2f}s")

    def get_stats(self) -> dict:
        """Return a copy of the counters plus ``success_rate``.

        ``success_rate`` is successful / total executions. It is 0.0 while
        nothing has been recorded, so an empty monitor does not raise
        ``ZeroDivisionError``.
        """
        total = self.metrics['total_executions']
        success_rate = (
            self.metrics['successful_executions'] / total if total else 0.0
        )
        return {
            **self.metrics,
            'success_rate': success_rate
        }

关键要点总结

代码执行增强 AI 能力：弥补纯语言模型的局限
安全隔离至关重要：沙箱、资源限制、网络隔离
性能优化必要：缓存、预编译、并发执行
错误处理完善：超时、资源超限、异常捕获

个人评价

代码执行是 AI Agent 能力建设的关键一步：

优点：

能力增强：显著扩展 AI 能力边界
准确性提升：计算和数据处理 100% 准确
效率提高：自动化重复性任务

总体评价：

通过 MCP 实现代码执行是构建高效 AI Agent 的核心能力。在安全隔离的前提下，这为 AI 应用开辟了新的可能性。

本文内容翻译自 Anthropic Engineering Blog 官方博客。