
前言
MCP(Model Context Protocol)正在成为跨境电商AI工作流的核心基础设施之一。本文将从技术角度深度解析如何将MCP协议与亚马逊数据采集体系集成,构建一套真正能在生产环境中跑通的亚马逊MCP数据运营工具链——覆盖数据接入、上下文构建、AI推理、以及决策输出的完整链路。
适合读者:有Python基础、了解API调用、正在探索AI工作流开发的跨境电商技术团队。
一、MCP协议架构基础
1.1 MCP是什么
Model Context Protocol是Anthropic于2024年11月正式开源的标准协议,旨在解决大语言模型与外部工具、数据源之间的集成标准化问题。
核心架构由三个角色构成:
| 角色 | 职责 | 典型实现 |
|---|---|---|
| MCP Host | 托管和协调AI模型与工具的运行环境 | Claude Desktop / 自建应用 |
| MCP Client | 负责维护与MCP Server的连接 | 嵌入Host的客户端模块 |
| MCP Server | 暴露工具、资源、提示词的服务端 | 数据API封装层(如Pangolinfo) |
对于亚马逊MCP数据运营场景,关键是构建一个高质量的MCP Server,将亚马逊数据采集能力封装为标准化工具,供AI模型通过MCP协议实时调用。
1.2 工具调用流程
用户 Prompt
↓
AI Model (Claude/GPT)
↓ [tool_call: scan_bsr_opportunities]
MCP Client
↓ [JSON-RPC]
MCP Server (Pangolinfo Data Layer)
↓ [HTTP API]
Pangolinfo Scrape API → 亚马逊实时数据
↓
结构化 JSON 返回
↓
AI Model 推理 → 选品决策输出
二、环境配置与依赖安装
bash
# 核心依赖
pip install anthropic mcp requests python-dotenv
# 可选:异步支持
pip install aiohttp asyncio
# 开发调试
pip install mcp[dev]
python
# .env 配置示例
PANGOLINFO_API_KEY=your_pangolinfo_api_key
ANTHROPIC_API_KEY=your_anthropic_api_key
三、MCP Server 实现:封装 Pangolinfo 数据工具
3.1 亚马逊BSR机会扫描工具
python
# server/amazon_mcp_server.py
import asyncio
import json
import requests
import os
from mcp.server import Server
from mcp.server.models import InitializationOptions
from mcp.server.stdio import stdio_server
from mcp import types
# Create the MCP server instance that exposes the Amazon data tools.
server = Server("pangolinfo-amazon-mcp")

# Credentials and endpoint root, read once at import time.
PANGOLINFO_API_KEY = os.getenv("PANGOLINFO_API_KEY")
BASE_URL = "https://api.pangolinfo.com/v2"


def get_headers():
    """Return the HTTP headers required by every Pangolinfo API call."""
    headers = {
        "Authorization": f"Bearer {PANGOLINFO_API_KEY}",
        "Content-Type": "application/json",
    }
    return headers
@server.list_tools()
async def handle_list_tools() -> list[types.Tool]:
    """Declare every tool this MCP server exposes to connected clients.

    The AI model selects tools based on the name/description/inputSchema
    triplet, so each schema documents its parameters in detail.
    """
    # JSON Schema for the BSR opportunity scanner.
    bsr_schema = {
        "type": "object",
        "properties": {
            "category_id": {
                "type": "string",
                "description": "亚马逊类目节点ID(如宠物用品:2975312011)",
            },
            "marketplace": {
                "type": "string",
                "description": "目标市场(US/UK/DE/JP/CA),默认US",
                "default": "US",
            },
            "days": {
                "type": "integer",
                "description": "历史分析窗口(天),默认7天",
                "default": 7,
            },
            "max_reviews": {
                "type": "integer",
                "description": "最大评论数阈值,用于筛选低竞争品",
                "default": 150,
            },
            "min_sales": {
                "type": "integer",
                "description": "最小月销量估计",
                "default": 300,
            },
        },
        "required": ["category_id"],
    }

    # JSON Schema for the review-analysis tool.
    reviews_schema = {
        "type": "object",
        "properties": {
            "asin": {
                "type": "string",
                "description": "亚马逊产品ASIN",
            },
            "marketplace": {
                "type": "string",
                "description": "目标市场",
                "default": "US",
            },
            "max_pages": {
                "type": "integer",
                "description": "抓取页数上限,每页约10条评论",
                "default": 10,
            },
            "rating_filter": {
                "type": "string",
                "description": "评分筛选:all/one_star/two_star/critical",
                # Defaults to negative reviews, which drive iteration decisions.
                "default": "critical",
            },
        },
        "required": ["asin"],
    }

    # JSON Schema for the sponsored-ad intelligence tool.
    ads_schema = {
        "type": "object",
        "properties": {
            "keywords": {
                "type": "array",
                "items": {"type": "string"},
                "description": "目标关键词列表(最多10个)",
            },
            "marketplace": {
                "type": "string",
                "description": "目标市场",
                "default": "US",
            },
        },
        "required": ["keywords"],
    }

    return [
        types.Tool(
            name="scan_bsr_opportunities",
            description="扫描亚马逊指定类目的BSR机会品,基于实时榜单变化筛选低竞争、高增速的产品。",
            inputSchema=bsr_schema,
        ),
        types.Tool(
            name="get_product_reviews_analysis",
            description="获取指定ASIN的评论数据并生成结构化分析报告,用于产品迭代决策。",
            inputSchema=reviews_schema,
        ),
        types.Tool(
            name="get_ad_placement_intelligence",
            description="获取指定关键词的SP广告位分布数据,分析竞价格局和广告投放机会。",
            inputSchema=ads_schema,
        ),
    ]
@server.call_tool()
async def handle_call_tool(name: str, arguments: dict) -> list[types.TextContent]:
    """Dispatch a tool invocation to its handler and return the result as text."""
    # Table-driven dispatch: tool name -> async handler coroutine.
    handlers = {
        "scan_bsr_opportunities": _scan_bsr,
        "get_product_reviews_analysis": _get_reviews,
        "get_ad_placement_intelligence": _get_ad_intelligence,
    }
    handler = handlers.get(name)
    if handler is None:
        result = {"error": f"Unknown tool: {name}"}
    else:
        result = await handler(arguments)
    # Serialize with ensure_ascii=False so Chinese text stays readable for the model.
    payload = json.dumps(result, ensure_ascii=False, indent=2)
    return [types.TextContent(type="text", text=payload)]
async def _scan_bsr(args: dict) -> dict:
    """Scan a category's BSR movers via the Pangolinfo bestsellers endpoint.

    Args:
        args: Tool arguments. Requires "category_id"; accepts optional
            "marketplace", "days", "max_reviews" and "min_sales".

    Returns:
        A dict with a "scan_summary" section and a ranked "opportunities"
        list, or {"error": ...} when the HTTP call fails.
    """
    payload = {
        "category_id": args["category_id"],
        "country": args.get("marketplace", "US"),
        "days": args.get("days", 7),
        "filters": {
            "max_reviews": args.get("max_reviews", 150),
            "min_monthly_sales": args.get("min_sales", 300),
            "bsr_change_threshold": 10,
        },
        "output_format": "json",
    }
    try:
        # requests is blocking; run it in a worker thread so the MCP
        # server's event loop stays responsive to concurrent tool calls.
        response = await asyncio.to_thread(
            requests.post,
            f"{BASE_URL}/amazon/bestsellers",
            json=payload,
            headers=get_headers(),
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()
        results = data.get("results", [])
        # Reshape into an AI-friendly structure.
        return {
            "scan_summary": {
                "category": args["category_id"],
                "marketplace": args.get("marketplace", "US"),
                "analysis_window": f"{args.get('days', 7)} days",
                "opportunities_found": len(results),
            },
            "opportunities": [{
                "rank": idx + 1,
                "asin": item["asin"],
                "product_title": item["title"],
                "current_bsr": item["bsr_current"],
                "bsr_improvement_7d": f"+{item['bsr_change']} positions",
                "monthly_sales_estimate": item["monthly_sales"],
                "review_count": item["review_count"],
                "average_rating": item["rating"],
                "price_usd": item["price"],
                "opportunity_score": item.get("opportunity_score", "N/A"),
                "entry_difficulty": item.get("entry_difficulty", "medium"),
            } for idx, item in enumerate(results)],
        }
    except requests.RequestException as e:
        return {"error": f"API call failed: {str(e)}"}
async def _get_reviews(args: dict) -> dict:
    """Fetch review data for an ASIN and return a structured summary.

    Args:
        args: Tool arguments. Requires "asin"; accepts optional
            "marketplace", "max_pages" and "rating_filter".

    Returns:
        A dict with fetched totals, the Customer Says summary, rating
        distribution, theme lists and up to 20 sample reviews, or
        {"error": ...} when the HTTP call fails.
    """
    payload = {
        "asin": args["asin"],
        "country": args.get("marketplace", "US"),
        "max_pages": args.get("max_pages", 10),
        "rating_filter": args.get("rating_filter", "critical"),
        "include_customer_says": True,  # include the Customer Says AI summary
        "output_format": "json",
    }
    try:
        # Run the blocking HTTP call off the event loop; review scraping
        # is slower than the other endpoints, hence the longer timeout.
        response = await asyncio.to_thread(
            requests.post,
            f"{BASE_URL}/amazon/reviews",
            json=payload,
            headers=get_headers(),
            timeout=60,
        )
        response.raise_for_status()
        data = response.json()
        return {
            "asin": args["asin"],
            "total_reviews_fetched": data.get("total_fetched", 0),
            "customer_says_summary": data.get("customer_says", ""),
            "rating_distribution": data.get("rating_distribution", {}),
            "top_negative_themes": data.get("negative_themes", []),
            "top_positive_themes": data.get("positive_themes", []),
            "sample_reviews": data.get("reviews", [])[:20],  # cap the sample size
        }
    except requests.RequestException as e:
        return {"error": f"Reviews API call failed: {str(e)}"}
async def _get_ad_intelligence(args: dict) -> dict:
    """Fetch sponsored-placement data for a list of keywords.

    Args:
        args: Tool arguments. Requires "keywords" (list of strings);
            accepts optional "marketplace".

    Returns:
        A dict echoing the analyzed keywords plus the raw results list,
        or {"error": ...} when the HTTP call fails.
    """
    payload = {
        "keywords": args["keywords"],
        "country": args.get("marketplace", "US"),
        "include_sponsored_positions": True,
        "output_format": "json",
    }
    try:
        # Keep the event loop free while the blocking request runs.
        response = await asyncio.to_thread(
            requests.post,
            f"{BASE_URL}/amazon/ad-intelligence",
            json=payload,
            headers=get_headers(),
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()
        return {
            "keywords_analyzed": args["keywords"],
            "ad_intelligence": data.get("results", []),
        }
    except requests.RequestException as e:
        return {"error": f"Ad Intelligence API call failed: {str(e)}"}
# Entry point: serve the MCP protocol over stdio.
async def main():
    """Run the MCP server over stdio until the client disconnects."""
    # Imported here to keep the fix local to the entry point.
    from mcp.server import NotificationOptions

    async with stdio_server() as (read_stream, write_stream):
        await server.run(
            read_stream,
            write_stream,
            InitializationOptions(
                server_name="pangolinfo-amazon-mcp",
                server_version="1.0.0",
                # InitializationOptions requires the server's declared
                # capabilities; omitting it fails model validation at startup.
                capabilities=server.get_capabilities(
                    notification_options=NotificationOptions(),
                    experimental_capabilities={},
                ),
            ),
        )


if __name__ == "__main__":
    asyncio.run(main())
四、AI Client:接入Claude进行选品推理
python
# client/amazon_ai_ops.py
import asyncio
import json
import anthropic
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
async def run_amazon_opportunity_analysis(
    category_id: str,
    marketplace: str = "US",
    analysis_prompt: str | None = None,
):
    """Full Amazon MCP data-ops workflow.

    Connects to the MCP server, lets Claude call the data tools in a
    multi-turn loop, and returns the final text report.

    Args:
        category_id: Amazon browse-node ID to analyze.
        marketplace: Target marketplace code (default "US").
        analysis_prompt: Optional custom prompt; a default product-selection
            prompt is built when omitted.

    Returns:
        The model's final answer as a string (may be empty if the model
        produced no text block).
    """
    import os  # local import: only needed to forward the API key

    # Launch the MCP server as a subprocess speaking stdio.
    server_params = StdioServerParameters(
        command="python",
        args=["server/amazon_mcp_server.py"],
        # Forward the real key from the environment instead of a
        # hard-coded placeholder string.
        env={"PANGOLINFO_API_KEY": os.getenv("PANGOLINFO_API_KEY", "")},
    )
    anthropic_client = anthropic.Anthropic()

    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            # Handshake with the MCP server.
            await session.initialize()

            # Advertise the MCP tools to Claude in Anthropic's tool format.
            tools_response = await session.list_tools()
            tools = [
                {
                    "name": tool.name,
                    "description": tool.description,
                    "input_schema": tool.inputSchema,
                }
                for tool in tools_response.tools
            ]

            # Build the default analysis prompt when the caller gave none.
            if not analysis_prompt:
                analysis_prompt = f"""
请对亚马逊{marketplace}市场的类目 {category_id} 进行选品机会分析:
1. 扫描过去7天内BSR排名提升幅度最大的产品,筛选评论数少于150条的候选品
2. 对排名第一的机会品,获取其差评数据,分析产品改进方向
3. 综合以上数据,给出:
- 前3个最值得关注的机会品及推荐理由
- 切入该类目的差异化方向建议
- 风险提示
请以结构化报告形式输出。
"""

            messages = [{"role": "user", "content": analysis_prompt}]

            # Multi-turn reasoning loop: keep going while Claude asks for tools.
            while True:
                response = anthropic_client.messages.create(
                    model="claude-opus-4-5",
                    max_tokens=4096,
                    tools=tools,
                    messages=messages,
                )

                if response.stop_reason == "tool_use":
                    tool_results = []
                    for block in response.content:
                        if block.type != "tool_use":
                            continue
                        print(f"[MCP] 调用工具: {block.name}")
                        print(f"[MCP] 参数: {json.dumps(block.input, ensure_ascii=False)}")
                        # Execute the tool through the MCP session.
                        result = await session.call_tool(
                            block.name,
                            arguments=block.input,
                        )
                        # Guard against empty content before indexing.
                        text = result.content[0].text if result.content else "No data"
                        tool_results.append({
                            "type": "tool_result",
                            "tool_use_id": block.id,
                            "content": text,
                        })
                        print(f"[MCP] 工具返回: {len(text)} 字符")

                    # Feed the tool outputs back into the conversation.
                    messages.append({"role": "assistant", "content": response.content})
                    messages.append({"role": "user", "content": tool_results})
                    continue

                # end_turn, max_tokens, or any other stop reason: return the
                # text we have instead of re-sending an unchanged request
                # forever (the original looped infinitely on e.g. max_tokens).
                return next(
                    (block.text for block in response.content if hasattr(block, "text")),
                    "",
                )
# Example usage
async def main():
    """Demo entry point: analyze the US pet-supplies category and print the report."""
    print("启动亚马逊MCP数据运营分析...")
    report = await run_amazon_opportunity_analysis(
        category_id="2975312011",  # Pet Supplies browse node
        marketplace="US",
    )
    banner = "=" * 60
    print("\n" + banner)
    print("选品分析报告")
    print(banner)
    print(report)


if __name__ == "__main__":
    asyncio.run(main())
五、常见问题与解决方案
Q:MCP Server连接超时怎么办?
调整 stdio_client 的 timeout 参数,或在 Pangolinfo API 调用中添加重试机制:
python
from tenacity import retry, stop_after_attempt, wait_exponential
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=2, max=8))
def call_pangolinfo_api(endpoint, payload):
    """POST *payload* to a Pangolinfo endpoint and return the parsed JSON.

    Retries up to 3 times with exponential backoff on any raised error,
    including HTTP error statuses surfaced by raise_for_status().
    """
    resp = requests.post(endpoint, json=payload, headers=get_headers(), timeout=30)
    resp.raise_for_status()
    return resp.json()
Q:如何提高AI工具调用的准确性?
在工具的 description 字段提供足够具体的使用场景说明,在 inputSchema 中为每个参数添加详细描述。AI选择工具的依据主要是这两个字段的质量。
Q:Pangolinfo API的并发限制是多少?
视套餐而定,建议在生产环境中使用异步调用并配置请求队列,避免触发限流。具体并发参数参考 Pangolinfo 文档中心。
六、性能优化建议
- 数据缓存策略:BSR数据每小时更新一次即可满足大多数场景,针对同一类目建立本地Redis缓存,避免重复API调用
- 异步并发调用:使用 aiohttp 替换 requests,支持多类目并发扫描
- 流式输出:Claude支持流式响应,长报告生成场景中使用 stream=True 改善用户体验
- 错误降级:API调用失败时返回上次缓存的数据并标记时间戳,避免完全中断工作流
总结
本文覆盖了亚马逊MCP数据运营体系的完整技术实现链路:从MCP Server构建到AI多轮工具调用,从Pangolinfo数据接入到结构化选品报告输出。核心设计原则只有一条:数据即时性是整个体系的竞争力上限,Scrape API的分钟级更新能力正是为这个场景设计的。
接下来如果需要进一步拓展,可以考虑加入ABA搜索词趋势分析工具,以及Movers & Shakers急速上升榜的实时追踪工具,形成完整的"机会发现-验证-监控"闭环。