吴恩达MCP课程(5):research_server_prompt_resource.py

代码

python 复制代码
import arxiv
import json
import os
from typing import List
from mcp.server.fastmcp import FastMCP

# Root directory (relative path) under which each topic gets its own
# sub-directory containing a papers_info.json file.
PAPER_DIR = "papers"

# FastMCP server instance named "research"; the tools, resources and prompt
# defined below register themselves on it through its decorators.
mcp = FastMCP("research")

@mcp.tool()
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    The metadata of every hit is merged into
    ``<PAPER_DIR>/<topic lower-cased, spaces -> underscores>/papers_info.json``;
    entries already stored under the same paper ID are overwritten.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """
    # Local import: only needed for the status message below.
    import sys

    # Query arXiv for the most relevant articles matching the topic.
    client = arxiv.Client()
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    papers = client.results(search)

    # One directory per topic, created on demand.
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "papers_info.json")

    # Start from previously stored results so repeated searches accumulate.
    try:
        with open(file_path, "r", encoding="utf-8") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}
    # A file holding valid JSON that is not an object (e.g. a list) cannot be
    # merged into; treat it like a corrupted cache and start over.
    if not isinstance(papers_info, dict):
        papers_info = {}

    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date()),
        }

    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(papers_info, json_file, indent=2)

    # This server is started with the stdio transport, where stdout carries
    # the protocol messages; diagnostics therefore go to stderr.
    print(f"Results are saved in: {file_path}", file=sys.stderr)

    return paper_ids

@mcp.tool()
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Every ``papers_info.json`` found one level below ``PAPER_DIR`` is checked
    in turn; unreadable or malformed files are reported on stderr and skipped.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """
    # Local import: only needed for the diagnostic message below.
    import sys

    not_found = f"There's no saved information related to paper {paper_id}."

    # Before the first search the papers directory does not exist yet; that
    # simply means nothing has been saved, not that something went wrong.
    try:
        entries = os.listdir(PAPER_DIR)
    except (FileNotFoundError, NotADirectoryError):
        return not_found

    for item in entries:
        # isfile() on the nested path is only true when `item` is a directory,
        # so no separate isdir() check is required.
        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
        if not os.path.isfile(file_path):
            continue
        try:
            with open(file_path, "r", encoding="utf-8") as json_file:
                papers_info = json.load(json_file)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            # stdout is reserved for protocol traffic under the stdio
            # transport, so the diagnostic goes to stderr.
            print(f"Error reading {file_path}: {str(e)}", file=sys.stderr)
            continue
        if isinstance(papers_info, dict) and paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return not_found



@mcp.resource("papers://folders")
def get_available_folders() -> str:
    """
    List all available topic folders in the papers directory.

    This resource provides a simple list of all available topic folders.
    A folder counts as available when it contains a ``papers_info.json``.

    Returns:
        Markdown text: a heading, one bullet per topic folder (sorted by
        name) and a usage hint, or a "No topics found." notice.
    """
    folders = []

    # isdir() rather than exists(): a regular file named like PAPER_DIR must
    # not make listdir() raise.
    if os.path.isdir(PAPER_DIR):
        # os.listdir() returns names in arbitrary order; sort so the listing
        # is stable between calls.
        for topic_dir in sorted(os.listdir(PAPER_DIR)):
            papers_file = os.path.join(PAPER_DIR, topic_dir, "papers_info.json")
            if os.path.exists(papers_file):
                folders.append(topic_dir)

    lines = ["# Available Topics\n\n"]
    if folders:
        lines.extend(f"- {folder}\n" for folder in folders)
        # Pick the example folder explicitly instead of relying on a loop
        # variable that leaks out of the for statement.
        example = folders[-1]
        lines.append(f"\nUse @{example} to access papers in that topic.\n")
    else:
        lines.append("No topics found.\n")

    return "".join(lines)

@mcp.resource("papers://{topic}")
def get_topic_papers(topic: str) -> str:
    """
    Get detailed information about papers on a specific topic.

    Args:
        topic: The research topic to retrieve papers for

    Returns:
        Markdown text with one section per stored paper (title, ID, authors,
        publication date, PDF link and a summary cut to 500 characters), or a
        short Markdown notice when the topic has no data file or the file is
        not valid JSON.
    """
    summary_limit = 500

    # Same directory naming as used when the search results were stored.
    topic_dir = topic.lower().replace(" ", "_")
    papers_file = os.path.join(PAPER_DIR, topic_dir, "papers_info.json")

    if not os.path.exists(papers_file):
        return f"# No papers found for topic: {topic}\n\nTry searching for papers on this topic first."

    # Only the load can raise JSONDecodeError, so keep the try body minimal.
    try:
        with open(papers_file, 'r', encoding="utf-8") as f:
            papers_data = json.load(f)
    except json.JSONDecodeError:
        return f"# Error reading papers data for {topic}\n\nThe papers data file is corrupted."

    heading = topic.replace('_', ' ').title()
    parts = [
        f"# Papers on {heading}\n\n",
        f"Total papers: {len(papers_data)}\n\n",
    ]

    for paper_id, paper_info in papers_data.items():
        authors = ', '.join(paper_info['authors'])
        pdf_url = paper_info['pdf_url']

        # Add the ellipsis only when text was actually cut off; a short
        # summary is shown in full without a misleading "...".
        summary = paper_info['summary']
        if len(summary) > summary_limit:
            summary = summary[:summary_limit] + "..."

        parts.append(f"## {paper_info['title']}\n")
        parts.append(f"- **Paper ID**: {paper_id}\n")
        parts.append(f"- **Authors**: {authors}\n")
        parts.append(f"- **Published**: {paper_info['published']}\n")
        parts.append(f"- **PDF URL**: [{pdf_url}]({pdf_url})\n\n")
        parts.append(f"### Summary\n{summary}\n\n")
        parts.append("---\n\n")

    return "".join(parts)

@mcp.prompt()
def generate_search_prompt(topic: str, num_papers: int = 5) -> str:
    """Generate a prompt for Claude to find and discuss academic papers on a specific topic.

    Args:
        topic: Subject inserted into the instructions and into the suggested
            search_papers call.
        num_papers: Number of papers the prompt asks for (default: 5).

    Returns:
        The complete instruction text as a single string.
    """
    # The text is assembled line by line; empty strings become the blank
    # lines that separate the numbered sections.
    prompt_lines = [
        f"Search for {num_papers} academic papers about '{topic}' using the search_papers tool. ",
        "",
        "Follow these instructions:",
        f"1. First, search for papers using search_papers(topic='{topic}', max_results={num_papers})",
        "2. For each paper found, extract and organize the following information:",
        "   - Paper title",
        "   - Authors",
        "   - Publication date",
        "   - Brief summary of the key findings",
        "   - Main contributions or innovations",
        "   - Methodologies used",
        f"   - Relevance to the topic '{topic}'",
        "",
        "3. Provide a comprehensive summary that includes:",
        f"   - Overview of the current state of research in '{topic}'",
        "   - Common themes and trends across the papers",
        "   - Key research gaps or areas for future investigation",
        "   - Most impactful or influential papers in this area",
        "",
        "4. Organize your findings in a clear, structured format with headings and bullet points for easy readability.",
        "",
        f"Please present both detailed information about each paper and a high-level synthesis of the research landscape in {topic}.",
    ]
    return "\n".join(prompt_lines)

if __name__ == "__main__":
    # Only when executed as a script (not on import): run the server
    # using the 'stdio' transport.
    mcp.run(transport='stdio')

代码解释

这个research_server_prompt_resource.py文件实现了一个基于MCP(Model Context Protocol)的研究服务器,主要用于搜索、存储和检索arXiv上的学术论文。下面是对代码的详细解释:

1. 导入和初始化

python 复制代码
import arxiv
import json
import os
from typing import List
from mcp.server.fastmcp import FastMCP

PAPER_DIR = "papers"

# Initialize FastMCP server
mcp = FastMCP("research")
  • 导入必要的库:arxiv用于访问arXiv API,json用于处理JSON数据,os用于文件操作
  • 定义论文存储目录为papers
  • 创建一个名为"research"的FastMCP服务器实例

2. 工具函数:搜索论文

python 复制代码
@mcp.tool()
def search_papers(topic: str, max_results: int = 5) -> List[str]:

这个函数被注册为MCP工具,用于根据主题搜索arXiv上的论文:

  • 功能:搜索arXiv上与指定主题相关的论文,并将结果保存到本地
  • 参数
    • topic:搜索主题
    • max_results:最大结果数(默认5篇)
  • 返回值:找到的论文ID列表

核心实现:

  1. 使用arxiv客户端搜索相关论文
  2. 为主题创建目录(如果不存在)
  3. 尝试加载现有的论文信息(如果有)
  4. 处理每篇论文,提取标题、作者、摘要等信息
  5. 将论文信息保存到JSON文件中

3. 工具函数:提取论文信息

python 复制代码
@mcp.tool()
def extract_info(paper_id: str) -> str:

这个函数被注册为MCP工具,用于根据论文ID检索特定论文的详细信息:

  • 功能:在所有主题目录中搜索指定ID的论文信息
  • 参数 paper_id:要查找的论文ID
  • 返回值:包含论文信息的JSON字符串,或未找到时的错误消息

实现逻辑:

  1. 遍历papers目录下的所有主题文件夹
  2. 检查每个文件夹中的papers_info.json文件
  3. 如果找到匹配的论文ID,返回其详细信息

4. 资源函数:获取可用文件夹

python 复制代码
@mcp.resource("papers://folders")
def get_available_folders() -> str:

这个函数被注册为MCP资源,提供URI为papers://folders的访问点:

  • 功能:列出papers目录中所有可用的主题文件夹
  • 返回值:包含所有主题列表的Markdown格式文本

实现方式:

  1. 扫描papers目录,查找包含papers_info.json文件的子目录
  2. 将找到的主题列表格式化为Markdown文本
  3. 添加使用说明(如何访问特定主题)

5. 资源函数:获取主题论文

python 复制代码
@mcp.resource("papers://{topic}")
def get_topic_papers(topic: str) -> str:

这个函数被注册为MCP资源,提供动态URI papers://{topic}的访问点:

  • 功能:获取特定主题的所有论文详细信息
  • 参数 topic:要检索的研究主题
  • 返回值:包含该主题所有论文详细信息的Markdown格式文本

实现细节:

  1. 根据主题名构建文件路径
  2. 检查并加载该主题的论文信息JSON文件
  3. 将论文信息格式化为结构化的Markdown文档
  4. 包含每篇论文的标题、ID、作者、发布日期、PDF链接和摘要

6. 提示函数:生成搜索提示

python 复制代码
@mcp.prompt()
def generate_search_prompt(topic: str, num_papers: int = 5) -> str:

这个函数被注册为MCP提示,用于生成结构化的搜索指令:

  • 功能:生成一个提示文本,指导AI如何搜索和讨论特定主题的学术论文
  • 参数
    • topic:要搜索的主题
    • num_papers:要检索的论文数量(默认5篇)
  • 返回值:格式化的提示文本

提示内容包括:

  1. 使用search_papers工具搜索论文的指令
  2. 如何提取和组织每篇论文的信息(标题、作者、发布日期等)
  3. 如何提供综合摘要(研究现状、共同主题、研究空白等)
  4. 如何组织和呈现结果

7. 主程序

python 复制代码
if __name__ == "__main__":
    # Initialize and run the server
    mcp.run(transport='stdio')

当脚本作为主程序运行时,启动MCP服务器,使用标准输入/输出(stdio)作为传输方式。

总结

这个服务器实现了以下核心功能:

  1. 论文搜索与存储:通过arXiv API搜索论文并将结果保存到本地JSON文件
  2. 论文信息检索:根据论文ID或主题检索论文详细信息
  3. 资源访问:提供URI访问点,用于获取可用主题列表和特定主题的论文信息
  4. 提示生成:生成结构化提示,指导AI如何搜索和分析学术论文

这个服务器设计为与MCP兼容的工具,可以被MCP客户端(如聊天机器人)调用,为用户提供学术论文搜索和分析功能。

相关推荐
波点兔22 分钟前
【亲测有效 | Cursor Pro每月500次快速请求扩5倍】(Windows版)Cursor中集成interactive-feedback-mcp
windows·mcp·cursor pro
CodeAgent2 小时前
【MCP 第二篇】实现一个简易的MCP
ai编程·mcp
lowcode3 小时前
MCP协议在LLM系统中的架构与实现原理研究
人工智能·llm·mcp
银空飞羽15 小时前
再学学MCP间接提示词注入
安全·mcp·trae
黎燃18 小时前
从底层逻辑看 MCP:智能计算的新引擎
mcp
一只爱撸猫的程序猿18 小时前
构建一个简单智能客户服务系统的案例
spring boot·程序员·mcp
高冷的程序员大大20 小时前
如何使用MCP开发一个客户端和服务端
程序员·mcp
cpp加油站1 天前
发现宝藏:腾讯EdgeOne Pages & 掘金MCP,Trae内一键部署网页(玩转100个MCP系列第一弹)
ai编程·mcp·trae