吴恩达MCP课程（5）：research_server_prompt_resource.py

代码

python 复制代码

import arxiv
import json
import os
from typing import List
from mcp.server.fastmcp import FastMCP

# Root directory for saved paper metadata; each searched topic gets its own
# sub-folder (holding a papers_info.json file) underneath it. The path is
# relative, so it is resolved against the process's current working directory.
PAPER_DIR = "papers"

# FastMCP server instance named "research"; the @mcp.tool(), @mcp.resource()
# and @mcp.prompt() decorators used in this file hang their functions on it.
mcp = FastMCP("research")

@mcp.tool()
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    The metadata of every hit is merged into
    ``<PAPER_DIR>/<topic>/papers_info.json``, where ``<topic>`` is the topic
    lower-cased with spaces replaced by underscores. Entries already stored
    under the same paper ID are overwritten; all other entries are kept.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search
    """
    # Local import: only needed for the status message written to stderr.
    import sys

    # Search for the most relevant articles matching the queried topic
    client = arxiv.Client()
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    papers = client.results(search)

    # Create directory for this topic
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "papers_info.json")

    # Start from what is already on disk; a missing or corrupt file simply
    # means there is nothing to merge with.
    try:
        with open(file_path, "r", encoding="utf-8") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}

    # Process each paper and add it to papers_info, keyed by its short ID
    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date()),
        }

    # Save updated papers_info to json file
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(papers_info, json_file, indent=2)

    # The server is run with the stdio transport, where stdout carries the
    # protocol messages; diagnostics therefore go to stderr.
    print(f"Results are saved in: {file_path}", file=sys.stderr)

    return paper_ids

@mcp.tool()
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Every sub-folder of PAPER_DIR that contains a papers_info.json file is
    inspected; the first file holding ``paper_id`` wins. Files that cannot be
    parsed are reported on stderr and skipped.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
        (also when the papers directory does not exist yet)
    """
    # Local import: only needed for the diagnostic written to stderr.
    import sys

    not_found = f"There's no saved information related to paper {paper_id}."

    try:
        entries = os.listdir(PAPER_DIR)
    except FileNotFoundError:
        # No search has stored anything yet, so there is nothing to look up.
        return not_found

    for item in entries:
        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
        # isfile() is False both when `item` is not a directory and when the
        # directory has no papers_info.json, so one check covers both cases.
        if not os.path.isfile(file_path):
            continue

        try:
            with open(file_path, "r", encoding="utf-8") as json_file:
                papers_info = json.load(json_file)
        except (FileNotFoundError, json.JSONDecodeError) as e:
            # stdout carries the protocol messages under the stdio transport,
            # so diagnostics are written to stderr instead.
            print(f"Error reading {file_path}: {str(e)}", file=sys.stderr)
            continue

        if paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return not_found



@mcp.resource("papers://folders")
def get_available_folders() -> str:
    """
    List all available topic folders in the papers directory.

    A folder counts as available when it is a sub-folder of PAPER_DIR that
    contains a papers_info.json file. Folder names are listed in sorted order.

    Returns:
        Markdown text: a heading, one bullet per topic folder and a usage
        hint, or "No topics found." when there is none.
    """
    folders = []

    # Get all topic directories that actually hold saved paper data
    if os.path.isdir(PAPER_DIR):
        folders = sorted(
            name
            for name in os.listdir(PAPER_DIR)
            if os.path.isfile(os.path.join(PAPER_DIR, name, "papers_info.json"))
        )

    # Create a simple markdown list
    lines = ["# Available Topics", ""]
    if folders:
        lines.extend(f"- {folder}" for folder in folders)
        lines.append("")
        # Generic placeholder: the hint applies to every listed topic, not
        # only to whichever folder happened to be enumerated last.
        lines.append("Use @<topic> to access papers in that topic.")
    else:
        lines.append("No topics found.")

    return "\n".join(lines) + "\n"

@mcp.resource("papers://{topic}")
def get_topic_papers(topic: str) -> str:
    """
    Get detailed information about papers on a specific topic.

    Reads ``<PAPER_DIR>/<topic>/papers_info.json`` (topic lower-cased, spaces
    replaced by underscores) and renders it as markdown.

    Args:
        topic: The research topic to retrieve papers for

    Returns:
        Markdown text with one section per paper (title, ID, authors,
        publication date, PDF link and a summary of at most 500 characters),
        or a short markdown message when the topic has no saved data or the
        data file cannot be parsed.
    """
    max_summary_chars = 500

    topic_dir = topic.lower().replace(" ", "_")
    papers_file = os.path.join(PAPER_DIR, topic_dir, "papers_info.json")

    if not os.path.exists(papers_file):
        return f"# No papers found for topic: {topic}\n\nTry searching for papers on this topic first."

    # Only the load can raise JSONDecodeError, so only the load is guarded.
    try:
        with open(papers_file, "r", encoding="utf-8") as f:
            papers_data = json.load(f)
    except json.JSONDecodeError:
        return f"# Error reading papers data for {topic}\n\nThe papers data file is corrupted."

    # Create markdown content with paper details
    parts = [
        f"# Papers on {topic.replace('_', ' ').title()}\n\n",
        f"Total papers: {len(papers_data)}\n\n",
    ]

    for paper_id, paper_info in papers_data.items():
        summary = paper_info['summary']
        # Mark the summary with an ellipsis only when text was actually cut.
        if len(summary) > max_summary_chars:
            summary = summary[:max_summary_chars] + "..."

        parts.append(f"## {paper_info['title']}\n")
        parts.append(f"- **Paper ID**: {paper_id}\n")
        parts.append(f"- **Authors**: {', '.join(paper_info['authors'])}\n")
        parts.append(f"- **Published**: {paper_info['published']}\n")
        parts.append(f"- **PDF URL**: [{paper_info['pdf_url']}]({paper_info['pdf_url']})\n\n")
        parts.append(f"### Summary\n{summary}\n\n")
        parts.append("---\n\n")

    return "".join(parts)

@mcp.prompt()
def generate_search_prompt(topic: str, num_papers: int = 5) -> str:
    """
    Build the instruction text asking the model to search for and review papers.

    Args:
        topic: Subject inserted into the instructions and the search_papers call
        num_papers: Number of papers the instructions ask for (default: 5)

    Returns:
        The prompt as one newline-separated string, without a trailing newline
    """
    # One list entry per output line; empty strings are the blank separator
    # lines. The first line intentionally keeps its trailing space.
    prompt_lines = [
        f"Search for {num_papers} academic papers about '{topic}' using the search_papers tool. ",
        "",
        "Follow these instructions:",
        f"1. First, search for papers using search_papers(topic='{topic}', max_results={num_papers})",
        "2. For each paper found, extract and organize the following information:",
        "   - Paper title",
        "   - Authors",
        "   - Publication date",
        "   - Brief summary of the key findings",
        "   - Main contributions or innovations",
        "   - Methodologies used",
        f"   - Relevance to the topic '{topic}'",
        "",
        "3. Provide a comprehensive summary that includes:",
        f"   - Overview of the current state of research in '{topic}'",
        "   - Common themes and trends across the papers",
        "   - Key research gaps or areas for future investigation",
        "   - Most impactful or influential papers in this area",
        "",
        "4. Organize your findings in a clear, structured format with headings and bullet points for easy readability.",
        "",
        f"Please present both detailed information about each paper and a high-level synthesis of the research landscape in {topic}.",
    ]
    return "\n".join(prompt_lines)

if __name__ == "__main__":
    # Executed as a script: start the server, exchanging messages over stdin/stdout
    mcp.run(transport="stdio")

代码解释

这个research_server_prompt_resource.py文件实现了一个基于MCP（Model Context Protocol）的研究服务器，主要用于搜索、存储和检索arXiv上的学术论文。下面是对代码的详细解释：

1. 导入和初始化

python 复制代码

import arxiv
import json
import os
from typing import List
from mcp.server.fastmcp import FastMCP

PAPER_DIR = "papers"

# Initialize FastMCP server
mcp = FastMCP("research")

- 导入必要的库：arxiv 用于访问 arXiv API，json 用于处理 JSON 数据，os 用于文件和目录操作，typing.List 用于类型注解，FastMCP 用于创建 MCP 服务器
- 定义论文存储目录为 papers（相对路径，相对于服务器进程的当前工作目录）
- 创建一个名为 "research" 的 FastMCP 服务器实例

2. 工具函数：搜索论文

python 复制代码

@mcp.tool()
def search_papers(topic: str, max_results: int = 5) -> List[str]:

这个函数被注册为MCP工具，用于根据主题搜索arXiv上的论文：

功能：搜索arXiv上与指定主题相关的论文，并将结果保存到本地
参数：
- topic：搜索主题
- max_results：最大结果数（默认5篇）
返回值：找到的论文ID列表

核心实现：

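1. 使用 arxiv 客户端按相关性搜索论文
2. 为主题创建目录（主题名转为小写、空格替换为下划线；目录已存在时不会报错）
3. 尝试加载该主题已有的 papers_info.json（文件不存在或内容无法解析时从空字典开始）
4. 处理每篇论文，提取标题、作者、摘要、PDF 链接和发布日期，并以论文短 ID 为键写入字典（相同 ID 的旧记录会被覆盖）
5. 将合并后的论文信息保存回 JSON 文件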

3. 工具函数：提取论文信息

python 复制代码

@mcp.tool()
def extract_info(paper_id: str) -> str:

这个函数被注册为MCP工具，用于根据论文ID检索特定论文的详细信息：

功能：在所有主题目录中搜索指定ID的论文信息
参数：
- paper_id：要查找的论文ID
返回值：包含论文信息的JSON字符串；未找到时返回一条提示消息

实现逻辑：

1. 遍历 papers 目录下的所有主题文件夹
2. 读取每个文件夹中的 papers_info.json 文件（读取或解析失败时打印错误信息并跳过该文件夹）
3. 如果找到匹配的论文 ID，立即返回其详细信息；全部遍历后仍未找到，则返回提示消息

4. 资源函数：获取可用文件夹

python 复制代码

@mcp.resource("papers://folders")
def get_available_folders() -> str:

这个函数被注册为MCP资源，提供URI为papers://folders的访问点：

功能：列出papers目录中所有可用的主题文件夹
返回值：包含所有主题列表的Markdown格式文本

实现方式：

扫描papers目录，查找包含papers_info.json文件的子目录
将找到的主题列表格式化为Markdown文本
添加使用说明（如何访问特定主题）

5. 资源函数：获取主题论文

python 复制代码

@mcp.resource("papers://{topic}")
def get_topic_papers(topic: str) -> str:

这个函数被注册为MCP资源，提供动态URI papers://{topic}的访问点：

功能：获取特定主题的所有论文详细信息
参数：topic：要检索的研究主题
返回值：包含该主题所有论文详细信息的Markdown格式文本

实现细节：

1. 根据主题名构建文件路径（主题名转为小写、空格替换为下划线）
2. 检查并加载该主题的论文信息 JSON 文件（文件不存在或已损坏时返回相应的提示文本）
3. 将论文信息格式化为结构化的 Markdown 文档
4. 包含每篇论文的标题、ID、作者、发布日期、PDF 链接和摘要（摘要只保留前 500 个字符）

6. 提示函数：生成搜索提示

python 复制代码

@mcp.prompt()
def generate_search_prompt(topic: str, num_papers: int = 5) -> str:

这个函数被注册为MCP提示，用于生成结构化的搜索指令：

功能：生成一个提示文本，指导AI如何搜索和讨论特定主题的学术论文
参数：
- topic：要搜索的主题
- num_papers：要检索的论文数量（默认5篇）
返回值：格式化的提示文本

提示内容包括：

使用search_papers工具搜索论文的指令
如何提取和组织每篇论文的信息（标题、作者、发布日期等）
如何提供综合摘要（研究现状、共同主题、研究空白等）
如何组织和呈现结果

7. 主程序

python 复制代码

if __name__ == "__main__":
    # Initialize and run the server
    mcp.run(transport='stdio')

当脚本作为主程序运行时，启动MCP服务器，使用标准输入/输出（stdio）作为传输方式。

总结

这个服务器实现了以下核心功能：

论文搜索与存储：通过arXiv API搜索论文并将结果保存到本地JSON文件
论文信息检索：根据论文ID或主题检索论文详细信息
资源访问：提供URI访问点，用于获取可用主题列表和特定主题的论文信息
提示生成：生成结构化提示，指导AI如何搜索和分析学术论文

这个服务器设计为与MCP兼容的工具，可以被MCP客户端（如聊天机器人）调用，为用户提供学术论文搜索和分析功能。