03-深度学习基础：RAG检索增强生成

RAG检索增强生成：让大模型连接外部知识

一、为什么需要RAG？

1.1 大模型的固有问题

Python 代码：

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

from matplotlib import font_manager

# matplotlib's default font (DejaVu Sans) ships no CJK glyphs, so the Chinese
# labels in the figures would render as empty boxes. Put whichever CJK-capable
# fonts are actually installed in front of the default sans-serif list; if
# none is installed the defaults are left untouched.
cjk_font_candidates = [
    'SimHei', 'Microsoft YaHei', 'PingFang SC', 'Heiti TC',
    'Noto Sans CJK SC', 'Source Han Sans SC', 'WenQuanYi Micro Hei',
    'Arial Unicode MS',
]
installed_font_names = {font.name for font in font_manager.fontManager.ttflist}
available_cjk_fonts = [
    name for name in cjk_font_candidates if name in installed_font_names
]
if available_cjk_fonts:
    plt.rcParams['font.sans-serif'] = (
        available_cjk_fonts + list(plt.rcParams['font.sans-serif'])
    )
# Many CJK fonts lack the Unicode minus sign (U+2212); use ASCII '-' on ticks.
plt.rcParams['axes.unicode_minus'] = False

print("=" * 60)
print("大模型的三大局限")
print("=" * 60)

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# Panel 1: training-data cutoff dates.
ax1 = axes[0]
ax1.axis('off')
ax1.set_title('知识截止日期', fontsize=12)

# Cutoffs as published by the vendors: the original GPT-3 corpus ends in
# Oct 2019 (Jun 2021 belongs to the later GPT-3.5 text-davinci models) and
# the Llama 2 model card states a pretraining cutoff of Sep 2022.
knowledge_cutoff = {
    'GPT-3': '2019年10月',
    'GPT-4': '2021年9月',
    'LLaMA-2': '2022年9月',
    'Claude-2': '2023年初',
}

# One row per model, stepping downwards from y=0.7.
y_pos = 0.7
for model, date in knowledge_cutoff.items():
    ax1.text(0.1, y_pos, f"{model}:", fontsize=9, fontweight='bold')
    ax1.text(0.5, y_pos, date, fontsize=9)
    y_pos -= 0.12

ax1.text(0.5, 0.2, "无法回答训练后的事件", ha='center', fontsize=9,
         bbox=dict(boxstyle='round', facecolor='lightcoral'))

# Panel 2: hallucination examples (question in black, answer in red).
ax2 = axes[1]
ax2.axis('off')
ax2.set_title('幻觉问题', fontsize=12)

hallucination = [
    ("问: 2024年奥运会在哪举办？", "答: 2024年奥运会在..."),
    ("问: 最新的AI突破是什么？", "答: 根据我的知识..."),
]

y_pos = 0.7
for q, a in hallucination:
    ax2.text(0.05, y_pos, q, fontsize=8)
    ax2.text(0.05, y_pos - 0.08, a, fontsize=8, color='red')
    y_pos -= 0.2

ax2.text(0.5, 0.2, "模型可能编造不存在的答案", ha='center', fontsize=9,
         bbox=dict(boxstyle='round', facecolor='lightcoral'))

# Panel 3: private knowledge the model cannot reach.
ax3 = axes[2]
ax3.axis('off')
ax3.set_title('私有知识访问', fontsize=12)

private_sources = [("公司内部文档", 0.7), ("个人邮件", 0.55), ("实时数据", 0.4)]
for label, y in private_sources:
    ax3.text(0.5, y, label, ha='center', fontsize=10,
             bbox=dict(boxstyle='round', facecolor='lightblue'))

# The arrow starts below the last box (centred at y=0.4) so it no longer
# runs through that box, and ends just above the caption.
ax3.annotate('', xy=(0.5, 0.29), xytext=(0.5, 0.36),
             arrowprops=dict(arrowstyle='->', lw=2))
ax3.text(0.5, 0.25, "大模型无法访问", ha='center', fontsize=9, color='red')

plt.suptitle('大模型的核心局限', fontsize=14)
plt.tight_layout()
plt.show()

print("\n💡 RAG的解决方案:")
print("   1. 实时检索最新信息")
print("   2. 引用可靠来源，减少幻觉")
print("   3. 连接私有知识库")

二、RAG的核心架构

2.1 RAG工作流程

Python 代码：

def visualize_rag_architecture():
    """Draw the RAG pipeline as a top-to-bottom flow chart and show it.

    Five stacked boxes (user query, retrieval, prompt augmentation,
    generation, final answer) are connected by downward arrows, and a
    knowledge-base box on the left feeds the retrieval stage. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    fig, ax = plt.subplots(figsize=(14, 8))
    ax.axis('off')

    # (label, lower-left x, lower-left y, width, height, fill colour),
    # listed in pipeline order from top to bottom.
    stages = [
        ('用户提问', 0.35, 0.85, 0.3, 0.08, 'lightcoral'),
        ('检索相关文档', 0.35, 0.65, 0.3, 0.12, 'lightblue'),
        ('增强提示', 0.35, 0.45, 0.3, 0.12, 'lightgreen'),
        ('大模型生成', 0.35, 0.25, 0.3, 0.12, 'lightyellow'),
        ('最终答案', 0.35, 0.08, 0.3, 0.1, 'lightcoral'),
    ]
    for label, x, y, width, height, fill in stages:
        ax.add_patch(plt.Rectangle((x, y), width, height,
                                   facecolor=fill, ec='black'))
        ax.text(x + width / 2, y + height / 2, label,
                ha='center', va='center', fontsize=10)

    # Knowledge base that the retrieval stage reads from.
    ax.add_patch(plt.Rectangle((0.05, 0.55), 0.25, 0.25,
                               facecolor='lightgray', ec='black'))
    ax.text(0.175, 0.675, '知识库', ha='center', va='center', fontsize=9)
    ax.text(0.175, 0.63, '• 文档\n• 网页\n• 数据库',
            ha='center', va='center', fontsize=7)

    # Flow arrows between consecutive stages. annotate() draws from `xytext`
    # (tail) to `xy` (head), so the head must sit on the LOWER box for the
    # arrow to follow the pipeline direction.
    for upper, lower in zip(stages, stages[1:]):
        _, ux, uy, uwidth, _, _ = upper
        _, _, ly, _, lheight, _ = lower
        mid_x = ux + uwidth / 2
        ax.annotate('', xy=(mid_x, ly + lheight), xytext=(mid_x, uy),
                    arrowprops=dict(arrowstyle='->', lw=2))

    # Knowledge base -> retrieval stage (right edge of the knowledge-base box
    # to the left edge of the retrieval box).
    ax.annotate('', xy=(0.35, 0.71), xytext=(0.3, 0.71),
                arrowprops=dict(arrowstyle='->', lw=2))

    ax.set_title('RAG架构：检索 → 增强 → 生成', fontsize=14)
    plt.tight_layout()
    plt.show()

# Render the architecture diagram as soon as this cell/script runs.
visualize_rag_architecture()

三、向量数据库与嵌入

3.1 文本嵌入原理

Python 代码：

def visualize_embeddings():
    """Show a toy 2-D embedding space next to a cosine-similarity summary.

    The left panel scatters four synthetic clusters of ten points each; the
    right panel prints the cosine-similarity formula and its typical uses.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    figure, (scatter_ax, formula_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: simulated embedding points. The global NumPy seed is fixed
    # so the same point clouds are drawn on every run.
    np.random.seed(42)
    cluster_specs = [
        # (category label, marker colour, cluster centre)
        ('AI', 'red', [2, 2]),
        ('编程', 'blue', [1, 1]),
        ('水果', 'green', [-2, -1]),
        ('动物', 'orange', [-1, -2]),
    ]
    for name, colour, centre in cluster_specs:
        cloud = np.random.randn(10, 2) + centre
        scatter_ax.scatter(cloud[:, 0], cloud[:, 1], c=colour,
                           label=name, alpha=0.6, s=30)

    scatter_ax.set_xlabel('维度1')
    scatter_ax.set_ylabel('维度2')
    scatter_ax.set_title('文本嵌入向量空间')
    scatter_ax.legend()
    scatter_ax.grid(True, alpha=0.3)

    # Right panel: text-only explanation of cosine similarity.
    formula_ax.axis('off')
    formula_ax.set_title('相似度计算', fontsize=12)

    similarity_formula = (
        "\n"
        "    余弦相似度 (Cosine Similarity):\n"
        "    \n"
        "    similarity = cos(θ) = (A·B) / (||A|| ||B||)\n"
        "    \n"
        "    特点:\n"
        "    • 范围: [-1, 1]\n"
        "    • 1: 完全相同方向\n"
        "    • 0: 正交(不相关)\n"
        "    • -1: 完全相反\n"
        "    \n"
        "    常用场景:\n"
        "    • 语义搜索: 找到最相似的文档\n"
        "    • 推荐系统: 找到相似物品\n"
        "    • 聚类: 分组相似文本\n"
        "    "
    )
    formula_ax.text(0.05, 0.95, similarity_formula,
                    transform=formula_ax.transAxes, fontsize=9,
                    verticalalignment='top', fontfamily='monospace')

    plt.suptitle('文本嵌入：将文本映射到向量空间', fontsize=14)
    plt.tight_layout()
    plt.show()

# Render the embedding figure as soon as this cell/script runs.
visualize_embeddings()

3.2 向量检索过程

Python 代码：

def visualize_vector_search():
    """Illustrate vector retrieval: the step flow and a Top-K bar chart.

    The left panel draws six circled steps (query, embed, vector, similarity,
    Top-K, result) joined by arrows. The right panel plots five hard-coded
    document similarities against a threshold line and labels only the bars
    that reach the threshold. The figure is displayed with ``plt.show()``;
    nothing is returned.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: retrieval flow.
    ax1 = axes[0]
    ax1.axis('off')
    ax1.set_title('向量检索流程', fontsize=12)

    # (label, centre x, centre y) for each step circle.
    steps = [
        ("查询", 0.15, 0.7),
        ("嵌入", 0.4, 0.7),
        ("向量", 0.65, 0.7),
        ("相似度", 0.65, 0.4),
        ("Top-K", 0.4, 0.4),
        ("结果", 0.15, 0.4),
    ]

    for label, x, y in steps:
        # Use facecolor/edgecolor explicitly: passing `color=` together with
        # `ec=` makes matplotlib override the edge colour with `color`, so
        # the black outline would never be drawn.
        circle = plt.Circle((x, y), 0.07,
                            facecolor='lightblue', edgecolor='black')
        ax1.add_patch(circle)
        ax1.text(x, y, label, ha='center', va='center', fontsize=7)

    # Arrow endpoints sit on the circle rims (radius 0.07), (tail, head).
    connections = [
        ((0.22, 0.7), (0.33, 0.7)),
        ((0.47, 0.7), (0.58, 0.7)),
        ((0.65, 0.63), (0.65, 0.47)),
        ((0.58, 0.4), (0.47, 0.4)),
        ((0.33, 0.4), (0.22, 0.4)),
    ]

    for start, end in connections:
        ax1.annotate('', xy=end, xytext=start,
                     arrowprops=dict(arrowstyle='->', lw=1))

    # Right panel: similarity scores of simulated documents.
    ax2 = axes[1]

    docs = ['文档A', '文档B', '文档C', '文档D', '文档E']
    similarities = [0.95, 0.82, 0.67, 0.45, 0.32]
    colors = ['darkgreen', 'green', 'lightgreen', 'lightcoral', 'lightcoral']
    # Single source of truth for the cutoff used by both the dashed line and
    # the bar labels.
    threshold = 0.7

    bars = ax2.bar(docs, similarities, color=colors)
    ax2.set_ylabel('相似度')
    ax2.set_title('Top-K检索结果')
    ax2.axhline(y=threshold, color='red', linestyle='--', label='相似度阈值')

    # Only bars at or above the threshold get a numeric label.
    for bar, sim in zip(bars, similarities):
        if sim >= threshold:
            ax2.text(bar.get_x() + bar.get_width() / 2,
                     bar.get_height() + 0.02,
                     f'{sim}', ha='center', va='bottom', fontsize=9)

    ax2.legend()

    plt.suptitle('向量检索：找到最相似的文档', fontsize=14)
    plt.tight_layout()
    plt.show()

# Render the vector-search figure as soon as this cell/script runs.
visualize_vector_search()

四、RAG的增强策略

4.1 提示增强

Python 代码：

def visualize_prompt_augmentation():
    """Show a RAG prompt template beside a before/after answer comparison.

    The left panel prints a prompt template with ``{context}`` and
    ``{question}`` placeholders (shown literally, not formatted). The right
    panel contrasts an answer without retrieval against one grounded in
    retrieved facts. The figure is displayed with ``plt.show()``; nothing is
    returned.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: prompt template.
    ax1 = axes[0]
    ax1.axis('off')
    ax1.set_title('RAG提示模板', fontsize=12)

    template = """
    系统指令: 你是一个AI助手，请基于以下检索到的信息回答问题。
    
    检索到的信息:
    {context}
    
    用户问题: {question}
    
    要求:
    1. 如果信息中有答案，请引用来源
    2. 如果信息不足，请如实告知
    3. 不要编造信息
    
    回答:
    """

    ax1.text(0.05, 0.95, template, transform=ax1.transAxes, fontsize=9,
             verticalalignment='top', fontfamily='monospace')

    # Right panel: answer without vs. with retrieval.
    ax2 = axes[1]
    ax2.axis('off')
    ax2.set_title('增强前后对比', fontsize=12)

    # (x, y, text, extra text kwargs), listed top to bottom.
    comparison_lines = [
        # Without retrieval augmentation.
        (0.05, 0.85, '❌ 无检索增强:', dict(fontsize=9, fontweight='bold')),
        (0.1, 0.78, 'Q: 2024年AI领域有什么重要进展？', dict(fontsize=8)),
        (0.1, 0.72, 'A: 我不知道2024年的具体事件...',
         dict(fontsize=8, color='red')),
        # With retrieval augmentation.
        (0.05, 0.62, '✅ 有检索增强:', dict(fontsize=9, fontweight='bold')),
        (0.1, 0.55, 'Q: 2024年AI领域有什么重要进展？', dict(fontsize=8)),
        (0.1, 0.48, '检索到的信息:', dict(fontsize=8, color='green')),
        # The "retrieved" facts must themselves be true: the 1M-token
        # context model announced in 2024 was Gemini 1.5 Pro, not GPT-5.
        (0.15, 0.42, '• Gemini 1.5 Pro发布，支持100万token上下文',
         dict(fontsize=7)),
        (0.15, 0.37, '• Sora文本生成视频模型发布', dict(fontsize=7)),
        (0.1, 0.3, 'A: 根据最新信息，2024年AI领域的重要进展包括...',
         dict(fontsize=8, color='green')),
    ]
    for x, y, text, style in comparison_lines:
        ax2.text(x, y, text, **style)

    plt.suptitle('提示增强：将检索信息注入提示', fontsize=14)
    plt.tight_layout()
    plt.show()

# Render the prompt-augmentation figure as soon as this cell/script runs.
visualize_prompt_augmentation()

五、完整RAG系统实现

Python 代码：

def rag_system_demo():
    """Print a banner followed by a reference RAG implementation as text.

    The implementation is only printed, never executed, so none of the
    libraries it imports (chromadb, sentence-transformers, transformers)
    need to be installed to call this function. Returns ``None``.
    """
    print("\n" + "=" * 60)
    print("RAG系统完整流程")
    print("=" * 60)

    # The snippet contains its own triple-double-quoted docstrings, so the
    # enclosing literal must use ''' (a """ literal would be terminated by
    # the first embedded docstring). It is a raw string so that "\n" inside
    # the snippet is printed literally instead of becoming a line break.
    code = r'''
# 完整的RAG系统实现

import chromadb
from sentence_transformers import SentenceTransformer
from transformers import pipeline

class RAGSystem:
    def __init__(self, embedding_model="all-MiniLM-L6-v2"):
        # 初始化嵌入模型
        self.embedder = SentenceTransformer(embedding_model)

        # 初始化向量数据库
        self.client = chromadb.Client()
        # get_or_create_collection: 重复运行时不会因集合已存在而报错
        self.collection = self.client.get_or_create_collection("documents")

        # 初始化生成模型
        self.generator = pipeline("text-generation", model="gpt2")

    def add_documents(self, documents, ids):
        """添加文档到知识库"""
        embeddings = self.embedder.encode(documents).tolist()
        self.collection.add(
            embeddings=embeddings,
            documents=documents,
            ids=ids
        )

    def search(self, query, top_k=3):
        """检索相关文档"""
        query_embedding = self.embedder.encode([query]).tolist()
        results = self.collection.query(
            query_embeddings=query_embedding,
            n_results=top_k
        )
        return results['documents'][0]

    def generate_with_context(self, query, context):
        """基于上下文生成回答"""
        # 将检索到的文档列表拼接为纯文本，避免把Python列表的repr写进提示
        context_text = "\n".join(context)
        prompt = (
            "基于以下信息回答问题:\n\n"
            f"信息:\n{context_text}\n\n"
            f"问题: {query}\n\n"
            "回答:"
        )

        # max_new_tokens只限制新生成的长度(max_length会把提示也计算在内);
        # return_full_text=False使返回结果不再重复提示本身
        outputs = self.generator(
            prompt,
            max_new_tokens=200,
            return_full_text=False,
        )
        return outputs[0]["generated_text"]

    def answer(self, query):
        """完整RAG流程"""
        # 1. 检索
        context = self.search(query)

        # 2. 生成
        answer = self.generate_with_context(query, context)

        return answer, context

# 使用示例
rag = RAGSystem()

# 添加文档
documents = [
    "RAG是检索增强生成技术",
    "向量数据库用于存储文本嵌入",
    "大模型可以基于检索信息回答"
]
rag.add_documents(documents, ["doc1", "doc2", "doc3"])

# 查询
query = "什么是RAG？"
answer, context = rag.answer(query)
print(f"问题: {query}")
print(f"检索到: {context}")
print(f"回答: {answer}")
'''

    print(code)

# Print the reference implementation as soon as this cell/script runs.
rag_system_demo()

六、学习检查清单

RAG核心概念

- [ ] 理解RAG为什么需要检索
- [ ] 掌握RAG的三阶段流程（检索 → 增强 → 生成）
- [ ] 了解文本嵌入和向量检索
- [ ] 知道向量数据库的作用

实现技术

- [ ] 会使用嵌入模型
- [ ] 了解向量数据库（Chroma、FAISS、Pinecone）
- [ ] 掌握提示增强技巧
- [ ] 能实现简单RAG系统

七、总结

RAG核心组件：

| 组件 | 作用 | 常用工具 |
| --- | --- | --- |
| 嵌入模型 | 文本→向量 | OpenAI Ada、BGE、M3E |
| 向量数据库 | 存储和检索 | Chroma、FAISS、Pinecone |
| 大模型 | 生成回答 | GPT、LLaMA、ChatGLM |

RAG vs 微调：

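| 特性 | RAG | 微调 |
| --- | --- | --- |
| 知识更新 | 实时（更新知识库即可） | 需要重新训练 |
| 可解释性 | 可引用来源 | 黑盒 |
| 幻觉问题 | 降低 | 仍存在 |
| 计算成本 | 低（无需训练，但每次推理增加检索开销） | 高（需要训练算力） |
| 私有数据 | 天然支持 | 需要训练 |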

记住：

- RAG = 检索 + 增强 + 生成
- 向量检索是核心
- 知识库质量决定回答质量
- RAG是降低幻觉的有效方法