03-深度学习基础：LangChain应用开发

LangChain应用开发：构建大模型应用框架

一、为什么需要LangChain？

1.1 大模型应用开发的挑战

Python 代码示例：

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Section banner printed to the console before the figure is drawn.
banner = "=" * 60
print(banner, "大模型应用开发的挑战", banner, sep="\n")

# One figure holding two side-by-side, axis-free text panels.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
comparison_ax, values_ax = axes

# Left panel: traditional development vs LangChain, one column each.
comparison_ax.axis('off')
comparison_ax.set_title('传统开发 vs LangChain', fontsize=12)

# (heading, heading x, heading colour, bullet x, bullet lines)
comparison_columns = (
    ('传统开发方式:', 0.05, 'red', 0.1, (
        "• 直接调用API",
        "• 手动管理提示模板",
        "• 硬编码逻辑流程",
        "• 重复造轮子",
        "• 难以维护和扩展",
    )),
    ('LangChain方式:', 0.55, 'green', 0.6, (
        "• 标准化组件",
        "• 可复用链",
        "• 内置工具集成",
        "• 模块化设计",
        "• 易于扩展",
    )),
)
for heading, heading_x, heading_color, bullet_x, bullets in comparison_columns:
    comparison_ax.text(heading_x, 0.85, heading,
                       fontsize=10, fontweight='bold', color=heading_color)
    # Bullets start just below the heading and step down by a fixed amount.
    row_y = 0.75
    for bullet in bullets:
        comparison_ax.text(bullet_x, row_y, bullet, fontsize=9)
        row_y -= 0.08

# Right panel: bold keyword on the left, short description on the right.
values_ax.axis('off')
values_ax.set_title('LangChain核心价值', fontsize=12)

core_values = (
    ("🔗 链式调用", "组合多个操作"),
    ("🧩 模块化", "可插拔组件"),
    ("🔧 工具集成", "搜索、API、数据库"),
    ("💾 记忆管理", "对话历史"),
    ("🤖 Agent", "自主决策执行"),
)
row_y = 0.75
for keyword, explanation in core_values:
    values_ax.text(0.1, row_y, keyword, fontsize=10, fontweight='bold')
    values_ax.text(0.4, row_y, explanation, fontsize=9)
    row_y -= 0.1

plt.suptitle('LangChain：构建大模型应用的框架', fontsize=14)
plt.tight_layout()
plt.show()

# Console summary of the pain points; the first line carries a leading blank line.
print("\n💡 LangChain解决的痛点:")
for pain_point in (
    "   1. 统一接口：标准化大模型调用",
    "   2. 组件复用：避免重复开发",
    "   3. 流程编排：轻松构建复杂应用",
    "   4. 生态集成：与各种工具无缝对接",
):
    print(pain_point)

二、LangChain核心组件

2.1 六大核心模块

Python 代码示例：

def visualize_core_components():
    """Draw a hub-and-spoke diagram of LangChain's six core modules.

    A central disc labelled "LangChain core" is surrounded by six labelled
    discs, each joined to the centre by a faint grey line. The figure is
    displayed with ``plt.show()``; nothing is returned.
    """
    hub_x, hub_y = 0.5, 0.5

    # (module name, label drawn inside the disc, disc centre x, disc centre y)
    spokes = (
        ('Models', '模型接口\n(LLM/Chat/Embeddings)', 0.15, 0.75),
        ('Prompts', '提示管理\n(模板/示例选择器)', 0.85, 0.75),
        ('Chains', '链式调用\n(组合多个操作)', 0.15, 0.25),
        ('Agents', '智能代理\n(自主决策执行)', 0.85, 0.25),
        ('Memory', '记忆管理\n(对话历史)', 0.5, 0.85),
        ('Indexes', '索引检索\n(文档/向量库)', 0.5, 0.15),
    )

    fig, ax = plt.subplots(figsize=(14, 8))
    ax.axis('off')

    # Hub disc and its label.
    ax.add_patch(plt.Circle((hub_x, hub_y), 0.12, color='lightcoral', ec='black'))
    ax.text(hub_x, hub_y, 'LangChain\n核心',
            ha='center', va='center', fontsize=10, fontweight='bold')

    for _name, caption, cx, cy in spokes:
        ax.add_patch(plt.Circle((cx, cy), 0.1, color='lightblue', ec='black'))
        ax.text(cx, cy, caption, ha='center', va='center', fontsize=8)

        # An empty-text annotation with arrowstyle '-' is used as a plain
        # connector line between the hub and this module.
        ax.annotate('', xy=(cx, cy), xytext=(hub_x, hub_y),
                    arrowprops=dict(arrowstyle='-', color='gray', lw=1, alpha=0.5))

    # Pin the view to the unit square the coordinates above are laid out in.
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('LangChain六大核心模块', fontsize=14)
    plt.tight_layout()
    plt.show()

visualize_core_components()

2.2 各模块详解

Python 代码示例：

def visualize_modules_detail():
    """Show a 2x3 grid of text panels, one per LangChain core module.

    Each panel has a title, a bold heading at y=0.8 and up to three body
    lines at y=0.7, 0.6 and 0.5. The figure is displayed with ``plt.show()``;
    nothing is returned.

    The "Agents" heading previously passed ``font-weight='bold'``, which is
    not a valid keyword argument name and made the whole snippet fail to
    parse; it now uses ``fontweight`` like the other panels.
    """
    # (panel title, bold heading, body lines, text kwargs for the body lines)
    panels = [
        ('Models - 模型接口', '支持的模型类型:',
         ['• LLM (文本补全)', '• Chat (对话模型)', '• Embeddings (嵌入)'],
         {'fontsize': 8}),
        # The Prompts panel shows code, so its body is smaller and monospaced.
        ('Prompts - 提示管理', '提示模板:',
         ['from langchain.prompts import PromptTemplate',
          'template = "回答: {question}"'],
         {'fontsize': 7, 'fontfamily': 'monospace'}),
        ('Chains - 链式调用', '内置链类型:',
         ['• LLMChain', '• SequentialChain', '• RouterChain'],
         {'fontsize': 8}),
        ('Agents - 智能代理', 'Agent类型:',
         ['• Zero-shot ReAct', '• Conversational', '• OpenAI Functions'],
         {'fontsize': 8}),
        ('Memory - 记忆管理', '记忆类型:',
         ['• ConversationBufferMemory', '• ConversationSummaryMemory',
          '• VectorStoreRetrieverMemory'],
         {'fontsize': 8}),
        ('Indexes - 索引检索', '索引功能:',
         ['• 文档加载器', '• 文本分割器', '• 向量存储'],
         {'fontsize': 8}),
    ]
    body_rows_y = (0.7, 0.6, 0.5)

    fig, axes = plt.subplots(2, 3, figsize=(15, 10))

    # axes.flat walks the grid row by row, matching the order of `panels`.
    for ax, (title, heading, body_lines, body_style) in zip(axes.flat, panels):
        ax.axis('off')
        ax.set_title(title, fontsize=11)
        ax.text(0.05, 0.8, heading, fontsize=9, fontweight='bold')
        # zip stops at the shorter sequence, so panels with fewer than three
        # body lines simply leave the lower rows empty.
        for row_y, line in zip(body_rows_y, body_lines):
            ax.text(0.1, row_y, line, **body_style)

    plt.suptitle('LangChain六大模块详解', fontsize=14)
    plt.tight_layout()
    plt.show()

visualize_modules_detail()

三、快速开始：第一个LangChain应用

3.1 基础示例

Python 代码示例：

def basic_langchain_demo():
    """Print a banner followed by a minimal LangChain example as plain text.

    The example source is only written to stdout; it is never executed by
    this function. Nothing is returned.
    """
    rule = "=" * 60
    print(f"\n{rule}", "LangChain基础示例", rule, sep="\n")

    # Kept flush-left inside the literal so the printed snippet has no
    # leading indentation.
    snippet = """
# 安装
# pip install langchain langchain-openai

from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# 1. 初始化模型
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)

# 2. 创建提示模板
prompt = PromptTemplate(
    input_variables=["topic"],
    template="请用三句话解释什么是{topic}。"
)

# 3. 创建链
chain = LLMChain(llm=llm, prompt=prompt)

# 4. 运行
result = chain.run(topic="机器学习")
print(result)

# 输出示例:
# 机器学习是人工智能的一个子领域...
"""

    print(snippet)

basic_langchain_demo()

3.2 链式调用

Python 代码示例：

def visualize_chains():
    """Draw the stages of a simple LLMChain as a left-to-right pipeline.

    Five labelled discs are placed along y=0.5 and every stage except the
    last gets an arrow pointing to the right. The figure is displayed with
    ``plt.show()``; nothing is returned.
    """
    # (label, x position of the disc centre)
    stages = (
        ("输入", 0.1),
        ("Prompt\n模板", 0.3),
        ("LLM\n模型", 0.5),
        ("输出\n解析", 0.7),
        ("结果", 0.9),
    )

    fig, ax = plt.subplots(figsize=(14, 4))
    ax.axis('off')

    for label, cx in stages:
        ax.add_patch(plt.Circle((cx, 0.5), 0.08, color='lightblue', ec='black'))
        ax.text(cx, 0.5, label, ha='center', va='center', fontsize=8)

        # Stages at or beyond x=0.85 (only the final one here) get no arrow.
        if cx >= 0.85:
            continue
        ax.annotate('', xy=(cx+0.18, 0.5), xytext=(cx+0.1, 0.5),
                    arrowprops=dict(arrowstyle='->', lw=2))

    # Pin the view to the unit square the coordinates above are laid out in.
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('LLMChain：最简单的链式调用', fontsize=12)

    plt.tight_layout()
    plt.show()

visualize_chains()

四、高级功能

4.1 Agent智能代理

Python 代码示例：

def visualize_agent():
    """Plot the agent decision loop next to a list of tools an agent can use.

    The left panel draws six labelled nodes (user input, LLM reasoning, tool
    selection, tool execution, observation, final output) with four arrows
    between them. The right panel lists tool categories with a short
    description each. The figure is displayed with ``plt.show()``; nothing
    is returned.

    The arrow between the user-input node and the LLM node used to have its
    endpoints swapped, so it pointed up at the user input while every other
    arrow followed the flow downwards. It now points from the user input
    into the LLM node.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # 1. Agent decision loop
    loop_ax = axes[0]
    loop_ax.axis('off')
    loop_ax.set_title('Agent决策循环', fontsize=12)

    # (label, centre x, centre y); every node is a disc of radius 0.08.
    nodes = [
        ("用户\n输入", 0.5, 0.8),
        ("思考\n(LLM)", 0.5, 0.55),
        ("选择\n工具", 0.25, 0.35),
        ("执行\n工具", 0.5, 0.35),
        ("观察\n结果", 0.75, 0.35),
        ("生成\n输出", 0.5, 0.15),
    ]
    for label, x, y in nodes:
        loop_ax.add_patch(plt.Circle((x, y), 0.08, color='lightblue', ec='black'))
        loop_ax.text(x, y, label, ha='center', va='center', fontsize=7)

    # Each entry is (tail, head). annotate() draws the arrow from ``xytext``
    # (the tail) to ``xy`` (the head).
    flow_arrows = [
        # Bottom edge of the user-input node down to the top edge of the LLM node.
        ((0.5, 0.72), (0.5, 0.63)),
        # From below the LLM node towards the tool-selection node.
        ((0.42, 0.52), (0.33, 0.47)),
        # Down onto the top edge of the tool-execution node.
        ((0.58, 0.47), (0.5, 0.43)),
        # From the tool-execution node down to the output node.
        ((0.5, 0.28), (0.5, 0.23)),
    ]
    for tail, head in flow_arrows:
        loop_ax.annotate('', xy=head, xytext=tail,
                         arrowprops=dict(arrowstyle='->', lw=1))

    # 2. Example tools
    tools_ax = axes[1]
    tools_ax.axis('off')
    tools_ax.set_title('Agent可用工具', fontsize=12)

    tools = [
        ("🔍 搜索", "网络搜索、知识库"),
        ("📊 计算", "数学计算、代码执行"),
        ("📁 文件", "读写文件、数据处理"),
        ("🌐 API", "调用外部服务"),
        ("💾 数据库", "查询数据库"),
        ("📧 通信", "发送邮件、消息"),
    ]

    # Rows start at y=0.75 and step down by 0.1 per tool.
    y_pos = 0.75
    for tool, desc in tools:
        tools_ax.text(0.1, y_pos, tool, fontsize=9, fontweight='bold')
        tools_ax.text(0.3, y_pos, desc, fontsize=8)
        y_pos -= 0.1

    plt.suptitle('Agent：让LLM自主决策和调用工具', fontsize=14)
    plt.tight_layout()
    plt.show()

visualize_agent()

4.2 记忆管理

Python 代码示例：

def visualize_memory():
    """Plot a sample conversation next to a table comparing memory types.

    The left panel draws six chat turns as discs (one colour for lines that
    start with "用户", another for the rest) with four arrows between them.
    The right panel renders a box-drawing text table in a monospace font.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 6))
    chat_ax, table_ax = axes

    # 1. Conversation memory
    chat_ax.axis('off')
    chat_ax.set_title('对话记忆', fontsize=12)

    # (utterance, disc centre x, disc centre y)
    turns = (
        ("用户: 我叫张三", 0.2, 0.7),
        ("AI: 你好张三！", 0.6, 0.7),
        ("用户: 我喜欢编程", 0.2, 0.5),
        ("AI: 编程很有趣！", 0.6, 0.5),
        ("用户: 我叫什么？", 0.2, 0.3),
        ("AI: 你叫张三", 0.6, 0.3),
    )
    for utterance, cx, cy in turns:
        # The speaker is recognised from the text prefix.
        fill = 'lightblue' if utterance.startswith("用户") else 'lightgreen'
        chat_ax.add_patch(plt.Circle((cx, cy), 0.1, color=fill, ec='black'))
        chat_ax.text(cx, cy, utterance, ha='center', va='center', fontsize=7)

    # (arrow head, arrow tail), drawn in this order.
    links = (
        ((0.4, 0.7), (0.3, 0.7)),
        ((0.3, 0.6), (0.5, 0.65)),
        ((0.4, 0.5), (0.3, 0.5)),
        ((0.3, 0.4), (0.5, 0.45)),
    )
    for head, tail in links:
        chat_ax.annotate('', xy=head, xytext=tail,
                         arrowprops=dict(arrowstyle='->', lw=1))

    # 2. Memory type comparison
    table_ax.axis('off')
    table_ax.set_title('记忆类型对比', fontsize=12)

    # The literal's own indentation is part of the rendered text.
    comparison_table = """
    ╔══════════════════╦════════════════════════════════════════╗
    ║ 记忆类型         ║ 特点                                    ║
    ╠══════════════════╬════════════════════════════════════════╣
    ║ BufferMemory     ║ 保存所有对话，简单直接                  ║
    ║ WindowMemory     ║ 只保留最近K轮对话                       ║
    ║ SummaryMemory    ║ 总结压缩历史，节省token                 ║
    ║ VectorMemory     ║ 向量检索相关记忆                        ║
    ║ EntityMemory     ║ 提取和存储实体信息                      ║
    ╚══════════════════╩════════════════════════════════════════╝
    """

    # Positioned in axes coordinates and anchored by the top of the text.
    table_ax.text(0.05, 0.95, comparison_table, transform=table_ax.transAxes,
                  fontsize=9, verticalalignment='top', fontfamily='monospace')

    plt.suptitle('Memory：让LLM记住对话上下文', fontsize=14)
    plt.tight_layout()
    plt.show()

visualize_memory()

五、完整实战：RAG问答系统

Python 代码示例：

def rag_qa_system_demo():
    """Print a banner followed by a sample LangChain RAG pipeline as text.

    The sample source is only written to stdout; it is never executed by
    this function. Nothing is returned.
    """
    rule = "=" * 60
    print(f"\n{rule}", "LangChain RAG问答系统", rule, sep="\n")

    # A plain (non-f) string: the braces in the sample's own f-strings must
    # be printed literally.
    snippet = """
# 完整RAG系统代码

from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# 1. 加载文档
loader = TextLoader("knowledge.txt")
documents = loader.load()

# 2. 文档分割
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
texts = text_splitter.split_documents(documents)

# 3. 创建向量库
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    documents=texts,
    embedding=embeddings
)

# 4. 创建检索器
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# 5. 创建问答链
qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo"),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True
)

# 6. 提问
query = "什么是LangChain？"
result = qa_chain({"query": query})

print(f"问题: {query}")
print(f"答案: {result['result']}")
print(f"来源: {result['source_documents']}")
"""

    print(snippet)

rag_qa_system_demo()

六、学习检查清单

基础概念

- [ ] 理解LangChain解决的问题
- [ ] 掌握六大核心模块
- [ ] 会使用PromptTemplate
- [ ] 能创建LLMChain

高级功能

- [ ] 理解Agent工作原理
- [ ] 掌握Memory的使用
- [ ] 能构建RAG系统
- [ ] 会使用工具调用

七、总结

LangChain核心优势：

| 特性 | 传统方式 | LangChain |
| --- | --- | --- |
| 代码量 | 大量重复 | 简洁高效 |
| 可维护性 | 差 | 好 |
| 扩展性 | 困难 | 容易 |
| 生态集成 | 手动实现 | 内置支持 |

学习路径：

基础: Models → Prompts → Chains
进阶: Memory → Indexes → Agents
实战: RAG系统 → 对话机器人 → 自主Agent

记住：

LangChain是构建LLM应用的标准框架
链式调用是核心思想
Agent让LLM拥有行动能力
记忆是对话的关键