跟着官网学LangChain【第02章：提示词和消息】

第02章：提示词和消息(Prompts & Messages) - 源码分析

📚 源码分析目标

分析ChatPromptTemplate的实现原理
理解消息格式化和占位符处理
掌握提示词模板的渲染机制
了解消息历史的处理方式

🔍 ChatPromptTemplate源码分析

模板类结构

python 复制代码

# langchain_core/prompts/chat.py (简化版)
from typing import List, Tuple, Dict, Any
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain_core.runnables import Runnable

class ChatPromptTemplate(Runnable[Dict[str, Any], List[BaseMessage]]):
    """Chat prompt template built from ``(role, template)`` pairs (simplified).

    Each pair holds a role name and a ``str.format``-style template.
    Formatting renders every template with the supplied variables and wraps
    the rendered text in the message class that matches the role.
    """

    def __init__(self, messages: List[Tuple[str, str]]):
        """Store the template pairs and collect the variable names they use.

        Args:
            messages: ``[("role", "template"), ...]``, for example
                ``[("system", "你是{role}"), ("user", "{input}")]``.
        """
        self.messages = messages
        self.input_variables = self._extract_variables()

    def _extract_variables(self) -> set:
        """Return the set of ``{variable}`` names found in all templates.

        Only simple ``{name}`` fields made of word characters are recognised.
        """
        # Imported and compiled once per call instead of once per template.
        import re

        variable_pattern = re.compile(r'\{(\w+)\}')
        variables = set()
        for _role, template in self.messages:
            variables.update(variable_pattern.findall(template))
        return variables

    def format_messages(self, **kwargs) -> List[BaseMessage]:
        """Render every template and wrap it in a role-specific message.

        Args:
            **kwargs: Values for the template variables.

        Returns:
            One message per template pair, in the original order.

        Raises:
            KeyError: If a template references a variable missing from
                ``kwargs`` (raised by ``str.format``).
        """
        # "user", "human" and any unknown role fall back to HumanMessage.
        role_to_message = {
            "system": SystemMessage,
            "assistant": AIMessage,
            "ai": AIMessage,
        }
        formatted = []
        for role, template in self.messages:
            # 1. Render the template text.
            content = template.format(**kwargs)
            # 2. Wrap it in the message class for this role.
            message_cls = role_to_message.get(role, HumanMessage)
            formatted.append(message_cls(content=content))
        return formatted

    def invoke(self, input: Dict[str, Any], config: dict = None, **kwargs) -> List[BaseMessage]:
        """Runnable entry point: format the messages from an input mapping.

        ``invoke`` is the method Runnable expects subclasses to provide;
        defining only ``_invoke`` leaves the class without it.
        """
        return self._invoke(input, config, **kwargs)

    def _invoke(self, input: Dict[str, Any], config: dict = None, **kwargs) -> List[BaseMessage]:
        """Format the messages using ``input`` as the template variables.

        ``config`` and extra keyword arguments are accepted but not used.
        """
        return self.format_messages(**input)

占位符处理

python 复制代码

class ChatPromptTemplate:
    """Prompt template whose entries may also be message-history placeholders."""

    def __init__(self, messages: List[Tuple[str, Any]]):
        """Keep the ``(role, entry)`` pairs exactly as given."""
        self.messages = messages

    def format_messages(self, **kwargs) -> List[BaseMessage]:
        """Build the message list, expanding placeholders from ``kwargs``.

        An entry is handled according to its type:
        * a string, or any object exposing ``format``, is rendered with
          ``kwargs`` and wrapped in a message for the entry's role;
        * a dict containing ``"placeholder"`` is replaced by the list stored
          in ``kwargs`` under that key (nothing is added if the key is absent);
        * anything else is converted with ``str`` and wrapped in a message.
        """
        result = []

        for role, entry in self.messages:
            if isinstance(entry, str) or hasattr(entry, "format"):
                # Plain template string or a formattable object.
                text = entry.format(**kwargs)
            elif isinstance(entry, dict) and "placeholder" in entry:
                # History items are inserted as-is, without re-wrapping.
                result.extend(kwargs.get(entry["placeholder"], []))
                continue
            else:
                text = str(entry)

            result.append(self._create_message(role, text))

        return result

    def _create_message(self, role: str, content: str) -> BaseMessage:
        """Return the message object matching ``role``.

        ``"system"`` maps to SystemMessage, ``"assistant"``/``"ai"`` to
        AIMessage, and every other role to HumanMessage.
        """
        if role == "system":
            return SystemMessage(content=content)
        if role in ("assistant", "ai"):
            return AIMessage(content=content)
        return HumanMessage(content=content)

🔄 消息格式化流程

完整流程

python 复制代码

# User code: build a two-message template and render it.
prompt = ChatPromptTemplate([
    ("system", "你是{role}"),
    ("user", "{input}")
])
messages = prompt.format_messages(role="助手", input="你好")

# Internal execution flow:
# 1. prompt.format_messages(role="助手", input="你好")
#    ↓
# 2. Iterate over the messages list
#    ↓
# 3. For each template:
#    - ("system", "你是{role}") -> "你是助手" -> SystemMessage
#    - ("user", "{input}") -> "你好" -> HumanMessage
#    ↓
# 4. Return [SystemMessage(...), HumanMessage(...)]

流程图

format_messages
  ↓
遍历模板列表
  ↓
判断模板类型：
  - 字符串 → format格式化 → 创建消息对象
  - 对象 → 调用format方法 → 创建消息对象
  - 占位符 → 从kwargs获取历史消息（直接加入结果，不再创建消息对象）
  ↓
添加到结果列表
  ↓
还有模板?
  - 是 → 回到"遍历模板列表"
  - 否 → 返回消息列表

💡 实际代码案例

案例1：自定义提示词模板

python 复制代码

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.messages import BaseMessage, SystemMessage, HumanMessage
from typing import List, Dict, Any

class CustomPromptTemplate:
    """Prompt template that can switch templates based on conditions.

    ``conditions`` maps a Python expression (given as a string) to an
    alternative template. When formatting, the expressions are checked in
    insertion order and the first truthy one selects its template; if none
    match, ``base_template`` is used.
    """

    def __init__(self, base_template: str, conditions: Dict[str, str] = None):
        """Store the fallback template and the optional condition mapping.

        Args:
            base_template: ``str.format``-style template used when no
                condition matches.
            conditions: Mapping of expression string to template string.
                ``None`` means no conditions.
        """
        self.base_template = base_template
        self.conditions = conditions or {}

    def format(self, **kwargs) -> str:
        """Render the first template whose condition holds, else the base one.

        Raises:
            KeyError: If the chosen template references a variable missing
                from ``kwargs`` (raised by ``str.format``).
        """
        for condition, template in self.conditions.items():
            if self._evaluate_condition(condition, kwargs):
                return template.format(**kwargs)

        # No condition matched: fall back to the base template.
        return self.base_template.format(**kwargs)

    def _evaluate_condition(self, condition: str, kwargs: Dict) -> bool:
        """Evaluate ``condition`` against ``kwargs`` and return its truthiness.

        The expression sees every keyword argument as a name, plus the whole
        mapping under the name ``kwargs``, e.g.
        ``"len(kwargs.get('input', '')) > 100"``.

        A condition that fails to evaluate (syntax error, unknown name, ...)
        counts as not matched and yields ``False``.

        SECURITY: ``eval`` runs arbitrary Python code. Condition strings must
        come from trusted code only, never from user input.
        """
        try:
            return bool(eval(condition, {"kwargs": kwargs, **kwargs}))
        except Exception:
            # Deliberately not a bare ``except``: SystemExit and
            # KeyboardInterrupt must still propagate.
            return False

# Usage: conditions are checked in order; the first truthy one wins.
template = CustomPromptTemplate(
    base_template="回答：{input}",
    conditions={
        "len(kwargs.get('input', '')) > 100": "请简要回答：{input}",
        "kwargs.get('urgent', False)": "紧急！请立即回答：{input}"
    }
)

result1 = template.format(input="短问题")
result2 = template.format(input="这是一个很长很长的问题..." * 10)
print(result1)  # prints: 回答：短问题
print(result2)  # prints: 请简要回答：这是一个很长很长的问题...

案例2：消息历史管理器

python 复制代码

class MessageHistory:
    """Bounded store of conversation messages.

    When the history grows past ``max_history``, system messages are kept and
    only the most recent non-system messages are retained.
    """

    def __init__(self, max_history: int = 10):
        """Create an empty history holding at most ``max_history`` messages."""
        self.history: List[BaseMessage] = []
        self.max_history = max_history

    def add(self, message: BaseMessage):
        """Append ``message`` and trim the history if it exceeds the limit.

        After trimming, system messages come first, followed by the newest
        non-system messages. System messages are never dropped, so the
        history can exceed ``max_history`` only when the system messages
        alone already do.
        """
        self.history.append(message)

        if len(self.history) <= self.max_history:
            return

        system_messages = [m for m in self.history if isinstance(m, SystemMessage)]
        # Select recent messages from the non-system ones only; slicing the
        # full history would duplicate system messages inside the window.
        dialogue = [m for m in self.history if not isinstance(m, SystemMessage)]
        keep = max(self.max_history - len(system_messages), 0)
        # ``dialogue[-0:]`` would return everything, so handle zero explicitly.
        recent_messages = dialogue[-keep:] if keep else []
        self.history = system_messages + recent_messages

    def get_all(self) -> List[BaseMessage]:
        """Return a copy of the full history."""
        return self.history.copy()

    def get_recent(self, n: int = 5) -> List[BaseMessage]:
        """Return a new list with the last ``n`` messages.

        Returns an empty list when ``n`` is zero or negative, and the whole
        history when it holds ``n`` messages or fewer.
        """
        if n <= 0:
            return []
        # Slicing always copies, so callers never receive the internal list.
        return self.history[-n:]

    def clear(self):
        """Drop every message except system messages."""
        system_messages = [m for m in self.history if isinstance(m, SystemMessage)]
        self.history = system_messages

    def to_dict_list(self) -> List[Dict]:
        """Convert the history to ``{"role", "content"}`` dicts for API calls.

        Messages that are not system, human or AI messages are skipped.
        """
        role_by_type = (
            (SystemMessage, "system"),
            (HumanMessage, "user"),
            (AIMessage, "assistant"),
        )
        result = []
        for msg in self.history:
            for message_type, role in role_by_type:
                if isinstance(msg, message_type):
                    result.append({"role": role, "content": msg.content})
                    break
        return result

# Usage
history = MessageHistory(max_history=5)

# Add a system message
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
history.add(SystemMessage(content="你是一个助手"))

# Simulate a conversation
history.add(HumanMessage(content="你好"))
history.add(AIMessage(content="你好！有什么可以帮助你的？"))
history.add(HumanMessage(content="我的名字是张三"))

# Four messages were added, which is within max_history, so nothing is trimmed.
print(f"历史长度: {len(history.get_all())}")
print(f"最近2条: {[m.content[:20] for m in history.get_recent(2)]}")

案例3：提示词模板缓存

python 复制代码

from functools import lru_cache
from typing import Tuple

class CachedPromptTemplate(ChatPromptTemplate):
    """Prompt template that caches formatted messages per argument set.

    The cache is keyed on the sorted keyword arguments and evicts the oldest
    entry first (FIFO) once ``cache_size`` entries are stored.
    """

    def __init__(self, messages: List[Tuple[str, str]], cache_size: int = 128):
        """Initialise the template and an empty cache.

        Args:
            messages: ``(role, template)`` pairs passed to the base class.
            cache_size: Maximum number of cached results; zero or a negative
                value disables caching.
        """
        super().__init__(messages)
        # NOTE(review): if the base class is the pydantic-based template from
        # langchain_core rather than a plain class, assigning undeclared
        # attributes here may be rejected -- confirm against the base class.
        self._format_cache = {}
        self.cache_size = cache_size

    @staticmethod
    def _copy_messages(messages: List[BaseMessage]) -> List[BaseMessage]:
        """Rebuild each message from its type and ``content`` only."""
        return [type(msg)(content=msg.content) for msg in messages]

    def format_messages(self, **kwargs) -> List[BaseMessage]:
        """Format the messages, reusing a cached result when available.

        Falls back to uncached formatting when caching is disabled or when a
        keyword value is unhashable (for example a list of history messages).
        The caller always receives message objects that are distinct from the
        ones stored in the cache.
        """
        if self.cache_size <= 0:
            return super().format_messages(**kwargs)

        # Keyword names are unique, so sorting never compares the values.
        cache_key = tuple(sorted(kwargs.items()))

        try:
            cached = self._format_cache.get(cache_key)
        except TypeError:
            # The key contains an unhashable value and cannot be cached.
            return super().format_messages(**kwargs)

        if cached is not None:
            # Hand out copies so callers cannot modify the cached messages.
            return self._copy_messages(cached)

        formatted = super().format_messages(**kwargs)

        # Evict the oldest entries first; dicts preserve insertion order.
        while len(self._format_cache) >= self.cache_size:
            del self._format_cache[next(iter(self._format_cache))]

        # Store a private copy so the returned list stays independent.
        self._format_cache[cache_key] = self._copy_messages(formatted)
        return formatted

# Usage
prompt = CachedPromptTemplate([
    ("system", "你是{role}"),
    ("user", "{input}")
])

# First call: formats the messages and stores them in the cache
messages1 = prompt.format_messages(role="助手", input="你好")

# Second call: served from the cache (same arguments)
messages2 = prompt.format_messages(role="助手", input="你好")

print(messages1 == messages2)  # True (same content)

🎓 关键源码要点总结

ChatPromptTemplate: 使用元组列表定义模板结构
变量提取: 通过正则表达式提取模板变量
消息创建: 根据角色自动创建对应的消息类型
占位符支持: 支持从kwargs获取历史消息
Runnable接口: 模板本身也是Runnable，可以链式调用

📚 参考源码位置

langchain_core/prompts/chat.py - ChatPromptTemplate实现
langchain_core/messages/base.py - BaseMessage基类定义（HumanMessage、AIMessage、SystemMessage 分别位于同目录下的 human.py、ai.py、system.py）
langchain_core/prompts/base.py - 基础提示词类