Part 4: Output Parsers(输出解析器)
文章目录
- [Part 4: Output Parsers(输出解析器)](<#Part 4: Output Parsers(输出解析器)>)
  - [4.1 概述](<#4.1 概述>)
  - [4.2 StrOutputParser](<#4.2 StrOutputParser>)
  - [4.3 PydanticOutputParser](<#4.3 PydanticOutputParser>)
  - [4.4 JsonOutputParser](<#4.4 JsonOutputParser>)
  - [4.5 CommaSeparatedListOutputParser](<#4.5 CommaSeparatedListOutputParser>)
  - [4.6 StructuredOutput](<#4.6 StructuredOutput>)
  - [4.7 自定义解析器](<#4.7 自定义解析器>)
  - [4.8 输出解析最佳实践](<#4.8 输出解析最佳实践>)
4.1 概述
Output Parsers(输出解析器)一览:

| 解析器 | 输出 / 对应方法 |
|---|---|
| StrOutputParser | 字符串 |
| PydanticOutputParser | Pydantic 对象 |
| JsonOutputParser | JSON 字典 |
| CommaSeparatedListOutputParser | 列表 |
| StructuredOutput | with_structured_output |
| 自定义解析器 | 自定义 |
4.2 StrOutputParser
python
from dotenv import load_dotenv
load_dotenv()
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
# Demo: StrOutputParser used on its own and as the last step of a chain.
model = ChatOpenAI(model="gpt-4o-mini")
parser = StrOutputParser()

# Standalone: call the model first, then hand its reply to the parser,
# which extracts the content string.
result = model.invoke("解释 Python")
parsed = parser.invoke(result)

# Inside a chain (the most common usage): prompt -> model -> parser.
concept_prompt = ChatPromptTemplate.from_template("解释{concept}")
chain = concept_prompt | model | StrOutputParser()
result = chain.invoke({"concept": "机器学习"})  # the chain returns a string directly
4.3 PydanticOutputParser
所有方法
| 方法 | 说明 | 返回类型 |
|---|---|---|
| `get_format_instructions()` | 获取格式说明 | str |
| `invoke(text)` | 解析文本 | BaseModel |
| `parse(text)` | 解析文本 | BaseModel |
| `parse_result(result)` | 解析模型输出 | BaseModel |
基本用法
python
from dotenv import load_dotenv
load_dotenv()
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
# Schema for a structured movie review; each Field carries a description
# of what the attribute should contain.
# NOTE: documented with comments rather than a class docstring, because
# Pydantic copies a model docstring into the generated JSON schema, which
# would change the format instructions derived from this class.
class MovieReview(BaseModel):
    title: str = Field(description="电影标题")
    rating: float = Field(description="评分,范围 1-10")
    summary: str = Field(description="一句话简介")
    recommendation: str = Field(description="推荐 或 不推荐")
    pros: list[str] = Field(description="优点列表")
    cons: list[str] = Field(description="缺点列表")
# Build a parser for the MovieReview schema and show the format
# instructions it generates.
parser = PydanticOutputParser(pydantic_object=MovieReview)
format_instructions = parser.get_format_instructions()
print(format_instructions)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "你是电影评论家。\n{format_instructions}"),
        ("human", "评价电影《星际穿越》"),
    ]
)

# Pre-fill the only template variable so the chain can be invoked with {}.
review_prompt = prompt.partial(format_instructions=format_instructions)
review_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.3)
chain = review_prompt | review_model | parser
result = chain.invoke({})

# The parsed result exposes the schema fields as attributes.
print(f"标题: {result.title}")
print(f"评分: {result.rating}")
print(f"优点: {result.pros}")
print(f"字典: {result.model_dump()}")
嵌套模型
python
from pydantic import BaseModel, Field
from typing import Optional
# Address sub-schema (city and street); used as a nested field type.
# Documented with comments rather than a docstring so the generated JSON
# schema stays unchanged.
class Address(BaseModel):
    city: str = Field(description="城市")
    street: str = Field(description="街道")
# Person schema whose `address` field is itself a model (Address),
# demonstrating a nested structure.
# Documented with comments rather than a docstring so the generated JSON
# schema stays unchanged.
class Person(BaseModel):
    name: str = Field(description="姓名")
    age: int = Field(description="年龄")
    address: Address = Field(description="地址")
# Parser targeting the nested Person schema.
parser = PydanticOutputParser(pydantic_object=Person)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "提取人物信息。\n{format_instructions}"),
        ("human", "张三30岁,住在北京中关村大街1号"),
    ]
)

# Bind the format instructions up front; no other template variables remain.
person_prompt = prompt.partial(
    format_instructions=parser.get_format_instructions()
)
person_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
chain = person_prompt | person_model | parser
result = chain.invoke({})

# Nested fields are reached through ordinary attribute access.
print(f"城市: {result.address.city}")
可选字段
python
from pydantic import BaseModel, Field
from typing import Optional
# Book schema with two required fields (title, author) and two optional
# ones (year, genre) that default to None when absent.
# Documented with comments rather than a docstring so the generated JSON
# schema stays unchanged.
class BookInfo(BaseModel):
    title: str = Field(description="书名")
    author: str = Field(description="作者")
    year: Optional[int] = Field(default=None, description="出版年份")
    genre: Optional[str] = Field(default=None, description="类型")


# Parser bound to the BookInfo schema.
parser = PydanticOutputParser(pydantic_object=BookInfo)
4.4 JsonOutputParser
python
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate
# Demo: JsonOutputParser with no schema; the prompt itself asks for JSON.
parser = JsonOutputParser()

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "请以 JSON 格式输出,包含 name 和 age。"),
        ("human", "张三今年25岁"),
    ]
)

json_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
chain = prompt | json_model | parser
result = chain.invoke({})

print(f"类型: {type(result)}")  # dict
# .get() avoids a KeyError if the model omitted the key.
print(f"姓名: {result.get('name')}")
4.5 CommaSeparatedListOutputParser
python
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import CommaSeparatedListOutputParser
from langchain_core.prompts import ChatPromptTemplate
# Demo: CommaSeparatedListOutputParser; the system message consists solely
# of the parser's own format instructions.
parser = CommaSeparatedListOutputParser()

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "{format_instructions}"),
        ("human", "列出 5 种编程语言"),
    ]
)

list_prompt = prompt.partial(
    format_instructions=parser.get_format_instructions()
)
list_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
chain = list_prompt | list_model | parser
result = chain.invoke({})

print(f"类型: {type(result)}")  # list
4.6 StructuredOutput
python
from dotenv import load_dotenv
load_dotenv()
from pydantic import BaseModel, Field
from typing import TypedDict
from langchain_openai import ChatOpenAI
# Chat model used by the structured-output examples in this section.
model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)


# Option 1: a Pydantic model as the output schema.
# Documented with comments rather than a docstring so the schema handed to
# the model stays unchanged.
class Recipe(BaseModel):
    name: str = Field(description="菜名")
    ingredients: list[str] = Field(description="食材列表")
    steps: list[str] = Field(description="烹饪步骤")
    cooking_time: int = Field(description="烹饪时间(分钟)")


# Wrap the model so that invoke() yields a Recipe rather than a raw message.
structured_model = model.with_structured_output(Recipe)
result = structured_model.invoke("教我做番茄炒蛋")
print(f"菜名: {result.name}")
print(f"食材: {result.ingredients}")
# Option 2: a TypedDict as the output schema.
# NOTE(review): the original article tags this as "new in 1.3"; that
# version claim cannot be confirmed from this file.
# Documented with comments rather than a docstring so the schema handed to
# the model stays unchanged.
class MovieInfo(TypedDict):
    title: str
    year: int
    director: str
    rating: float


typed_model = model.with_structured_output(MovieInfo)
result = typed_model.invoke("告诉我电影《盗梦空间》的信息")
print(result)  # dict
# Option 3: a raw JSON Schema dict as the output schema.
# A dict schema needs a top-level "title": LangChain uses it as the
# function/schema name and rejects dicts that have neither "title" nor
# "name". "description" is added alongside it as documented for this format.
json_schema = {
    "title": "SentimentAnalysis",
    "description": "Sentiment classification of the input text.",
    "type": "object",
    "properties": {
        "sentiment": {
            "type": "string",
            "enum": ["positive", "negative", "neutral"],
        },
        "confidence": {"type": "number"},
    },
    "required": ["sentiment", "confidence"],
}
schema_model = model.with_structured_output(json_schema)
result = schema_model.invoke("这个产品太好用了!")
print(result)
4.7 自定义解析器
python
import re
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.exceptions import OutputParserException
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
class NumberExtractorParser(BaseOutputParser[int]):
    """Output parser that returns the first integer found in the text."""

    def parse(self, text: str) -> int:
        """Return the first run of digits in ``text`` as an ``int``.

        Only the first digit run is used, so a decimal such as "8.5"
        yields 8 and a leading minus sign is ignored.

        Args:
            text: Raw text to search.

        Raises:
            OutputParserException: If ``text`` contains no digit.
        """
        match = re.search(r'\d+', text)
        if match is None:
            raise OutputParserException(f"无法提取数字: {text}")
        return int(match.group())

    @property
    def _type(self) -> str:
        """Identifier string for this parser type."""
        return "number_extractor"
# Direct call: only the first digit run is extracted, so "8.5" gives 8.
parser = NumberExtractorParser()
print(parser.parse("评分是 8.5 分"))  # 8

# Used as the final step of a chain.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "只回答数字"),
        ("human", "Python 有多少个关键字?"),
    ]
)
number_model = ChatOpenAI(model="gpt-4o-mini", temperature=0.0)
chain = prompt | number_model | NumberExtractorParser()
result = chain.invoke({})
print(f"结果: {result} (类型: {type(result).__name__})")
4.8 输出解析最佳实践
| 场景 | 推荐解析器 | 原因 |
|---|---|---|
| 只需要纯文本 | StrOutputParser | 简单直接 |
| 结构化数据 | with_structured_output | 最简洁可靠 |
| 复杂嵌套结构 | PydanticOutputParser | 支持嵌套 |
| 灵活 JSON | JsonOutputParser | 无需定义模型 |
| 列表 | CommaSeparatedListOutputParser | 简单列表 |
| 特殊格式 | 自定义解析器 | 完全自定义 |