RAG from Scratch-优化-routing

路由与查询构建篇

Part 10: Logical & Semantic Routing

逻辑路由 (Logical Routing)

使用函数调用进行分类路由。

Python 代码：

from typing import Literal

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI

# 1. Define the routing data model
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as schema text by with_structured_output -- confirm
# before rewording either of them.
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""
    # The value is restricted to the three literal datasource names below;
    # `...` is passed as the default, so no fallback value is supplied here.
    datasource: Literal["python_docs", "js_docs", "golang_docs"] = Field(
        ...,
        description="Given a user question choose which datasource would be most relevant"
    )

# 2. Use structured output
# The chat model is created with temperature=0 and then wrapped so that its
# output is structured according to RouteQuery.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = llm.with_structured_output(RouteQuery)

# 3. Create the routing prompt
system = """You are an expert at routing a user question to the appropriate data source.
Based on the programming language the question is referring to, route it to the relevant data source."""

# The human message is a template with a single {question} placeholder.
prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    ("human", "{question}"),
])

# Pipe the prompt into the structured model; the result is the router.
router = prompt | structured_llm

# 4. Test the router
# The question embeds a Python snippet as plain text; it is input data for the
# router, not code that runs here.
question = """Why doesn't the following code work:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_messages(["human", "speak in {language}"])
"""

result = router.invoke({"question": question})
print(result.datasource)  # Output: python_docs

Python 代码：

# 5. Run different logic depending on the routing result
def choose_route(result):
    """Map a routing decision to the placeholder label of the chain to run.

    Args:
        result: Object exposing a ``datasource`` string attribute, such as the
            ``RouteQuery`` produced by the router.

    Returns:
        ``"chain for python_docs"`` or ``"chain for js_docs"`` when the
        lower-cased datasource contains that name; otherwise
        ``"chain for golang_docs"``.
    """
    # Normalise once so both membership checks are case-insensitive.
    datasource = result.datasource.lower()
    if "python_docs" in datasource:
        return "chain for python_docs"
    if "js_docs" in datasource:
        return "chain for js_docs"
    # Fallback branch: use the same "chain for <datasource>" label shape as the
    # other branches instead of the bare datasource name.
    return "chain for golang_docs"

from langchain_core.runnables import RunnableLambda

# Feed the router's result into choose_route, wrapped as a runnable step.
full_chain = router | RunnableLambda(choose_route)
result = full_chain.invoke({"question": question})
print(result)  # Output: chain for python_docs

语义路由 (Semantic Routing)

基于 Embedding 相似度进行路由。

Python 代码：

from langchain.utils.math import cosine_similarity
from langchain_openai import OpenAIEmbeddings

# 1. Define prompt templates for the different domains
# Each template carries a {query} placeholder for the user's question.
physics_template = """You are a very smart physics professor.
You are great at answering questions about physics in a concise manner.
Here is a question: {query}"""

math_template = """You are a very good mathematician.
You are great at answering math questions.
Here is a question: {query}"""

# 2. Embed the prompts
# prompt_embeddings follows the order of prompt_templates, so an index into one
# selects the matching entry of the other.
embeddings = OpenAIEmbeddings()
prompt_templates = [physics_template, math_template]
prompt_embeddings = embeddings.embed_documents(prompt_templates)

# 3. Routing function
def prompt_router(input):
    """Select the prompt template whose embedding best matches the query.

    Embeds ``input["query"]``, scores it against ``prompt_embeddings`` with
    cosine similarity, prints which template was chosen, and returns that
    template wrapped via ``PromptTemplate.from_template``.
    """
    question_vector = embeddings.embed_query(input["query"])
    # cosine_similarity is given a one-row batch; row 0 holds the per-template scores.
    scores = cosine_similarity([question_vector], prompt_embeddings)[0]
    best_template = prompt_templates[scores.argmax()]
    if best_template == math_template:
        print("Using MATH")
    else:
        print("Using PHYSICS")
    return PromptTemplate.from_template(best_template)

# 4. Build the chain
# The raw input is wrapped as {"query": ...}, handed to prompt_router to pick a
# prompt, sent to the chat model, and finally passed through StrOutputParser.
chain = (
    {"query": RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | ChatOpenAI()
    | StrOutputParser()
)

# 5. Test
answer = chain.invoke("What's a black hole")
# Output: Using PHYSICS
# A black hole is a region in space where gravity is so strong...

Part 11: Query Structuring

核心思想

将自然语言查询转换为结构化的数据库查询，支持元数据过滤。

定义查询结构

Python 代码：

import datetime
from typing import Optional
from langchain_core.pydantic_v1 import BaseModel, Field

# Structured query schema: two text queries plus optional metadata filters.
# NOTE(review): the class docstring and the Field descriptions are presumably
# forwarded to the model as schema text by with_structured_output -- confirm
# before rewording any of them.
class TutorialSearch(BaseModel):
    """Search over a database of tutorial videos."""

    # Text queries: both are declared with `...` as the default, i.e. no
    # fallback value is provided for them.
    content_search: str = Field(
        ...,
        description="Similarity search query applied to video transcripts.",
    )
    title_search: str = Field(
        ...,
        description="Alternate version to apply to video titles.",
    )
    # View-count filters: both default to None when not supplied.
    min_view_count: Optional[int] = Field(
        None,
        description="Minimum view count filter, inclusive.",
    )
    max_view_count: Optional[int] = Field(
        None,
        description="Maximum view count filter, exclusive.",
    )
    # Publish-date filters: typed as datetime.date, default None.
    earliest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Earliest publish date filter, inclusive.",
    )
    latest_publish_date: Optional[datetime.date] = Field(
        None,
        description="Latest publish date filter, exclusive.",
    )
    # Length filters in seconds, default None. Unlike the filters above, these
    # descriptions do not say whether the bound is inclusive or exclusive.
    min_length_sec: Optional[int] = Field(
        None,
        description="Minimum video length in seconds.",
    )
    max_length_sec: Optional[int] = Field(
        None,
        description="Maximum video length in seconds.",
    )

查询分析器

Python 代码：

# System instructions for turning a user question into a database query.
system = """You are an expert at converting user questions into database queries.
You have access to a database of tutorial videos about a software library.
Given a question, return a database query optimized to retrieve the most relevant results."""

# The human message is a template with a single {question} placeholder.
prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    ("human", "{question}"),
])

# The model is created with temperature=0 and wrapped so that its output is
# structured according to TutorialSearch; piping the prompt into it yields
# the query analyzer.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm = llm.with_structured_output(TutorialSearch)
query_analyzer = prompt | structured_llm

测试示例

Python 代码：

# Example 1: simple query
query_analyzer.invoke({"question": "rag from scratch"})
# Output:
# content_search: rag from scratch
# title_search: rag from scratch

# Example 2: with a date filter
query_analyzer.invoke({"question": "videos on chat langchain published in 2023"})
# Output:
# content_search: chat langchain
# title_search: chat langchain
# earliest_publish_date: 2023-01-01
# latest_publish_date: 2024-01-01

# Example 3: with a length filter
query_analyzer.invoke({
    "question": "how to use multi-modal models in an agent, only videos under 5 minutes"
})
# Output:
# content_search: multi-modal models agent
# title_search: multi-modal models agent
# max_length_sec: 300