RAG系统实战:检索增强生成技术详解
作者的话:大语言模型虽然强大,但存在知识截止、幻觉等问题。RAG(检索增强生成)技术通过将外部知识库与LLM结合,让模型能够基于最新、最准确的信息回答问题。本文将深入解析RAG的原理、架构设计,并带你从零构建一个完整的RAG系统!
一、什么是RAG?
1.1 RAG解决的问题
传统LLM的局限:
- 知识截止:模型训练后无法获取新知识
- 幻觉问题:生成看似合理但错误的内容
- 无法溯源:不知道答案来自哪里
- 领域知识不足:专业领域表现差
RAG的优势:
- 实时获取最新知识
- 减少幻觉,提高准确性
- 可追溯信息来源
- 支持私有/专业领域知识
两种流程对比:
传统LLM:用户提问 → LLM生成答案 → 结果可能过时/错误
RAG系统:用户提问 → 检索相关知识 → LLM基于知识生成答案 → 结果基于最新准确信息
1.2 RAG架构概览
RAG系统架构
┌─────────────────────────────────────────────────────────────┐
│ 查询处理 │
│ 查询理解、扩展、重写 │
└─────────────────────────────┬───────────────────────────────┘
↓
┌─────────────────────────────────────────────────────────────┐
│ 检索模块 (Retriever) │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │
│ │ 向量数据库 │ │ 关键词检索 │ │ 混合检索 │ │
│ │ (Embedding)│ │ (BM25) │ │ (Hybrid) │ │
│ └─────────────┘ └─────────────┘ └─────────────┘ │
└─────────────────────────────┬───────────────────────────────┘
↓
┌─────────────────────────────────────────────────────────────┐
│ 重排序模块 (Reranker) │
│ 对检索结果进行精排,提高相关性 │
└─────────────────────────────┬───────────────────────────────┘
↓
┌─────────────────────────────────────────────────────────────┐
│ 生成模块 (Generator) │
│ LLM基于检索到的上下文生成答案 │
└─────────────────────────────────────────────────────────────┘
二、文档处理与向量化
2.1 文档加载与解析
# 文档加载器
import json
import os
from typing import Any, Dict, List

from langchain.document_loaders import (
    CSVLoader,
    PyPDFLoader,
    TextLoader,
    UnstructuredWordDocumentLoader,
)
class DocumentLoader:
    """Load files into document objects, choosing a loader by file extension."""

    @staticmethod
    def load_file(file_path: str):
        """Load a single file with the loader registered for its extension.

        The extension is lower-cased before lookup, so ``.PDF`` and ``.pdf``
        are treated the same.

        Args:
            file_path: Path of the file to load.

        Returns:
            Whatever the selected loader's ``load()`` returns.

        Raises:
            ValueError: If no loader is registered for the extension.
        """
        _, suffix = os.path.splitext(file_path)
        ext = suffix.lower()
        registry = {
            '.txt': TextLoader,
            '.pdf': PyPDFLoader,
            '.docx': UnstructuredWordDocumentLoader,
            '.csv': CSVLoader,
        }
        if ext not in registry:
            raise ValueError(f"不支持的文件类型: {ext}")
        return registry[ext](file_path).load()

    @staticmethod
    def load_directory(directory: str):
        """Recursively load every file found under ``directory``.

        Loading is best-effort: a file that cannot be loaded (including one
        with an unsupported extension) is reported on stdout and skipped, so
        one bad file does not abort the whole walk.

        Args:
            directory: Root folder to walk.

        Returns:
            A flat list with the documents of every file that loaded.
        """
        collected = []
        for current_dir, _subdirs, filenames in os.walk(directory):
            for filename in filenames:
                path = os.path.join(current_dir, filename)
                try:
                    collected.extend(DocumentLoader.load_file(path))
                except Exception as exc:
                    print(f"加载失败 {path}: {exc}")
        return collected
# Usage example: load everything under ./knowledge_base and report how many
# document objects came back.
docs = DocumentLoader.load_directory(directory="./knowledge_base")
print(f"加载了 {len(docs)} 个文档")
2.2 文档切分
from langchain.text_splitter import (
RecursiveCharacterTextSplitter,
CharacterTextSplitter,
TokenTextSplitter
)
class DocumentSplitter:
    """Strategies for cutting loaded documents into retrieval-sized chunks."""

    @staticmethod
    def recursive_split(
        documents,
        chunk_size=500,
        chunk_overlap=50
    ):
        """Split documents with ``RecursiveCharacterTextSplitter``.

        Separators are tried in priority order (blank line, newline, space,
        then the empty string), which keeps paragraphs and sentences intact
        where possible.

        Args:
            documents: Documents accepted by ``split_documents``.
            chunk_size: Maximum chunk length, measured with ``len``.
            chunk_overlap: Overlap between neighbouring chunks.

        Returns:
            The result of ``splitter.split_documents(documents)``.
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
            separators=["\n\n", "\n", " ", ""],
        )
        return splitter.split_documents(documents)

    @staticmethod
    def token_split(documents, chunk_size=256, chunk_overlap=20):
        """Split documents with ``TokenTextSplitter``.

        Args:
            documents: Documents accepted by ``split_documents``.
            chunk_size: Chunk size passed to the splitter.
            chunk_overlap: Overlap passed to the splitter.

        Returns:
            The result of ``splitter.split_documents(documents)``.
        """
        splitter = TokenTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        return splitter.split_documents(documents)

    @staticmethod
    def semantic_split(documents, embedding_model, similarity_threshold=0.7):
        """Split text at points where adjacent sentences stop being similar.

        Text is cut into sentences on the Chinese full stop, each sentence is
        embedded, and a new chunk starts whenever the cosine similarity
        between two neighbouring sentences falls below the threshold.

        Args:
            documents: Objects exposing a ``page_content`` string.
            embedding_model: Object whose ``encode(list_of_str)`` returns one
                vector per sentence.
            similarity_threshold: Similarity below which a new chunk starts.

        Returns:
            A list of chunk strings (not document objects); empty when the
            input contains no sentences.
        """
        # Local import keeps this method self-contained; numpy is not imported
        # before this class in the file.
        import numpy as np

        # Drop the empty fragments that split() leaves after a trailing "。";
        # embedding them would create meaningless sentences and "。。" chunks.
        sentences = [
            fragment.strip()
            for doc in documents
            for fragment in doc.page_content.split("。")
            if fragment.strip()
        ]
        if not sentences:
            return []

        vectors = np.asarray(embedding_model.encode(sentences), dtype=float)

        chunks = []
        current = [sentences[0]]
        for prev_vec, vec, sentence in zip(vectors, vectors[1:], sentences[1:]):
            norm = np.linalg.norm(prev_vec) * np.linalg.norm(vec)
            # A zero vector has no direction; treat it as dissimilar.
            similarity = float(prev_vec @ vec) / norm if norm else 0.0
            if similarity < similarity_threshold:
                chunks.append("。".join(current) + "。")
                current = []
            current.append(sentence)
        chunks.append("。".join(current) + "。")
        return chunks
# Splitting example: chunk_size=1000 with chunk_overlap=200, applied to the
# documents loaded earlier.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = text_splitter.split_documents(docs)
print(f"切分为 {len(chunks)} 个文本块")
2.3 向量化与存储
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
import numpy as np
class VectorStore:
    """Embed text chunks with a SentenceTransformer model and keep them in ChromaDB."""

    def __init__(
        self,
        collection_name="rag_collection",
        embedding_model="BAAI/bge-large-zh",
        persist_directory="./chroma_db",
    ):
        """Load the embedding model and open (or create) the collection.

        Args:
            collection_name: Name of the ChromaDB collection.
            embedding_model: Model name passed to ``SentenceTransformer``.
            persist_directory: Directory handed to ChromaDB for persistence.
        """
        self.encoder = SentenceTransformer(embedding_model)
        # NOTE(review): `Settings(chroma_db_impl=...)` looks like the older
        # ChromaDB configuration style — confirm against the installed version.
        self.client = chromadb.Client(Settings(
            chroma_db_impl="duckdb+parquet",
            persist_directory=persist_directory,
        ))
        # get_or_create avoids the previous bare `except:` that treated every
        # failure of create_collection as "collection already exists".
        self.collection = self.client.get_or_create_collection(
            name=collection_name,
            metadata={"hnsw:space": "cosine"},
        )

    def add_documents(self, chunks, ids=None):
        """Embed chunks and add them to the collection.

        Args:
            chunks: Objects exposing ``page_content`` and ``metadata``.
            ids: Optional explicit ids, one per chunk. When omitted, fresh
                unique ids are generated so repeated calls never collide.

        Returns:
            The list of ids under which the chunks were stored.

        Raises:
            ValueError: If ``ids`` is given with a different length than
                ``chunks``.
        """
        import uuid

        chunks = list(chunks)
        if not chunks:
            return []
        if ids is None:
            ids = [f"doc_{uuid.uuid4().hex}" for _ in chunks]
        elif len(ids) != len(chunks):
            raise ValueError("ids and chunks must have the same length")

        texts = [chunk.page_content for chunk in chunks]
        metadatas = [chunk.metadata for chunk in chunks]
        embeddings = self.encoder.encode(texts).tolist()

        self.collection.add(
            embeddings=embeddings,
            documents=texts,
            metadatas=metadatas,
            ids=list(ids),
        )
        print(f"成功添加 {len(chunks)} 个文档块")
        return list(ids)

    def search(self, query, top_k=5):
        """Return the raw ChromaDB query result for the ``top_k`` nearest chunks.

        The result includes ``documents``, ``metadatas`` and ``distances``.
        """
        query_embedding = self.encoder.encode([query]).tolist()
        return self.collection.query(
            query_embeddings=query_embedding,
            n_results=top_k,
            include=["documents", "metadatas", "distances"],
        )

    def delete(self, ids):
        """Delete the entries with the given ids from the collection."""
        self.collection.delete(ids=ids)

    def update(self, id, new_chunk):
        """Replace the entry stored under ``id`` with ``new_chunk``.

        The parameter name shadows the ``id`` builtin; it is kept so existing
        keyword callers continue to work.
        """
        self.delete([id])
        # Re-add under the SAME id; previously the replacement was stored
        # under a freshly generated "doc_0".
        self.add_documents([new_chunk], ids=[id])
# Usage example: index the chunks produced above, then print the three
# closest matches for a sample question.
vector_store = VectorStore()
vector_store.add_documents(chunks)

results = vector_store.search("什么是RAG?", top_k=3)
# The code reads index 0 of each result list: the hits for the single query.
for doc, metadata in zip(results['documents'][0], results['metadatas'][0]):
    print(f"内容: {doc[:100]}...")
    print(f"来源: {metadata.get('source', 'unknown')}")
    print("---")
三、检索策略
3.1 向量检索
class VectorRetriever:
    """Dense retriever that keeps only vector-store hits above a similarity floor."""

    def __init__(self, vector_store):
        self.vector_store = vector_store

    def retrieve(
        self,
        query: str,
        top_k: int = 5,
        score_threshold: float = 0.7
    ):
        """Return up to ``top_k`` hits whose similarity reaches the threshold.

        Twice ``top_k`` candidates are requested from the store so that
        filtering still leaves enough results.

        Args:
            query: Query text.
            top_k: Maximum number of results to return.
            score_threshold: Minimum similarity (``1 - distance``) to keep.

        Returns:
            A list of dicts with ``content``, ``score`` and ``metadata``, in
            the order the store returned them.
        """
        raw = self.vector_store.search(query, top_k=top_k * 2)
        rows = zip(raw['documents'][0], raw['distances'][0], raw['metadatas'][0])
        # The store reports a distance; similarity is its complement.
        kept = [
            {'content': text, 'score': 1 - distance, 'metadata': meta}
            for text, distance, meta in rows
            if 1 - distance >= score_threshold
        ]
        return kept[:top_k]
# Vector retrieval example: wrap the store and fetch the top three hits.
retriever = VectorRetriever(vector_store=vector_store)
results = retriever.retrieve(query="RAG的工作原理是什么?", top_k=3)
3.2 关键词检索(BM25)
from rank_bm25 import BM25Okapi
import jieba
class BM25Retriever:
    """Keyword retriever that ranks documents with BM25 over tokenized text."""

    def __init__(self, documents, tokenizer=None):
        """Tokenize the corpus and build the BM25 index.

        Args:
            documents: Objects exposing ``page_content`` and ``metadata``.
            tokenizer: Optional callable mapping a string to an iterable of
                tokens. Defaults to ``jieba.cut``.
        """
        self.documents = list(documents)
        self.tokenizer = tokenizer if tokenizer is not None else jieba.cut
        self.tokenized_docs = [
            list(self.tokenizer(doc.page_content))
            for doc in self.documents
        ]
        # Skip index construction for an empty corpus; retrieve() then
        # returns no hits instead of failing.
        self.bm25 = BM25Okapi(self.tokenized_docs) if self.tokenized_docs else None

    def retrieve(self, query: str, top_k: int = 5):
        """Return up to ``top_k`` documents with a positive BM25 score.

        Args:
            query: Query text; tokenized with the same tokenizer as the corpus.
            top_k: Maximum number of results. Values <= 0 yield an empty list.

        Returns:
            A list of dicts with ``content``, ``score`` and ``metadata``,
            best score first.
        """
        # Guard top_k <= 0 explicitly: slicing with [-0:] would otherwise
        # select the whole corpus.
        if self.bm25 is None or top_k <= 0:
            return []

        tokenized_query = list(self.tokenizer(query))
        scores = self.bm25.get_scores(tokenized_query)
        best_first = np.argsort(scores)[::-1][:top_k]

        return [
            {
                'content': self.documents[idx].page_content,
                'score': float(scores[idx]),
                'metadata': self.documents[idx].metadata,
            }
            for idx in best_first
            if scores[idx] > 0
        ]
3.3 混合检索
class HybridRetriever:
    """Combine a vector retriever and a BM25 retriever with a weighted score."""

    def __init__(self, vector_retriever, bm25_retriever, alpha=0.5, bm25_scale=10.0):
        """Store the two retrievers and the fusion parameters.

        Args:
            vector_retriever: Retriever whose hits carry a similarity ``score``.
            bm25_retriever: Retriever whose hits carry a raw BM25 ``score``.
            alpha: Weight of the vector score; BM25 gets ``1 - alpha``.
            bm25_scale: Raw BM25 score that maps to 1.0 after normalisation.

        Raises:
            ValueError: If ``bm25_scale`` is not positive.
        """
        if bm25_scale <= 0:
            raise ValueError("bm25_scale must be positive")
        self.vector_retriever = vector_retriever
        self.bm25_retriever = bm25_retriever
        self.alpha = alpha
        self.bm25_scale = bm25_scale

    @staticmethod
    def _doc_key(result):
        """Identify a hit by its source and text so both retrievers agree on it."""
        return (result['metadata'].get('source', ''), result['content'])

    def retrieve(self, query: str, top_k: int = 5):
        """Return the ``top_k`` hits ranked by the fused score.

        Each retriever is asked for twice ``top_k`` candidates. Hits that
        refer to the same chunk (same source and content) are merged; the
        previous rank-based key merged unrelated chunks and missed real
        overlaps.

        Returns:
            A list of dicts with ``content``, ``metadata``, ``vector_score``,
            ``bm25_score``, ``final_score`` and ``score`` (same value as
            ``final_score``, for consumers that read ``score``).
        """
        vector_results = self.vector_retriever.retrieve(query, top_k=top_k * 2)
        bm25_results = self.bm25_retriever.retrieve(query, top_k=top_k * 2)

        merged = {}
        for field, hits in (('vector_score', vector_results), ('bm25_score', bm25_results)):
            for hit in hits:
                entry = merged.setdefault(self._doc_key(hit), {
                    'content': hit['content'],
                    'metadata': hit['metadata'],
                    'vector_score': 0,
                    'bm25_score': 0,
                })
                # Keep the best score if a retriever returns a chunk twice.
                entry[field] = max(entry[field], hit['score'])

        for entry in merged.values():
            # Clamp the scaled BM25 score into [.., 1] before weighting.
            bm25_norm = min(entry['bm25_score'] / self.bm25_scale, 1.0)
            entry['final_score'] = (
                self.alpha * entry['vector_score']
                + (1 - self.alpha) * bm25_norm
            )
            entry['score'] = entry['final_score']

        ranked = sorted(
            merged.values(),
            key=lambda item: item['final_score'],
            reverse=True,
        )
        return ranked[:top_k]
四、查询优化
4.1 查询重写
class QueryRewriter:
    """Use an LLM to expand, decompose or hypothetically answer a query."""

    def __init__(self, llm_client):
        """Store the LLM client; it must expose ``complete(prompt)``."""
        self.llm = llm_client

    @staticmethod
    def _parse_string_list(response):
        """Extract a non-empty JSON list of strings from an LLM reply.

        The outermost ``[...]`` span is parsed so that replies wrapped in
        prose or code fences still work. Returns ``None`` when no such list
        can be found.
        """
        if not isinstance(response, str):
            return None
        start, end = response.find("["), response.rfind("]")
        if start == -1 or end < start:
            return None
        try:
            parsed = json.loads(response[start:end + 1])
        except json.JSONDecodeError:
            return None
        if isinstance(parsed, list) and parsed and all(isinstance(item, str) for item in parsed):
            return parsed
        return None

    def expand(self, query: str) -> List[str]:
        """Return the original query followed by LLM-generated paraphrases.

        Falls back to ``[query]`` when the reply is not a JSON list of strings.
        """
        prompt = (
            "基于以下查询,生成3个语义相同但表述不同的查询:\n"
            f"查询: {query}\n"
            '请以JSON格式输出:["查询1", "查询2", "查询3"]'
        )
        expanded = self._parse_string_list(self.llm.complete(prompt))
        if expanded is None:
            return [query]
        return [query] + expanded

    def decompose(self, query: str) -> List[str]:
        """Split a complex query into sub-queries produced by the LLM.

        Falls back to ``[query]`` when the reply is not a JSON list of strings.
        """
        prompt = (
            "将以下复杂查询分解为2-3个简单的子查询:\n"
            f"查询: {query}\n"
            "子查询列表(JSON格式):"
        )
        sub_queries = self._parse_string_list(self.llm.complete(prompt))
        if sub_queries is None:
            return [query]
        return sub_queries

    def hyde(self, query: str) -> str:
        """Return a hypothetical answer to retrieve with (HyDE).

        The LLM's short answer is returned verbatim; the caller is expected
        to use it as the retrieval query.
        """
        prompt = (
            "请基于你的知识,简要回答以下问题(2-3句话):\n"
            f"问题: {query}\n"
            "答案:"
        )
        return self.llm.complete(prompt)
# Usage example. `llm` is not defined in this snippet; it is assumed to be an
# LLM client created elsewhere.
rewriter = QueryRewriter(llm)

# Generate paraphrases of the question.
expanded_queries = rewriter.expand("什么是RAG技术?")

# Retrieve for every paraphrase and pool the hits.
all_results = []
for q in expanded_queries:
    results = retriever.retrieve(q)
    all_results += results

# De-duplicating and sorting the pooled hits is left out of this snippet.
# ...
4.2 多路召回
class MultiRecallRetriever:
    """Query several retrievers and fuse their result lists into one ranking."""

    def __init__(self, retrievers: Dict[str, Any]):
        """Store the retrievers.

        Args:
            retrievers: Mapping from a name to an object exposing
                ``retrieve(query, top_k=...)``.
        """
        self.retrievers = retrievers

    def retrieve(
        self,
        query: str,
        top_k: int = 5,
        strategy: str = "fusion"
    ):
        """Run every retriever and fuse the results.

        Args:
            query: Query text.
            top_k: Number of fused results to return; each retriever is asked
                for twice as many candidates.
            strategy: ``"fusion"`` (normalised score fusion) or ``"rrf"``
                (reciprocal rank fusion).

        Raises:
            ValueError: If ``strategy`` is unknown. The check happens before
                any retriever is called.
        """
        fusers = {
            "fusion": self._score_fusion,
            "rrf": self._reciprocal_rank_fusion,
        }
        if strategy not in fusers:
            raise ValueError(f"未知策略: {strategy}")

        all_results = {
            name: retriever.retrieve(query, top_k=top_k * 2)
            for name, retriever in self.retrievers.items()
        }
        return fusers[strategy](all_results, top_k)

    @staticmethod
    def _doc_key(result):
        """Identify a hit by source and text.

        Keying on the source alone would collapse every chunk of one file
        into a single entry.
        """
        return (result['metadata'].get('source', ''), result['content'])

    def _score_fusion(self, all_results, top_k):
        """Fuse by averaging per-retriever scores normalised by their maximum.

        Each retriever's scores are divided by the best score in its own list
        so that differently scaled scores (similarity vs. BM25) become
        comparable; a chunk missing from a list contributes 0 for it.

        Returns:
            Dicts with ``content``, ``metadata`` and the fused ``score``.
        """
        fused = {}
        retriever_count = len(all_results)
        for results in all_results.values():
            if not results:
                continue
            # Some retrievers report `final_score` instead of `score`.
            raw_scores = [hit.get('score', hit.get('final_score', 0.0)) for hit in results]
            top = max(raw_scores)
            scale = top if top > 0 else 1.0
            for hit, raw in zip(results, raw_scores):
                entry = fused.setdefault(self._doc_key(hit), {
                    'content': hit['content'],
                    'metadata': hit['metadata'],
                    'score': 0.0,
                })
                entry['score'] += raw / scale / retriever_count

        ranked = sorted(fused.values(), key=lambda item: item['score'], reverse=True)
        return ranked[:top_k]

    def _reciprocal_rank_fusion(self, all_results, top_k, k=60):
        """Fuse with reciprocal rank fusion: score = sum of 1 / (k + rank).

        Ranks start at 1 within each retriever's list.

        Returns:
            Dicts with ``content``, ``metadata`` and ``rrf_score``.
        """
        doc_scores = {}
        for results in all_results.values():
            for rank, hit in enumerate(results, start=1):
                entry = doc_scores.setdefault(self._doc_key(hit), {
                    'content': hit['content'],
                    'metadata': hit['metadata'],
                    'rrf_score': 0,
                })
                entry['rrf_score'] += 1 / (k + rank)

        ranked = sorted(doc_scores.values(), key=lambda item: item['rrf_score'], reverse=True)
        return ranked[:top_k]
五、重排序与上下文构建
5.1 重排序模型
from sentence_transformers import CrossEncoder
class Reranker:
    """Re-score retrieved hits against the query with a cross-encoder."""

    def __init__(self, model_name="BAAI/bge-reranker-large"):
        """Load the ``CrossEncoder`` model with the given name."""
        self.model = CrossEncoder(model_name)

    def rerank(self, query: str, documents: List[Dict], top_k: int = 5):
        """Return the ``top_k`` hits ordered by cross-encoder relevance.

        Args:
            query: Query text.
            documents: Hits, each a dict with at least a ``content`` key.
            top_k: Number of hits to return.

        Returns:
            New dicts (shallow copies of the inputs) with an added
            ``rerank_score``, best first. The caller's dicts are not
            modified. An empty input yields an empty list without calling
            the model.
        """
        if not documents:
            return []

        pairs = [[query, doc['content']] for doc in documents]
        scores = self.model.predict(pairs)

        scored = [
            {**doc, 'rerank_score': float(score)}
            for doc, score in zip(documents, scores)
        ]
        scored.sort(key=lambda item: item['rerank_score'], reverse=True)
        return scored[:top_k]
# End-to-end retrieval flow: hybrid recall followed by cross-encoder reranking.
# The names below were previously used without being defined in the article;
# they are built here from the `vector_store` and `chunks` created earlier.
query = "什么是RAG?"
vector_retriever = VectorRetriever(vector_store)
bm25_retriever = BM25Retriever(chunks)
retriever = HybridRetriever(vector_retriever, bm25_retriever)
reranker = Reranker()

# 1. Coarse ranking: recall a wide candidate set.
initial_results = retriever.retrieve(query, top_k=20)

# 2. Fine ranking: keep the five most relevant candidates.
final_results = reranker.rerank(query, initial_results, top_k=5)
5.2 上下文构建
class ContextBuilder:
    """Assemble retrieved hits into a context string that fits a token budget."""

    def __init__(self, max_tokens=3000):
        self.max_tokens = max_tokens
        self.token_counter = TokenCounter()

    def build(self, query: str, documents: List[Dict]) -> str:
        """Concatenate hits, in the given order, until the budget is used up.

        The budget is ``max_tokens`` minus the tokens of the system prompt
        template, the tokens of the query, and a fixed reserve of 200. Hits
        are numbered from 1 and tagged with their source; the first hit that
        would exceed the budget ends the loop.

        Args:
            query: The user question (only its token count is used here).
            documents: Hits with ``content`` and ``metadata`` keys.

        Returns:
            The selected hit sections joined by newlines; empty if none fit.
        """
        count = self.token_counter.count
        reserved = count(self._get_system_prompt()) + count(query) + 200
        budget = self.max_tokens - reserved

        sections = []
        used = 0
        for position, doc in enumerate(documents, 1):
            content = doc['content']
            source = doc['metadata'].get('source', 'unknown')
            section = f"\n[文档{position}] {content}\n来源: {source}\n"
            cost = count(section)
            if used + cost > budget:
                break
            sections.append(section)
            used += cost
        return "\n".join(sections)

    def _get_system_prompt(self) -> str:
        """Return the prompt template whose token cost is reserved in build()."""
        return (
            "基于以下参考文档回答问题。如果文档中没有相关信息,请明确说明。\n"
            "参考文档:\n"
            "{context}\n"
            "问题:{query}\n"
            "请基于上述文档回答,并标注信息来源。"
        )
class TokenCounter:
    """Count tokens with the tiktoken encoding looked up for a model name."""

    # NOTE(review): `tiktoken` is referenced here but no import of it is
    # visible in this section — confirm it is imported at module level.
    def __init__(self, model="gpt-3.5-turbo"):
        self.encoding = tiktoken.encoding_for_model(model)

    def count(self, text: str) -> int:
        """Return the number of tokens the encoding produces for ``text``."""
        token_ids = self.encoding.encode(text)
        return len(token_ids)
六、完整RAG系统
6.1 RAG Pipeline
from typing import Optional
import time
class RAGPipeline:
    """End-to-end RAG flow: rewrite, retrieve, de-duplicate, rerank, generate."""

    def __init__(
        self,
        vector_store: VectorStore,
        llm_client,
        reranker: Optional[Reranker] = None,
        query_rewriter: Optional[QueryRewriter] = None,
        use_hybrid: bool = True,
        documents=None,
    ):
        """Wire up the pipeline components.

        Args:
            vector_store: Store used for dense retrieval.
            llm_client: Object exposing ``complete(prompt)``.
            reranker: Optional reranker applied to the de-duplicated hits.
            query_rewriter: Optional rewriter used to expand the query.
            use_hybrid: Combine dense and BM25 retrieval when possible.
            documents: Chunks (with ``page_content`` and ``metadata``) used
                to build the BM25 index. Hybrid retrieval is only enabled
                when this is provided; otherwise dense retrieval is used.
        """
        self.vector_store = vector_store
        self.llm = llm_client
        self.reranker = reranker
        self.query_rewriter = query_rewriter

        self.vector_retriever = VectorRetriever(vector_store)
        self.bm25_retriever = None
        # Always start with a working retriever. Previously hybrid mode left
        # `self.retriever` as None, so every query raised AttributeError.
        self.retriever = self.vector_retriever
        if use_hybrid and documents:
            self.bm25_retriever = BM25Retriever(documents)
            self.retriever = HybridRetriever(self.vector_retriever, self.bm25_retriever)

        self.context_builder = ContextBuilder()

    def query(self, query: str) -> Dict:
        """Answer a question with retrieved context.

        Returns:
            A dict with:
                ``answer``: the LLM's reply,
                ``sources``: source and score of each hit used,
                ``context``: the context string given to the LLM,
                ``latency``: elapsed wall time formatted like ``"0.42s"``.
        """
        start_time = time.time()

        # 1. Optional query expansion.
        if self.query_rewriter:
            expanded_queries = self.query_rewriter.expand(query)
        else:
            expanded_queries = [query]

        # 2. Retrieve for every variant. Fall back to the dense retriever if
        #    `retriever` was left unset.
        retriever = self.retriever if self.retriever is not None else self.vector_retriever
        all_results = []
        for variant in expanded_queries:
            all_results.extend(retriever.retrieve(variant, top_k=10))

        # De-duplicate on source plus the first 50 characters of content,
        # keeping the first occurrence.
        seen = set()
        unique_results = []
        for hit in all_results:
            doc_id = hit['metadata'].get('source', '') + hit['content'][:50]
            if doc_id not in seen:
                seen.add(doc_id)
                unique_results.append(hit)

        # 3. Optional reranking against the ORIGINAL query.
        if self.reranker:
            final_results = self.reranker.rerank(query, unique_results, top_k=5)
        else:
            final_results = unique_results[:5]

        # 4. Build the context and 5. generate the answer.
        context = self.context_builder.build(query, final_results)
        answer = self._generate(query, context)

        # 6. Report sources. Prefer the rerank score, then the retriever's
        #    `score`, then a hybrid `final_score`.
        sources = [
            {
                'source': hit['metadata'].get('source', 'unknown'),
                'score': hit.get(
                    'rerank_score',
                    hit.get('score', hit.get('final_score', 0)),
                ),
            }
            for hit in final_results
        ]

        latency = time.time() - start_time
        return {
            'answer': answer,
            'sources': sources,
            'context': context,
            'latency': f"{latency:.2f}s",
        }

    def _generate(self, query: str, context: str) -> str:
        """Ask the LLM to answer ``query`` using only ``context``."""
        prompt = (
            "基于以下参考文档回答问题。如果文档中没有相关信息,请明确说明。\n"
            "参考文档:\n"
            f"{context}\n"
            f"问题:{query}\n"
            "请基于上述文档回答,并标注信息来源。回答:"
        )
        return self.llm.complete(prompt)
# Usage example. `llm` is not defined in this snippet; it is assumed to be an
# LLM client created elsewhere.
rag = RAGPipeline(
    vector_store=vector_store,
    llm_client=llm,
    reranker=Reranker(),
    query_rewriter=QueryRewriter(llm),
    use_hybrid=True,
)

result = rag.query("什么是RAG技术?")

# Show the answer, where it came from, and how long it took.
print(f"答案: {result['answer']}")
print(f"来源: {result['sources']}")
print(f"耗时: {result['latency']}")
6.2 RAG评估
class RAGEvaluator:
    """Measure retrieval and generation quality of a RAG pipeline."""

    def __init__(self, rag_pipeline):
        """Store the pipeline; it must expose ``retriever`` and ``query``."""
        self.rag = rag_pipeline

    def evaluate_retrieval(
        self,
        test_queries: List[Dict],
        top_k: int = 5
    ):
        """Compute mean Recall@K and mean reciprocal rank over the test set.

        Relevance is judged by comparing each hit's ``metadata['source']``
        with the test case's ``relevant_docs``.

        Args:
            test_queries: Dicts with ``query`` and ``relevant_docs`` keys.
            top_k: Number of hits requested per query.

        Returns:
            ``{'recall@k': float, 'mrr': float}``. Both are 0.0 for an empty
            test set (previously the mean of an empty list, i.e. NaN).
        """
        if not test_queries:
            return {'recall@k': 0.0, 'mrr': 0.0}

        recalls = []
        reciprocal_ranks = []
        for test in test_queries:
            relevant = set(test['relevant_docs'])
            results = self.rag.retriever.retrieve(test['query'], top_k=top_k)
            retrieved = [hit['metadata'].get('source') for hit in results]

            # Recall@K: share of relevant sources that were retrieved.
            hits = len(relevant & set(retrieved))
            recalls.append(hits / len(relevant) if relevant else 0)

            # Reciprocal rank of the first relevant hit, 0 if there is none.
            reciprocal_ranks.append(next(
                (1 / rank for rank, source in enumerate(retrieved, 1) if source in relevant),
                0,
            ))

        return {
            'recall@k': float(np.mean(recalls)),
            'mrr': float(np.mean(reciprocal_ranks)),
        }

    def evaluate_generation(
        self,
        test_queries: List[Dict]
    ):
        """Compute the mean ROUGE-L F-measure of generated vs. reference answers.

        Only ROUGE-L is computed here; faithfulness, answer relevance and
        context precision are not measured by this method.

        Args:
            test_queries: Dicts with ``query`` and ``reference_answer`` keys.

        Returns:
            ``{'rouge_l': float}``; 0.0 for an empty test set.
        """
        if not test_queries:
            return {'rouge_l': 0.0}

        from rouge_score import rouge_scorer

        # NOTE(review): the scorer is created with its default tokenizer;
        # confirm it tokenizes Chinese text sensibly, otherwise scores for
        # Chinese answers may be meaningless.
        scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'])

        rouge_scores = []
        for test in test_queries:
            generated = self.rag.query(test['query'])['answer']
            scores = scorer.score(test['reference_answer'], generated)
            rouge_scores.append(scores['rougeL'].fmeasure)

        return {
            'rouge_l': float(np.mean(rouge_scores))
        }
七、RAG进阶技术
7.1 多模态RAG
class MultimodalRAG:
    """RAG over two collections: text chunks and captioned images."""

    def __init__(self, caption_model=None):
        """Create the text and image stores.

        Args:
            caption_model: Optional object exposing ``generate(image_path)``
                that returns a caption. Required before ``add_image`` is used.
        """
        self.text_store = VectorStore(collection_name="text")
        self.image_store = VectorStore(
            collection_name="image",
            embedding_model="clip-ViT-B-32"
        )
        self.caption_model = caption_model

    def add_image(self, image_path: str):
        """Caption an image and index the caption in the image store.

        Raises:
            RuntimeError: If no caption model has been configured.
        """
        if self.caption_model is None:
            raise RuntimeError("caption_model is not configured")

        from types import SimpleNamespace

        caption = self.caption_model.generate(image_path)
        # VectorStore.add_documents reads `.page_content` and `.metadata` as
        # attributes, so a plain dict (as passed before) would fail there.
        self.image_store.add_documents([SimpleNamespace(
            page_content=caption,
            metadata={'type': 'image', 'path': image_path},
        )])

    def query(self, query: str, query_image=None):
        """Search the text store and, when an image is given, the image store.

        NOTE(review): the image store is searched with the text ``query``;
        the content of ``query_image`` itself is not used — confirm whether
        image-based search was intended.

        Returns:
            ``{'text_results': ..., 'image_results': ...}`` holding the raw
            store results; ``image_results`` is ``[]`` when no image is given.
            Fusing the two result sets is left to the caller.
        """
        text_results = self.text_store.search(query)

        # `is not None` rather than truthiness: some image objects do not
        # support a boolean test.
        if query_image is not None:
            image_results = self.image_store.search(query)
        else:
            image_results = []

        return {
            'text_results': text_results,
            'image_results': image_results,
        }
7.2 Agentic RAG
class AgenticRAG:
    """Route a query to a single-pass, multi-step or comparative RAG strategy."""

    def __init__(self, base_rag: RAGPipeline, llm_client):
        """Store the underlying pipeline and the LLM client.

        Args:
            base_rag: Object exposing ``query(text)`` that returns a dict
                with ``answer`` and ``sources``.
            llm_client: Object exposing ``complete(prompt)``.
        """
        self.rag = base_rag
        self.llm = llm_client

    def query(self, query: str) -> str:
        """Answer ``query`` with a strategy chosen from the LLM's analysis.

        ``complex`` queries are decomposed and the sub-answers synthesised;
        ``comparative`` queries look up each entity and compare them. Every
        other type (``factual``, ``summarization``, unknown, or an analysis
        that could not be parsed) is answered with a single RAG pass, so the
        method always returns an answer string.
        """
        analysis = self._analyze_query(query)
        query_type = analysis.get('type')

        if query_type == 'complex':
            sub_answers = []
            for sub_query in self._decompose_query(query):
                result = self.rag.query(sub_query)
                sub_answers.append({
                    'query': sub_query,
                    'answer': result['answer'],
                    'sources': result['sources'],
                })
            return self._synthesize(query, sub_answers)

        if query_type == 'comparative':
            entities = analysis.get('entities') or []
            attribute = analysis.get('attribute')
            # Without entities or an attribute there is nothing to compare;
            # fall through to the single-pass answer.
            if entities and attribute:
                comparisons = []
                for entity in entities:
                    result = self.rag.query(f"{entity}的{attribute}是什么?")
                    comparisons.append({
                        'entity': entity,
                        'info': result['answer'],
                    })
                return self._generate_comparison(comparisons)

        return self.rag.query(query)['answer']

    def _analyze_query(self, query: str) -> Dict:
        """Ask the LLM to classify the query.

        Returns:
            The parsed JSON object, or ``{'type': 'factual'}`` when the reply
            contains no parseable JSON object.
        """
        prompt = (
            "分析以下查询的类型:\n"
            f"查询: {query}\n"
            "输出JSON格式:\n"
            "{\n"
            '"type": "factual/complex/comparative/summarization",\n'
            '"entities": ["实体1", "实体2"],\n'
            '"attribute": "比较的属性"\n'
            "}"
        )
        response = self.llm.complete(prompt)
        fallback = {'type': 'factual'}
        if not isinstance(response, str):
            return fallback
        # Parse the outermost {...} span so fenced or chatty replies work.
        start, end = response.find("{"), response.rfind("}")
        if start == -1 or end < start:
            return fallback
        try:
            parsed = json.loads(response[start:end + 1])
        except json.JSONDecodeError:
            return fallback
        return parsed if isinstance(parsed, dict) else fallback

    def _decompose_query(self, query: str):
        """Ask the LLM for sub-queries; fall back to ``[query]`` on bad output."""
        prompt = (
            "将以下复杂查询分解为2-3个简单的子查询:\n"
            f"查询: {query}\n"
            "子查询列表(JSON格式):"
        )
        response = self.llm.complete(prompt)
        if isinstance(response, str):
            start, end = response.find("["), response.rfind("]")
            if start != -1 and end > start:
                try:
                    parsed = json.loads(response[start:end + 1])
                except json.JSONDecodeError:
                    parsed = None
                if isinstance(parsed, list) and parsed and all(isinstance(item, str) for item in parsed):
                    return parsed
        return [query]

    def _synthesize(self, query: str, sub_answers) -> str:
        """Ask the LLM to merge the sub-answers into one answer to ``query``."""
        findings = "\n".join(
            f"子问题: {item['query']}\n回答: {item['answer']}"
            for item in sub_answers
        )
        prompt = (
            "请综合以下子问题的回答,给出原始问题的完整答案。\n"
            f"{findings}\n"
            f"原始问题: {query}\n"
            "最终答案:"
        )
        return self.llm.complete(prompt)

    def _generate_comparison(self, comparisons) -> str:
        """Ask the LLM to write a comparison from the per-entity findings."""
        findings = "\n".join(
            f"{item['entity']}: {item['info']}"
            for item in comparisons
        )
        prompt = (
            "请根据以下各实体的信息进行对比分析,指出主要异同。\n"
            f"{findings}\n"
            "对比结论:"
        )
        return self.llm.complete(prompt)
八、总结
核心要点
- RAG价值:解决LLM知识截止、幻觉问题,实现实时知识增强
- 文档处理:加载→切分→向量化,切分策略影响检索质量
- 检索策略:向量检索+关键词检索混合,多路召回提高覆盖率
- 查询优化:查询扩展、分解、HyDE等技术提升检索效果
- 重排序:Cross-Encoder精排,提高上下文质量
- 评估体系:检索指标(Recall/MRR)+ 生成指标(Faithfulness/ROUGE)
RAG优化清单
| 层级 | 优化点 | 效果 |
|---|---|---|
| 文档处理 | 语义切分、元数据增强 | +10-15% |
| Embedding | 领域微调、稀疏向量 | +5-10% |
| 检索 | 混合检索、查询重写 | +15-20% |
| 重排 | Cross-Encoder精排 | +10-15% |
| 生成 | Prompt工程、引用格式 | +5-10% |
学习路径
Level 1: 基础RAG
├── 文档加载与切分
├── 向量存储与检索
└── 基础Pipeline搭建
Level 2: 优化RAG
├── 混合检索
├── 查询重写
├── 重排序
└── 评估体系
Level 3: 高级RAG
├── 多模态RAG
├── Agentic RAG
├── GraphRAG
└── 自适应RAG
下一篇预告:【第53篇】大模型微调实战:LoRA与QLoRA技术详解(万字长文+完整代码实现)
本文为系列第52篇,详细介绍了RAG系统的架构设计和实战实现。有任何问题欢迎在评论区交流!
标签:RAG、检索增强生成、向量数据库、Embedding、LLM应用、知识库