Reference: https://milvus.io/docs/zh/build-rag-with-milvus.md

First, the overall flow:

- Read the documents and split them into chunks
- Vectorize the chunks with an embedding model
- Insert the vectorized chunks into Milvus
- Vectorize the query text
- Use the query vector to search Milvus

Next, the prerequisites:

- Documents: the Milvus Chinese documentation
- Embedding model: bge-small-zh-v1.5, downloaded from ModelScope (魔搭)

The complete code is as follows:
```python
# Load the documents
from glob import glob

from langchain_huggingface import HuggingFaceEmbeddings
from pymilvus import MilvusClient
from tqdm import tqdm

"""
Document download. The original tutorial uses the English docs; this demo uses the Chinese docs:
https://github.com/aidoczh/milvus-doc-zh#
"""
def get_all_docs():
    # Load all FAQ documents and split their content by heading
    text_lines = []
    for file_path in glob(
        "D:/MyWork/GitProject/LLM/llm-demo/temp-data/milvus_docs/zh_CN/faq/*.md",
        recursive=True,
    ):
        with open(file_path, "r", encoding="utf-8") as file:
            file_text = file.read()
        # Split each document on "# " headings to produce standalone knowledge snippets
        text_lines += file_text.split("# ")
    return text_lines


"""
The embedding model is downloaded from ModelScope.
Model page: https://modelscope.cn/models/BAAI/bge-small-zh-v1.5
Download command:
modelscope download --model BAAI/bge-small-zh-v1.5 --local_dir ./.models/bge-small-zh-v1.5
"""
def init_embedding_model():
    """Initialize the embedding model (works with a local or a remote model)."""
    local_model_path = r"D:/MyWork/GitProject/LLM/llm-demo/.models"
    model_kwargs = {"device": "cpu"}  # Use "cuda" if a GPU is available; otherwise "cpu"
    encode_kwargs = {
        "normalize_embeddings": True
    }  # Normalize the embeddings (recommended; makes cosine/IP similarity straightforward)
    # Initialize the BGE embedding model
    bge_embeddings = HuggingFaceEmbeddings(
        model_name=local_model_path + "/bge-small-zh-v1.5",
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs,
    )
    return bge_embeddings


"""
embedding_dim is the dimensionality of the vectors
"""
def get_Milvus_Client(embedding_dim):
    milvus_client = MilvusClient(uri="http://10.0.10.42:19530")
    collection_name = "my_rag_collection"
    if milvus_client.has_collection(collection_name):
        milvus_client.drop_collection(collection_name)
    # Create the collection: specify vector dimension, metric type and consistency level
    milvus_client.create_collection(
        collection_name=collection_name,
        dimension=embedding_dim,
        metric_type="IP",  # Inner-product metric, well suited to text similarity search
        consistency_level="Strong",  # Strong consistency guarantees data visibility
    )
    return milvus_client


"""
Not used here; the example embeds texts one at a time, which is easier to follow
"""
def batch_emb_text(texts):
    """Compute embeddings for a list of texts."""
    bge_embeddings = init_embedding_model()
    embeddings = bge_embeddings.embed_documents(texts)
    return embeddings


"""
Vectorize a single text with the embedding model
"""
def single_emb_text(text):
    bge_embeddings = init_embedding_model()
    embedding = bge_embeddings.embed_query(text)
    return embedding


"""
Vectorize the texts and insert them into Milvus
"""
def batch_insert_data(text_lines, milvus_client, collection_name):
    data = []
    for i, line in enumerate(tqdm(text_lines, desc="Creating embeddings")):
        data.append(
            {
                "id": i,  # Custom ID
                "vector": single_emb_text(line),  # Text embedding
                "text": line,  # Original text, stored as a dynamic field
            }
        )
    # Bulk-insert the data
    insert_result = milvus_client.insert(collection_name=collection_name, data=data)
    print(f"Insert succeeded, result: {insert_result}")


if __name__ == "__main__":
    text = "LangChain 是一个用于构建 LLM 应用的框架"
    embedding = single_emb_text(text)
    print(len(embedding))  # bge-small-zh-v1.5 produces 512-dimensional vectors

    text_lines = get_all_docs()
    # batch_emb_text(text_lines)
    milvus_client = get_Milvus_Client(512)
    batch_insert_data(text_lines, milvus_client, "my_rag_collection")

    question = "milvus 价格是多少"  # "How much does Milvus cost?"
    search_res = milvus_client.search(
        collection_name="my_rag_collection",
        data=[single_emb_text(question)],  # Vectorize the question
        limit=3,  # Return the top 3 results
        search_params={"metric_type": "IP", "params": {}},  # Inner-product distance
        output_fields=["text"],  # Return the text field
    )
    print(search_res)
    print("\n")
    print(search_res[0][0])
```
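The `search_res` printed at the end is a nested structure: one list of hits per query vector, where each hit exposes `id`, `distance`, and the requested `entity` fields. A minimal sketch for unpacking the retrieved snippets and their scores, following the pattern in the referenced tutorial (assumed to run right after the `milvus_client.search` call above):

```python
import json

# Each hit carries the requested "text" field under "entity" and an IP similarity score under "distance"
retrieved_lines_with_distances = [
    (res["entity"]["text"], res["distance"]) for res in search_res[0]
]
print(json.dumps(retrieved_lines_with_distances, indent=4, ensure_ascii=False))
```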
Python package versions are a major pitfall. The versions used in this demo are listed below, running on Python 3.10 (3.10 has the widest package compatibility); a runtime version check is sketched after the list.
annotated-types==0.7.0
anyio==4.11.0
black==25.1.0
blinker==1.9.0
certifi==2024.12.14
charset-normalizer==3.4.1
click==8.2.1
colorama==0.4.6
contourpy==1.3.1
cycler==0.12.1
distro==1.9.0
exceptiongroup==1.3.0
filelock==3.20.0
Flask==3.1.1
flask-cors==6.0.1
fonttools==4.55.3
fsspec==2025.9.0
grpcio==1.76.0
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
huggingface-hub==0.36.0
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.6
jiter==0.11.1
joblib==1.5.2
jsonpatch==1.33
jsonpointer==3.0.0
kiwisolver==1.4.8
langchain==1.0.2
langchain-core==1.0.0
langchain-huggingface==1.0.0
langgraph==1.0.1
langgraph-checkpoint==3.0.0
langgraph-prebuilt==1.0.1
langgraph-sdk==0.2.9
langsmith==0.4.38
MarkupSafe==3.0.2
modelscope==1.31.0
mpmath==1.3.0
mypy_extensions==1.1.0
networkx==3.4.2
numpy==2.2.1
openai==2.6.0
orjson==3.11.3
ormsgpack==1.11.0
packaging==24.2
pandas==2.3.3
pathspec==0.12.1
pillow==11.0.0
platformdirs==4.3.8
protobuf==6.33.0
pydantic==2.12.3
pydantic_core==2.41.4
pymilvus==2.6.2
pyparsing==3.2.0
python-dateutil==2.9.0.post0
python-dotenv==1.1.1
pytz==2024.2
PyYAML==6.0.3
regex==2025.10.23
requests==2.32.5
requests-toolbelt==1.0.0
ruff==0.8.4
safetensors==0.6.2
scikit-learn==1.7.2
scipy==1.15.3
sentence-transformers==5.1.2
six==1.17.0
sniffio==1.3.1
sympy==1.14.0
tenacity==9.1.2
threadpoolctl==3.6.0
tokenizers==0.22.1
tomli==2.3.0
torch==2.8.0
tqdm==4.67.1
transformers==4.57.1
typing-inspection==0.4.2
typing_extensions==4.15.0
tzdata==2024.2
ujson==5.11.0
urllib3==2.3.0
Werkzeug==3.1.3
xxhash==3.6.0
zstandard==0.25.0
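To catch version drift early, here is a small sketch (not part of the original demo) that checks a few of the core pins from the list above at runtime, using only the standard library:

```python
from importlib.metadata import version

# A hypothetical subset of the core pins above; extend as needed
expected = {
    "pymilvus": "2.6.2",
    "langchain-huggingface": "1.0.0",
    "sentence-transformers": "5.1.2",
    "transformers": "4.57.1",
    "torch": "2.8.0",
}

for pkg, want in expected.items():
    got = version(pkg)
    status = "OK" if got == want else f"MISMATCH (expected {want})"
    print(f"{pkg}: {got} {status}")
```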