langchain 的向量存储
LangChain 内置了向量存储功能,可以基于以下两种实现:
- InMemoryVectorStore 完成内存向量存储
- Chroma 完成外部向量数据库存储(数据可持久化到本地目录)
向量存储类均提供三个通用接口:
- add_documents,添加文档到向量存储
- delete,从向量存储中删除文档
- similarity_search,相似度搜索

python
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_chroma import Chroma
from langchain_community.embeddings import DashScopeEmbeddings
from langchain_community.document_loaders import CSVLoader
# Alternative backend: keep the vectors in memory only.
# vector_store = InMemoryVectorStore(
#     embedding=DashScopeEmbeddings()
# )

# Chroma-backed vector store whose data is persisted to a local folder.
embedding_model = DashScopeEmbeddings()
vector_store = Chroma(
    collection_name="test",  # name of this vector collection
    embedding_function=embedding_model,  # embedding model
    persist_directory="./data/chroma_langchain_db",  # folder holding the data
)

# Load the CSV file; the "source" column states where each record comes from.
loader = CSVLoader(
    file_path="/Users/apple/Desktop/agent_student/qianwen/data/info.csv",
    encoding="utf-8",
    source_column="source",
)
document = loader.load()

# Add every loaded document under an explicit ID: "id1", "id2", ...
document_ids = [f"id{position}" for position, _ in enumerate(document, start=1)]
new_document = vector_store.add_documents(
    documents=document,
    ids=document_ids,
)

# Delete the first two entries again, addressed by their IDs.
vector_store.delete(ids=["id1", "id2"])

# Similarity search: ask for up to 10 hits, restricted by the "source" filter.
result = vector_store.similarity_search(
    "python 是什么",
    k=10,
    filter={"source": "黑马程序员"},
)
print(result)