使用 Hugging Face 的 text embedding models

python 复制代码
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Tongyi
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_community.embeddings import HuggingFaceEmbeddings
import os
import time
# The Tongyi client is constructed without an explicit key, so the DashScope
# credential must come from the DASHSCOPE_API_KEY environment variable.
# Never hard-code a real key in source: export it in the shell instead
# (and revoke any key that has already been published).
if not os.environ.get("DASHSCOPE_API_KEY"):
    raise RuntimeError(
        "DASHSCOPE_API_KEY is not set; export it before running this script."
    )
llm = Tongyi()

from langchain_community.document_loaders import UnstructuredURLLoader

# No model name is given, so HuggingFaceEmbeddings falls back to its default.
embeddings = HuggingFaceEmbeddings()

# Time a single query embedding. perf_counter() is a monotonic,
# high-resolution clock intended for measuring intervals, so the result is
# not distorted by system clock adjustments the way time.time() can be.
text = "This is a test document."
start_time = time.perf_counter()
query_result = embeddings.embed_query(text)
execution_time = time.perf_counter() - start_time

print(f"函数执行时间: {execution_time} 秒")
# Print only the first three components of the embedding vector.
print(query_result[:3])


# Pages to fetch: a single Wikipedia article about Android.
urls = ["https://en.wikipedia.org/wiki/Android_(operating_system)"]

# Download the pages and split their text into document chunks.
loader = UnstructuredURLLoader(urls=urls)
documents = loader.load_and_split()
# print(documents)


# Directory holding the persisted FAISS index. It must have been built with
# the same embedding model as `embeddings`: an index built with another model
# has a different vector dimension and cannot be queried with this one.
index_dir = "faiss_index2"

# Start timing: covers index load/build, chain construction and the LLM call.
start_time = time.perf_counter()

if os.path.exists(os.path.join(index_dir, "index.faiss")):
    # Reuse the index persisted by an earlier run. The docstore is stored as
    # a pickle, so recent langchain_community releases refuse to load it
    # unless the caller opts in. That is acceptable here only because this
    # script wrote the files itself; never enable it for an index obtained
    # from an untrusted source.
    vectorstore = FAISS.load_local(
        index_dir, embeddings, allow_dangerous_deserialization=True
    )
else:
    # First run: embed the documents and persist the index so later runs can
    # load it instead of re-embedding everything.
    vectorstore = FAISS.from_documents(documents, embeddings)
    vectorstore.save_local(index_dir)

retriever = vectorstore.as_retriever()
template = """Answer the question based on the context below. If the
question cannot be answered using the information provided answer
with "I don't know"     

Context: {context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

output_parser = StrOutputParser()
# Run both branches on the same input: the retriever fills {context} and the
# passthrough forwards the raw question into {question}.
setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | llm | output_parser
print(chain.invoke("what is android"))

# Report the elapsed time of the whole load/build + query round trip.
execution_time = time.perf_counter() - start_time
print(f"函数执行时间: {execution_time} 秒")

上面使用的是默认模型(sentence-transformers/all-mpnet-base-v2),下面指定使用 all-MiniLM-L6-v2:

python 复制代码
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Tongyi
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_community.embeddings import HuggingFaceEmbeddings
import os
import time
# The Tongyi client is constructed without an explicit key, so the DashScope
# credential must come from the DASHSCOPE_API_KEY environment variable.
# Never hard-code a real key in source: export it in the shell instead
# (and revoke any key that has already been published).
if not os.environ.get("DASHSCOPE_API_KEY"):
    raise RuntimeError(
        "DASHSCOPE_API_KEY is not set; export it before running this script."
    )
llm = Tongyi()

from langchain_community.document_loaders import UnstructuredURLLoader
# Explicitly select the embedding model instead of relying on the default.
model_name = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Time a single query embedding. perf_counter() is a monotonic,
# high-resolution clock intended for measuring intervals, so the result is
# not distorted by system clock adjustments the way time.time() can be.
text = "This is a test document."
start_time = time.perf_counter()
query_result = embeddings.embed_query(text)
execution_time = time.perf_counter() - start_time

print(f"函数执行时间: {execution_time} 秒")
# Print only the first three components of the embedding vector.
print(query_result[:3])


# Pages to fetch: a single Wikipedia article about Android.
urls = ["https://en.wikipedia.org/wiki/Android_(operating_system)"]

# Download the pages and split their text into document chunks.
loader = UnstructuredURLLoader(urls=urls)
documents = loader.load_and_split()
# print(documents)


# Keep one index directory per embedding model. Vectors produced by different
# models have different dimensions, so sharing a single directory (the
# original "faiss_index2") lets one model's index overwrite another's and
# break every later query made with the other model. Any "/" in the model
# name is replaced so the name stays a single path component.
index_dir = f"faiss_index_{model_name.replace('/', '_')}"

# Start timing: covers index load/build, chain construction and the LLM call.
start_time = time.perf_counter()

if os.path.exists(os.path.join(index_dir, "index.faiss")):
    # Reuse the index persisted by an earlier run. The docstore is stored as
    # a pickle, so recent langchain_community releases refuse to load it
    # unless the caller opts in. That is acceptable here only because this
    # script wrote the files itself; never enable it for an index obtained
    # from an untrusted source.
    vectorstore = FAISS.load_local(
        index_dir, embeddings, allow_dangerous_deserialization=True
    )
else:
    # First run: embed the documents and persist the index so later runs can
    # load it instead of re-embedding everything.
    vectorstore = FAISS.from_documents(documents, embeddings)
    vectorstore.save_local(index_dir)

retriever = vectorstore.as_retriever()
template = """Answer the question based on the context below. If the
question cannot be answered using the information provided answer
with "I don't know"     

Context: {context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

output_parser = StrOutputParser()
# Run both branches on the same input: the retriever fills {context} and the
# passthrough forwards the raw question into {question}.
setup_and_retrieval = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | llm | output_parser
print(chain.invoke("what is android"))

# Report the elapsed time of the whole load/build + query round trip.
execution_time = time.perf_counter() - start_time
print(f"函数执行时间: {execution_time} 秒")

关于可以使用的模型,可以参考 Hugging Face 上的 sentence-transformers 模型列表:https://huggingface.co/sentence-transformers

相关推荐
Philtell12 小时前
Diffusion Model扩散模型中的time embeding的作用
embedding
zhangfeng113312 小时前
大语言模型 bpe算法 后面对接的是 one-hot吗 nn.Embedding
算法·语言模型·embedding
andwhataboutit?1 天前
embedding model
embedding
程序员泠零澪回家种桔子4 天前
RAG中的Embedding技术
人工智能·后端·ai·embedding
Zilliz Planet5 天前
熠智AI+Milvus:从Embedding 到数据处理、问题重写,电商AI客服架构怎么搭?
人工智能·架构·embedding·milvus
CCPC不拿奖不改名5 天前
面向计算机应用的数学
人工智能·python·rnn·深度学习·embedding·应用开发数学
问道飞鱼5 天前
【大模型学习】词嵌入(Word Embedding)深度解析:从符号到向量的语义映射
word·embedding·词嵌入
Java后端的Ai之路6 天前
【AI大模型开发】-基于 Word2Vec 的中文古典小说词向量分析实战
人工智能·embedding·向量·word2vec·ai大模型开发
laplace01237 天前
向量库 Qdrant + 图数据库Neo4j+Embedding阿里百炼text-embedding-v3
数据库·embedding·agent·neo4j
CCPC不拿奖不改名7 天前
循环神经网络RNN:整数索引→稠密向量(嵌入层 / Embedding)详解
人工智能·python·rnn·深度学习·神经网络·自然语言处理·embedding