LLM 默认是无状态的,即询问当前的问题与上下文无关,当我们需要将多轮对话信息给到LLM 时,就需要使用缓存Memory。缓存方式有多种。
python
from langchain import OpenAI
from langchain.chains import ConversationChain
# first initialize the large language model
llm = OpenAI(
temperature=0,
openai_api_key="OPENAI_API_KEY",
model_name="text-davinci-003" # 也可用gpt-3.5-turbo
)
# now initialize the conversation chain 默认无缓存
conversation = ConversationChain(llm=llm)
# 方式1 ConversationBufferMemory: 会将之前所有对话都作为输入送到LLM中,受模型接受token数量的限制
from langchain.chains.conversation.memory import ConversationBufferMemory
conversation_buf = ConversationChain(
llm=llm,
memory=ConversationBufferMemory()
)
# 方式2 ConversationSummaryMemory:将之前对话总结Summary后,加上新的询问query输入到LLM中
from langchain.chains.conversation.memory import ConversationSummaryMemory
conversation = ConversationChain(
llm=llm,
memory=ConversationSummaryMemory(llm=llm)
)
# 方式3 ConversationBufferWindowMemory:将最近k轮对话,加上新的询问query输入到LLM中
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
conversation = ConversationChain(
llm=llm,
memory=ConversationBufferWindowMemory(k=1)
)
# 方式4 ConversationSummaryBufferMemory:将很久之前的对话Summary,最近的对话保存全部送入LLM中
conversation_sum_bufw = ConversationChain(
llm=llm, memory=ConversationSummaryBufferMemory(
llm=llm,
max_token_limit=650
)
# 其它 Memory 类型
# ConversationKnowledgeGraphMemory
# ConversationEntityMemory