长期记忆存储:构建持久的 AI 记忆系统

前言
长期记忆是 AI Agent 持续学习的关键:只有把记忆持久化到磁盘或数据库,Agent 才能在进程重启之后恢复此前积累的记忆。因此我们需要设计一套持久化存储方案,让 Agent 能够保存和恢复记忆。
我在多个项目中实现过长期记忆存储,今天分享一些设计和实现。
存储接口设计
python
from abc import ABC, abstractmethod
from typing import List, Optional, Dict, Any
class MemoryStoreBackend(ABC):
    """Abstract persistence interface for memory items.

    Every method is abstract, so a concrete backend has to override all five
    of them before it can be instantiated.
    """

    @abstractmethod
    def save(self, memory: MemoryItem):
        """Persist ``memory`` in the backend."""

    @abstractmethod
    def load(self, memory_id: str) -> Optional[MemoryItem]:
        """Look up a single memory by ``memory_id``.

        The ``Optional`` return type leaves room for ``None`` when the id is
        not present.
        """

    @abstractmethod
    def load_all(self) -> List[MemoryItem]:
        """Return every memory the backend currently holds."""

    @abstractmethod
    def delete(self, memory_id: str):
        """Remove the memory identified by ``memory_id``."""

    @abstractmethod
    def search_similar(
        self, embedding: List[float], top_k: int = 5
    ) -> List[MemoryItem]:
        """Run a similarity search for ``embedding``.

        ``top_k`` (default 5) bounds how many memories are requested.
        """
文件存储后端
python
import os
import json
from typing import List, Optional, Dict
class FileMemoryStore(MemoryStoreBackend):
    """Memory backend that keeps every item in one JSON file.

    ``<storage_path>/memories.json`` holds a single JSON object that maps a
    memory id to the dict returned by ``MemoryItem.to_dict()``.  Each
    mutation reads the whole mapping, changes it and writes it back, so the
    cost of one ``save``/``delete`` grows with the number of stored items.
    """

    def __init__(self, storage_path: str = "./memory"):
        """Record the storage location and make sure the directory exists.

        Args:
            storage_path: Directory that contains ``memories.json``.
        """
        self.storage_path = storage_path
        self.memory_file = os.path.join(storage_path, "memories.json")
        self.ensure_storage()

    def ensure_storage(self):
        """Create the storage directory (including parents) if missing."""
        os.makedirs(self.storage_path, exist_ok=True)

    def _load_data(self) -> Dict[str, Any]:
        """Read the id -> dict mapping from disk.

        Returns:
            The stored mapping, or an empty dict when the file does not
            exist yet.
        """
        # Open directly instead of exists()+open(): the file may disappear
        # between the two calls.
        try:
            with open(self.memory_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            return {}

    def _save_data(self, data: Dict[str, Any]):
        """Write the id -> dict mapping to disk without risking the old file.

        The data is serialized to a sibling temp file first and then moved
        over ``memories.json`` with ``os.replace``.  Opening the real file
        with ``'w'`` would truncate it before serialization starts, so a
        failing ``json.dump`` (e.g. a non-serializable value) would destroy
        every previously stored memory.

        Args:
            data: Complete mapping to persist.
        """
        tmp_path = self.memory_file + ".tmp"
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, indent=2)
            os.replace(tmp_path, self.memory_file)
        finally:
            # After a successful replace the temp file is gone; after a
            # failure this removes the partial one.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    def save(self, memory: MemoryItem):
        """Insert ``memory`` or overwrite the entry with the same id."""
        data = self._load_data()
        data[memory.id] = memory.to_dict()
        self._save_data(data)

    def load(self, memory_id: str) -> Optional[MemoryItem]:
        """Return the memory stored under ``memory_id``, or ``None``."""
        record = self._load_data().get(memory_id)
        if record is None:
            return None
        return self._dict_to_memory(record)

    def load_all(self) -> List[MemoryItem]:
        """Return every stored memory, in file order."""
        return [
            self._dict_to_memory(item)
            for item in self._load_data().values()
        ]

    def delete(self, memory_id: str):
        """Remove ``memory_id`` from the file; unknown ids are ignored."""
        data = self._load_data()
        if memory_id in data:
            del data[memory_id]
            self._save_data(data)

    def _dict_to_memory(self, d: Dict) -> MemoryItem:
        """Rebuild a ``MemoryItem`` from its stored dict.

        ``id``, ``type`` and ``content`` are required keys (a missing one
        raises ``KeyError``); all other fields fall back to defaults.
        """
        return MemoryItem(
            id=d["id"],
            type=MemoryType(d["type"]),
            content=d["content"],
            metadata=d.get("metadata", {}),
            embedding=d.get("embedding"),
            timestamp=d.get("timestamp", 0),
            importance=d.get("importance", 1.0),
            tags=d.get("tags", []),
            access_count=d.get("access_count", 0),
            last_access=d.get("last_access")
        )

    def search_similar(self, embedding: List[float],
                       top_k: int = 5) -> List[MemoryItem]:
        """Return the stored memories most similar to ``embedding``.

        Brute-force search: every memory is loaded and ranked by cosine
        similarity.  Memories without an embedding, or whose embedding has
        zero norm (cosine similarity is undefined), are skipped.

        Args:
            embedding: Query vector.
            top_k: Maximum number of results; values <= 0 yield ``[]``.

        Returns:
            At most ``top_k`` memories, best match first.
        """
        if top_k <= 0:
            # A negative slice bound would silently drop items from the end
            # instead of limiting the result.
            return []
        memories = self.load_all()
        if not memories:
            return []

        import numpy as np

        query = np.asarray(embedding, dtype=float)
        query_norm = np.linalg.norm(query)  # loop-invariant, compute once
        if query_norm == 0:
            return []

        scored = []
        for mem in memories:
            if not mem.embedding:
                continue
            vec = np.asarray(mem.embedding, dtype=float)
            denom = query_norm * np.linalg.norm(vec)
            if denom == 0:
                # Would produce NaN, which breaks the ordering of sort().
                continue
            scored.append((float(np.dot(query, vec) / denom), mem))

        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [mem for _, mem in scored[:top_k]]
数据库存储后端
python
import sqlite3
from typing import List, Optional
class SQLiteMemoryStore(MemoryStoreBackend):
    """Memory backend that stores each item as one row of a SQLite table.

    ``metadata``, ``embedding`` and ``tags`` are stored as JSON text.
    ``content`` is stored as ``str(content)``, so non-string content comes
    back as its string form.  A new connection is opened for every
    operation and always closed again, even when the statement fails.
    """

    # Explicit column list so row positions do not depend on the column
    # order of an already existing table.
    _SELECT_SQL = (
        "SELECT id, type, content, metadata, embedding, timestamp, "
        "importance, tags, access_count, last_access FROM memories"
    )

    def __init__(self, db_path: str = "./memory/memories.db"):
        """Record the database path and create the schema if needed.

        Args:
            db_path: Path of the SQLite database file.
        """
        self.db_path = db_path
        self.ensure_database()

    def ensure_database(self):
        """Create the parent directory and the ``memories`` table if missing."""
        parent = os.path.dirname(self.db_path)
        # dirname is "" for a bare file name, and os.makedirs("") raises
        # FileNotFoundError.
        if parent:
            os.makedirs(parent, exist_ok=True)
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:  # commits on success, rolls back on error
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS memories (
                        id TEXT PRIMARY KEY,
                        type TEXT NOT NULL,
                        content TEXT,
                        metadata TEXT,
                        embedding TEXT,
                        timestamp REAL,
                        importance REAL,
                        tags TEXT,
                        access_count INTEGER,
                        last_access REAL
                    )
                """)
        finally:
            conn.close()

    def _row_to_memory(self, row) -> MemoryItem:
        """Rebuild a ``MemoryItem`` from a row selected with ``_SELECT_SQL``.

        Defaults are applied only to NULL columns; a stored ``0`` /
        ``0.0`` (for example ``importance == 0.0``) is preserved.
        """
        import json

        return MemoryItem(
            id=row[0],
            type=MemoryType(row[1]),
            content=row[2],
            metadata=json.loads(row[3]) if row[3] else {},
            embedding=json.loads(row[4]) if row[4] else None,
            timestamp=row[5] if row[5] is not None else 0,
            importance=row[6] if row[6] is not None else 1.0,
            tags=json.loads(row[7]) if row[7] else [],
            access_count=row[8] if row[8] is not None else 0,
            last_access=row[9]
        )

    def save(self, memory: MemoryItem):
        """Insert ``memory`` or replace the row with the same id."""
        import json

        params = (
            memory.id,
            memory.type.value,
            str(memory.content),
            json.dumps(memory.metadata),
            json.dumps(memory.embedding) if memory.embedding else None,
            memory.timestamp,
            memory.importance,
            json.dumps(memory.tags),
            memory.access_count,
            memory.last_access
        )
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:
                conn.execute("""
                    INSERT OR REPLACE INTO memories
                    (id, type, content, metadata, embedding, timestamp, importance, tags, access_count, last_access)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, params)
        finally:
            conn.close()

    def load(self, memory_id: str) -> Optional[MemoryItem]:
        """Return the memory stored under ``memory_id``, or ``None``."""
        conn = sqlite3.connect(self.db_path)
        try:
            row = conn.execute(
                self._SELECT_SQL + " WHERE id = ?", (memory_id,)
            ).fetchone()
        finally:
            conn.close()
        if row is None:
            return None
        return self._row_to_memory(row)

    def load_all(self) -> List[MemoryItem]:
        """Return every stored memory."""
        conn = sqlite3.connect(self.db_path)
        try:
            rows = conn.execute(self._SELECT_SQL).fetchall()
        finally:
            conn.close()
        return [self._row_to_memory(row) for row in rows]

    def delete(self, memory_id: str):
        """Delete the row for ``memory_id``; unknown ids are a no-op."""
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:
                conn.execute("DELETE FROM memories WHERE id = ?", (memory_id,))
        finally:
            conn.close()

    def search_similar(self, embedding: List[float],
                       top_k: int = 5) -> List[MemoryItem]:
        """Return the stored memories most similar to ``embedding``.

        Brute-force search: every row is loaded and ranked by cosine
        similarity.  Memories without an embedding, or whose embedding has
        zero norm (cosine similarity is undefined), are skipped.

        Args:
            embedding: Query vector.
            top_k: Maximum number of results; values <= 0 yield ``[]``.

        Returns:
            At most ``top_k`` memories, best match first.
        """
        if top_k <= 0:
            # A negative slice bound would silently drop items from the end
            # instead of limiting the result.
            return []
        memories = self.load_all()

        import numpy as np

        query = np.asarray(embedding, dtype=float)
        query_norm = np.linalg.norm(query)  # loop-invariant, compute once
        if query_norm == 0:
            return []

        scored = []
        for mem in memories:
            if not mem.embedding:
                continue
            vec = np.asarray(mem.embedding, dtype=float)
            denom = query_norm * np.linalg.norm(vec)
            if denom == 0:
                # Would produce NaN, which breaks the ordering of sort().
                continue
            scored.append((float(np.dot(query, vec) / denom), mem))

        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [mem for _, mem in scored[:top_k]]
完整的持久化记忆系统
python
class PersistentMemoryStore(MemoryStore):
    """``MemoryStore`` whose contents are mirrored to a storage backend.

    On construction all memories are loaded from ``backend`` into the
    in-memory structures inherited from ``MemoryStore`` (``memories``,
    ``embeddings``, ``id_map``); ``add`` and ``delete`` are then written
    through to the backend.
    """

    def __init__(self, backend: MemoryStoreBackend,
                 dimension: int = 768):
        """Initialise the in-memory store and restore persisted memories.

        Args:
            backend: Storage backend used for loading and write-through.
            dimension: Passed unchanged to ``MemoryStore.__init__``.
        """
        super().__init__(dimension)
        self.backend = backend
        self._load_from_backend()

    def _load_from_backend(self):
        """Populate the in-memory structures from the backend, then reindex."""
        for mem in self.backend.load_all():
            self.memories[mem.id] = mem
            # NOTE(review): ``mem.embedding`` may be None for items stored
            # without an embedding - confirm ``_rebuild_index`` tolerates it.
            self.embeddings.append(mem.embedding)
            self.id_map.append(mem.id)
        self._rebuild_index()

    def add(self, memory: MemoryItem):
        """Add ``memory`` to the in-memory store and persist it."""
        super().add(memory)
        self.backend.save(memory)

    def delete(self, memory_id: str):
        """Remove ``memory_id`` from memory and from the backend.

        The backend delete is issued even when the id is not cached in
        memory, so the persisted copy cannot outlive a delete request.
        """
        if memory_id in self.memories:
            del self.memories[memory_id]
            # Rebuild the parallel lists from the remaining memories so that
            # ``embeddings`` and ``id_map`` stay aligned with each other.
            self.embeddings = [mem.embedding for mem in self.memories.values()]
            self.id_map = list(self.memories)
            self._rebuild_index()
        self.backend.delete(memory_id)

    def save(self):
        """Write every in-memory memory to the backend."""
        for mem in self.memories.values():
            self.backend.save(mem)
总结
长期记忆存储要点:
- 后端抽象:统一的存储接口
- 多后端支持:文件、数据库等
- 增量存储:每次新增记忆时立即写入后端,并可定期调用 save() 做一次全量保存,避免进程异常退出导致数据丢失
- 序列化:对象序列化存储
- 恢复机制:启动时自动恢复
实践建议:
- 从简单的文件后端开始:JSON 文件便于查看和调试,但每次保存都会重写整个文件,数据量增大后应迁移到 SQLite
- 定期备份重要数据
- 监控存储大小
- 记忆数量较多时,考虑使用专业的向量数据库:上文两个后端的 search_similar 都是先加载全部记忆、再逐条计算余弦相似度,耗时随记忆数量线性增长