本文演示了如何使用 pgvector 扩展在PostgreSQL 上构建生成式 AI 应用,涵盖语义搜索、推荐系统、RAG、聊天机器人、语义缓存等场景。原始 Workshop 依赖:
环境搭建
启动 pgvector 容器
bash
docker run -d \
--name pgvector-lab \
-e POSTGRES_USER=postgres \
-e POSTGRES_PASSWORD=postgres \
-e POSTGRES_DB=workshop \
-p 5433:5432 \
pgvector/pgvector:pg16
验证:
$ docker exec pgvector-lab psql -U postgres -d workshop -c "CREATE EXTENSION IF NOT EXISTS vector; SELECT extname, extversion FROM pg_extension WHERE extname='vector';"
CREATE EXTENSION
extname | extversion
---------+------------
vector | 0.8.2
使用 uv 管理python依赖
bash
export PATH="$HOME/.local/bin:$PATH"
uv add psycopg2-binary pgvector openai python-dotenv pandas requests \
langchain langchain-openai langchain-community langchain-experimental \
sentence-transformers datasets \
-i https://mirrors.aliyun.com/pypi/simple/
下载本地嵌入模型,由于HuggingFace 在中国区不可达,使用 ModelScope 下载
sentence-transformers/all-MiniLM-L6-v2是轻量级文本嵌入(Text Embedding)模型
python
from modelscope import snapshot_download
model_dir = snapshot_download('sentence-transformers/all-MiniLM-L6-v2',
cache_dir='./models')
项目结构
pgvector/
├── .env # API 和数据库配置
├── shared/
│ ├── config.py # 共享配置(环境变量)
│ ├── db.py # 数据库连接(psycopg2 + pgvector)
│ ├── embeddings.py # 嵌入生成(OpenAI API)
│ ├── llm.py # LLM 调用(chat + stream)
│ └── sentiment.py # 情感分析(LLM 替代 Comprehend)
├── models/ # 本地模型缓存
│ └── sentence-transformers/all-MiniLM-L6-v2/
├── lab01_semantic_search.py
├── product_recommendation.py
├── rag.py
├── movie_recommendation.py
├── lab07_chatbot.py
└── semantic_cache.py
配置文件 .env
bash
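# OpenAI 兼容 API(替换为你自己的 endpoint)
# 注意:后文所有表都使用 vector(1024),所选嵌入模型的输出维度必须为 1024;
# text-embedding-3-small 默认输出 1536 维,需改用 1024 维的模型,或相应调整建表时的维度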
OPENAI_API_KEY=sk-xxx
OPENAI_BASE_URL=https://api.openai.com/v1
OPENAI_MODEL=gpt-4o-mini
EMBEDDING_MODEL=text-embedding-3-small
# 本地嵌入模型路径
LOCAL_MODEL_PATH=./models/sentence-transformers/all-MiniLM-L6-v2
# PostgreSQL 连接
PG_HOST=localhost
PG_PORT=5433
PG_DB=workshop
PG_USER=postgres
PG_PASSWORD=postgres
嵌入生成
托管的embedding端点
python
import numpy as np
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from shared.config import (
OPENAI_API_KEY, OPENAI_BASE_URL, EMBEDDING_MODEL, EMBEDDING_DIM,
LOCAL_MODEL_PATH, LOCAL_EMBEDDING_DIM
)
def get_embedding(text: str, use_local=False) -> list[float]:
    """Return the embedding vector for a single piece of text.

    Args:
        text: The text to embed.
        use_local: When true, delegate to ``get_local_embedding`` instead of
            calling the OpenAI-compatible embeddings endpoint.

    Returns:
        The embedding as a list of floats.
    """
    if not use_local:
        api = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
        response = api.embeddings.create(input=[text], model=EMBEDDING_MODEL)
        first = response.data[0]
        return first.embedding
    return get_local_embedding(text)
def get_embeddings_batch(texts: list[str], use_local=False) -> list[list[float]]:
    """Return one embedding vector per input text, in input order.

    Args:
        texts: The texts to embed.
        use_local: When true, delegate to ``get_local_embeddings_batch``
            instead of calling the OpenAI-compatible embeddings endpoint.

    Returns:
        A list of embeddings (each a list of floats); empty when ``texts``
        is empty.
    """
    # Nothing to embed: skip the network round-trip / model load entirely
    # rather than sending an empty input list to the endpoint.
    if not texts:
        return []
    if use_local:
        return get_local_embeddings_batch(texts)
    client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_BASE_URL)
    resp = client.embeddings.create(input=texts, model=EMBEDDING_MODEL)
    return [item.embedding for item in resp.data]
本地嵌入模型,对中文嵌入可以考虑BGE-large-zh-v1.5
py
# Module-level cache for the local SentenceTransformer instance.
_local_model = None


def _get_local_model():
    """Return the cached local model, constructing it from LOCAL_MODEL_PATH on first use."""
    global _local_model
    if _local_model is not None:
        return _local_model
    _local_model = SentenceTransformer(LOCAL_MODEL_PATH)
    return _local_model
def get_local_embedding(text: str) -> list[float]:
    """Embed a single text with the local model and return it as a plain list."""
    encoded = _get_local_model().encode(text)
    return encoded.tolist()
def get_local_embeddings_batch(texts: list[str]) -> list[list[float]]:
    """Embed several texts with the local model and return them as nested lists."""
    encoded = _get_local_model().encode(texts)
    return encoded.tolist()
LLM 调用
python
from openai import OpenAI
from shared.config import OPENAI_API_KEY, OPENAI_BASE_URL, OPENAI_MODEL
def _get_client():
    """Build an OpenAI client from the configured API key and base URL."""
    client = OpenAI(base_url=OPENAI_BASE_URL, api_key=OPENAI_API_KEY)
    return client
def chat(prompt: str, system: str = "You are a helpful assistant.", temperature: float = 0.3) -> str:
    """Send a single-turn chat request and return the reply text.

    Args:
        prompt: The user message.
        system: The system prompt placed before the user message.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ]
    completion = _get_client().chat.completions.create(
        model=OPENAI_MODEL,
        messages=conversation,
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def chat_stream(prompt: str, messages: list[dict] = None, system: str = "You are a helpful assistant."):
    """Stream a chat completion, yielding text fragments as they arrive.

    Args:
        prompt: The new user message, appended after ``messages``.
        messages: Earlier turns as ``{"role": ..., "content": ...}`` dicts;
            ``None`` means no history. The list is not modified.
        system: System prompt placed first in the request.

    Yields:
        Each non-empty ``delta.content`` string from the stream.
    """
    client = _get_client()
    history = [] if messages is None else messages
    full_messages = [
        {"role": "system", "content": system},
        *history,
        {"role": "user", "content": prompt},
    ]
    stream = client.chat.completions.create(
        model=OPENAI_MODEL,
        messages=full_messages,
        stream=True,
    )
    for chunk in stream:
        # Streams may contain chunks with an empty ``choices`` list (for
        # example usage-only or filter-result chunks on some OpenAI-compatible
        # endpoints); indexing [0] on those would raise IndexError.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if delta is not None and delta.content:
            yield delta.content
情感分析
用LLM判断文本的情感倾向,输出特定内容
python
from shared.llm import chat
def analyze_sentiment(text: str) -> str:
    """Classify the sentiment of ``text`` with the LLM.

    Args:
        text: The text to classify.

    Returns:
        One of ``"POSITIVE"``, ``"NEGATIVE"``, ``"MIXED"`` or ``"NEUTRAL"``.
        Falls back to ``"NEUTRAL"`` when the reply contains none of the labels
        or is empty.
    """
    prompt = f"""Analyze the sentiment of the following text.
Return ONLY one word: POSITIVE, NEGATIVE, MIXED, or NEUTRAL.
Text: {text}"""
    result = chat(prompt, temperature=0.0)
    # chat() returns the message content as-is, which can be None; treat that
    # as an empty reply instead of crashing on ``None.upper()``.
    normalized = (result or "").upper()
    # Labels are checked in this fixed order; the first one found wins.
    for label in ("POSITIVE", "NEGATIVE", "MIXED", "NEUTRAL"):
        if label in normalized:
            return label
    return "NEUTRAL"
def analyze_sentiment_batch(texts: list[str]) -> list[str]:
    """Classify each text in turn and return the labels in input order."""
    labels = []
    for item in texts:
        labels.append(analyze_sentiment(item))
    return labels
pgvector语法
建表与列类型
sql
-- vector(N) 声明固定维度(N)的向量列,最大支持 16000 维
CREATE TABLE items (
id bigserial PRIMARY KEY,
content text,
embedding vector(1024) -- 固定 1024 维
);
-- 不指定维度时为变长向量(不推荐,无法建索引)
-- embedding vector
pgvector 提供 4 种向量距离运算符,这些运算符可以用在 ORDER BY(排序搜索)和 WHERE(范围过滤)中。
| 运算符 | 距离类型 | 公式 | 适用场景 |
|---|---|---|---|
| `<=>` | 余弦距离 | $1 - \cos(a,b)$ | 文本语义搜索(最常用) |
| `<->` | L2 距离 | $\sqrt{\sum_i (a_i-b_i)^2}$ | 图像搜索 |
| `<#>` | 内积(负值) | $-\sum_i a_i b_i$ | 归一化向量 |
| `<+>` | L1 距离 | $\sum_i \lvert a_i-b_i \rvert$ | — |
sql
-- Cosine similarity search: 1 - (embedding <=> query) is the cosine similarity
-- (range -1 to 1; larger means more similar)
SELECT *, 1 - (embedding <=> '[0.1, 0.2, ...]'::vector) AS similarity
FROM items
ORDER BY embedding <=> '[0.1, 0.2, ...]'::vector -- ORDER BY uses the distance (smaller is closer)
LIMIT 5;
-- Range filter: return only rows whose cosine distance is below 0.5
SELECT * FROM items
WHERE embedding <=> '[0.1, 0.2, ...]'::vector < 0.5;
-- Note: <#> returns the negative inner product; multiply by -1 to get the inner product
SELECT (embedding <#> '[0.1, 0.2, ...]'::vector) * -1 AS inner_product FROM items;
类型转换 ::vector
sql
-- 将文本/数组显式转换为 vector 类型
SELECT '[1,2,3]'::vector;
-- 在 SQL 中使用参数时也需要转换
... WHERE embedding <=> %s::vector ...
当通过 psycopg2 传入 numpy 数组时,register_vector() 已自动处理类型映射;在 SQL 中显式写 %s::vector 则可以消除参数类型的歧义(例如传入 Python list 时会被适配为数组而不是 vector),确保 <=> 等运算符按向量类型解析,从而可以使用向量索引。
聚合函数
sql
-- 计算所有向量的平均值(可用于生成类别中心向量)
SELECT AVG(embedding) FROM items;
-- 按类别分组取平均
SELECT category, AVG(embedding) AS centroid FROM items GROUP BY category;
索引类型,具体参考https://www.cnblogs.com/peacemaple/p/19745520
HNSW (Hierarchical Navigable Small World)
- 分层图结构,低延迟高精度
- 支持并发插入/更新
- 适合大多数生产场景
- 不需要事先有数据即可建索引
- 关键参数:
m(连接数,默认16),ef_construction(构建精度,默认64)
IVFFlat (Inverted File with Flat compression)
- 基于聚类,构建速度快
- 支持并行索引构建
- 召回率略低于 HNSW
- 需要先有数据再建索引(聚类需要样本)
- 关键参数:
lists(聚类数,建议行数/1000或√行数)
sql
-- HNSW index: recommended for most workloads.
-- Operator class -> operator it serves:
--   vector_cosine_ops  <=>
--   vector_l2_ops      <->
--   vector_ip_ops      <#>
--   vector_l1_ops      <+>
-- (PostgreSQL comments start with "--"; "#" is not a comment marker.)
CREATE INDEX idx_items_hnsw ON items              -- create index idx_items_hnsw on table items
    USING hnsw (embedding vector_cosine_ops)      -- index the embedding column using cosine distance
    WITH (m = 16, ef_construction = 64);          -- HNSW build parameters
-- IVFFlat 索引:数据量大且更新少的场景
CREATE INDEX idx_items_ivf ON items
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
-- 查询时调整搜索精度
SET hnsw.ef_search = 100; -- HNSW 候选列表大小(默认40,越大越精确但越慢)
SET ivfflat.probes = 10; -- IVFFlat 探测列表数(默认1,越大越精确)
语义搜索与情感分析
构建语义搜索引擎,结合情感分析理解客户评论。将评论文本转为向量,利用 pgvector 的余弦距离 <=> 进行语义搜索,同时用 LLM 判断每条评论的情感倾向。
数据准备
python
import numpy as np
from shared.db import get_connection, execute_sql
from shared.embeddings import get_embeddings_batch
from shared.sentiment import analyze_sentiment
def get_connection():
    """Open a database connection and register the pgvector type on it.

    NOTE(review): psycopg2, DB_DSN and register_vector are not imported in
    this snippet, and this definition shadows the get_connection imported
    from shared.db above -- presumably this is the body of shared/db.py
    shown inline; confirm.
    """
    connection = psycopg2.connect(DB_DSN)
    # Registering the vector type lets numpy arrays be passed as query
    # parameters for vector columns.
    register_vector(connection)
    return connection
conn = get_connection()
# 1. 建表
execute_sql(conn, """
DROP TABLE IF EXISTS lab01_reviews;
CREATE TABLE lab01_reviews (
id serial PRIMARY KEY,
hotel_name varchar(100),
review_text text,
rating int,
embedding vector(1024) -- pgvector 向量列,维度与嵌入模型一致
);
""")
# 2. 插入样本数据
reviews = [
("Grand Hotel", "Exceptional stay! The staff was wonderful and the room was spotless.", 5),
("Grand Hotel", "Average experience, nothing special but decent value.", 3),
("Budget Inn", "Terrible experience. The room was dirty and uncomfortable.", 1),
("Budget Inn", "It was okay for the price. Not great but not terrible either.", 2),
("Luxury Suites", "Absolutely wonderful! Best hotel experience ever.", 5),
("Luxury Suites", "Overpriced for what you get. Expected more.", 2),
("Seaside Resort", "Beautiful location with amazing ocean views. Highly recommend!", 5),
("Seaside Resort", "The view was nice but service was slow.", 3),
("Airport Motel", "Convenient location but very noisy. Barely slept.", 2),
("Airport Motel", "Good enough for a one-night stopover. Clean room.", 3),
("Mountain Lodge", "Perfect mountain getaway! Cozy and peaceful.", 5),
("Mountain Lodge", "Nice views but the room was a bit dated.", 3),
("City Center Hotel", "Great location, walking distance to everything.", 4),
("City Center Hotel", "Staff was rude and unhelpful. Will not return.", 1),
("Lakeside Inn", "Peaceful and relaxing. The lake views were stunning.", 5),
("Lakeside Inn", "Decent place, nothing remarkable.", 3),
]
# 3. Embed every review text in one batch and store the vectors.
texts = [review_text for _hotel, review_text, _rating in reviews]
# One embedding per review; its dimension must match the vector(1024) column.
embeddings = get_embeddings_batch(texts)
insert_sql = (
    "INSERT INTO lab01_reviews (hotel_name, review_text, rating, embedding) VALUES (%s, %s, %s, %s)"
)
with conn.cursor() as cur:
    # np.array(emb) is passed so the registered pgvector adapter stores it
    # as a vector value.
    cur.executemany(
        insert_sql,
        [
            (hotel, text, rating, np.array(emb))
            for (hotel, text, rating), emb in zip(reviews, embeddings)
        ],
    )
# psycopg2 runs statements inside a transaction unless autocommit is enabled;
# commit so the rows persist beyond this connection.
conn.commit()
语义搜索和情感分析
python
def semantic_search(conn, query: str, top_k: int = 3):
    """Return the ``top_k`` reviews closest to ``query`` by cosine distance.

    Args:
        conn: Open database connection.
        query: Free-text search query; it is embedded before searching.
        top_k: Maximum number of rows to return.

    Returns:
        Rows of ``(hotel_name, review_text, rating, similarity)``.
    """
    embedded = get_embeddings_batch([query])
    query_vec = np.array(embedded[0])
    # embedding <=> %s::vector        -> cosine distance (smaller is closer)
    # 1 - (embedding <=> ...)         -> cosine similarity (larger is closer)
    # ORDER BY embedding <=> ...      -> ascending distance, nearest first
    sql = """
SELECT hotel_name, review_text, rating,
1 - (embedding <=> %s::vector) AS similarity
FROM lab01_reviews
ORDER BY embedding <=> %s::vector
LIMIT %s;
"""
    params = (query_vec, query_vec, top_k)
    with conn.cursor() as cur:
        cur.execute(sql, params)
        rows = cur.fetchall()
    return rows
# Run a few sample queries and print each hit with its LLM-assigned sentiment.
sample_queries = (
    "exceptional stay wonderful experience",
    "terrible dirty uncomfortable",
    "average okay decent place",
)
for query in sample_queries:
    print(f'\n查询: "{query}"')
    results = semantic_search(conn, query)
    for i, row in enumerate(results, start=1):
        hotel, text, rating, sim = row
        sentiment = analyze_sentiment(text)
        print(f" #{i} [{hotel}] 相似度: {sim:.4f} | 评分: {rating} | 情感: {sentiment}")
测试结果:
查询: "exceptional stay wonderful experience"
#1 [Grand Hotel] 相似度: 0.7110 | 评分: 5 | 情感: POSITIVE
#2 [Luxury Suites] 相似度: 0.6758 | 评分: 5 | 情感: POSITIVE
#3 [Seaside Resort] 相似度: 0.6542 | 评分: 5 | 情感: POSITIVE
查询: "terrible dirty uncomfortable"
#1 [Budget Inn] 相似度: 0.6372 | 评分: 1 | 情感: NEGATIVE
#2 [Budget Inn] 相似度: 0.5692 | 评分: 2 | 情感: MIXED
#3 [Grand Hotel] 相似度: 0.5679 | 评分: 5 | 情感: POSITIVE
查询: "average okay decent place"
#1 [Budget Inn] 相似度: 0.6272 | 评分: 2 | 情感: MIXED
#2 [Grand Hotel] 相似度: 0.6210 | 评分: 3 | 情感: MIXED
#3 [Airport Motel] 相似度: 0.5983 | 评分: 2 | 情感: MIXED
情感分析汇总:
POSITIVE : ██████ (6)
MIXED : █████ (5)
NEGATIVE : ███ (3)
NEUTRAL : ██ (2)
产品推荐引擎
主要目的是对比 API 嵌入(1024维)和本地嵌入(384维)的推荐效果。同一批产品描述分别用两种模型生成嵌入,对比相同查询下的推荐结果和性能。
数据准备
python
from shared.db import get_connection, execute_sql
from shared.embeddings import get_embeddings_batch, get_local_embeddings_batch
import numpy as np
import time
conn = get_connection()
# 建两张表,分别存储不同维度的嵌入
execute_sql(conn, """
DROP TABLE IF EXISTS products_api;
DROP TABLE IF EXISTS products_local;
CREATE TABLE products_api (
id serial PRIMARY KEY,
name varchar(200),
category varchar(50),
description text,
price decimal(10,2),
embedding vector(1024)
);
CREATE TABLE products_local (
id serial PRIMARY KEY,
name varchar(200),
category varchar(50),
description text,
price decimal(10,2),
embedding vector(384)
);
""")
products = [
("Wireless NC Headphones", "Electronics", "Premium wireless noise-cancelling headphones with 30h battery", 299.99),
("Bluetooth Speaker", "Electronics", "Portable waterproof speaker with rich bass", 79.99),
# ......
("Winter Down Jacket", "Clothing", "Insulated down jacket for extreme cold weather", 199.99),
("Cotton Summer Dress", "Clothing", "Lightweight cotton dress for summer", 49.99),
]
# Embed the product *descriptions*. Each product tuple is
# (name, category, description, price), so the description is index 2;
# index 3 is the price and must not be embedded.
texts = [description for _name, _category, description, _price in products]

# Remote API embeddings (stored in the vector(1024) table).
t0 = time.time()
api_embeddings = get_embeddings_batch(texts)
api_time = time.time() - t0
with conn.cursor() as cur:
    cur.executemany(
        "INSERT INTO products_api (name, category, description, price, embedding) VALUES (%s,%s,%s,%s,%s)",
        [(*prod, np.array(emb)) for prod, emb in zip(products, api_embeddings)],
    )

# Local model embeddings (stored in the vector(384) table).
t0 = time.time()
local_embeddings = get_local_embeddings_batch(texts)
local_time = time.time() - t0
with conn.cursor() as cur:
    cur.executemany(
        "INSERT INTO products_local (name, category, description, price, embedding) VALUES (%s,%s,%s,%s,%s)",
        [(*prod, np.array(emb)) for prod, emb in zip(products, local_embeddings)],
    )

# psycopg2 runs statements inside a transaction unless autocommit is enabled;
# commit so both tables keep their rows.
conn.commit()
print(f"API 嵌入耗时: {api_time:.2f}s, 本地嵌入耗时: {local_time:.2f}s")
对比搜索
python
def search_products(conn, query: str, table: str, dim: int, top_k: int = 3):
    """Return the ``top_k`` products in ``table`` most similar to ``query``.

    Args:
        conn: Open database connection.
        query: Free-text query; it is embedded before searching.
        table: Name of the table to search. It is interpolated directly into
            the SQL text, so it must come from trusted code, never user input.
        dim: Embedding dimension of ``table``; 384 selects the local model,
            any other value the API model.
        top_k: Maximum number of rows to return.

    Returns:
        Rows of ``(name, similarity)``.
    """
    wants_local = dim == 384
    vectors = get_embeddings_batch([query], use_local=wants_local)
    q_vec = np.array(vectors[0])
    sql = f"""
SELECT name, 1 - (embedding <=> %s::vector) AS similarity
FROM {table}
ORDER BY embedding <=> %s::vector
LIMIT %s;
"""
    with conn.cursor() as cur:
        cur.execute(sql, (q_vec, q_vec, top_k))
        rows = cur.fetchall()
    return rows
# Compare API and local embeddings side by side for a few queries.
comparison_queries = (
    "warm winter jacket for outdoor activities",
    "wireless noise cancelling headphones",
)
for query in comparison_queries:
    print(f'\n查询: "{query}"')

    started = time.time()
    api_results = search_products(conn, query, "products_api", 1024)
    api_elapsed = time.time() - started

    started = time.time()
    local_results = search_products(conn, query, "products_local", 384)
    local_elapsed = time.time() - started

    print(f" API (1024维) | 本地 (384维)")
    for api_row, local_row in zip(api_results, local_results):
        a_name, a_sim = api_row
        l_name, l_sim = local_row
        # Pad the left column; a non-positive width yields no padding.
        padding = ' ' * (28 - len(a_name) - 8)
        print(f" {a_name} ({a_sim:.3f}){padding} | {l_name} ({l_sim:.3f})")
    gap = ' ' * 22
    print(f" 耗时: {api_elapsed:.3f}s{gap} | 耗时: {local_elapsed:.3f}s")
测试结果:
查询: "warm winter jacket for outdoor activities"
API (1024维) | 本地 (384维)
Winter Down Jacket (0.649) | Winter Down Jacket (0.545)
Merino Wool Sweater (0.603) | Cotton Summer Dress (0.359)
Cashmere Scarf (0.583) | Linen Button-Down Shirt (0.353)
耗时: 0.500s | 耗时: 0.019s
查询: "wireless noise cancelling headphones"
API (1024维) | 本地 (384维)
Wireless NC Headphones (0.676) | Wireless NC Headphones (0.274)
Bluetooth Speaker (0.575) | Bluetooth Speaker (0.228)
耗时: 0.329s | 耗时: 0.016s
总结:
API 嵌入 (1024维): 嵌入耗时 11.21s,语义更丰富,相似度更高
本地嵌入 (384维): 嵌入耗时 0.10s,速度快 100x,相似度较低但排序基本正确
结论如下
- API 嵌入的相似度分数显著更高(以耳机查询为例,0.68 vs 0.27),区分度更好
- 本地模型速度快 50-100 倍(无网络开销)
- 两种方式的第一名推荐一致,但后续排序有差异
检索增强生成
需求是实现 RAG 流水线,对比有无 RAG 的回答质量。步骤为将知识文档切块 → 生成嵌入 → 存入 pgvector。用户提问时先检索相关文档,再注入 LLM 上下文生成回答。
数据准备
python
from shared.db import get_connection, execute_sql
from shared.embeddings import get_embeddings_batch
from shared.llm import chat
import numpy as np
conn = get_connection()
# 建表
execute_sql(conn, """
DROP TABLE IF EXISTS knowledge_base;
CREATE TABLE knowledge_base (
id serial PRIMARY KEY,
title varchar(200),
content text,
embedding vector(1024)
);
""")
# 知识文档
documents = [
("pgvector Overview",
"pgvector is an open-source PostgreSQL extension that adds vector similarity search capabilities. "
"It supports vectors up to 16,000 dimensions and provides HNSW and IVFFlat indexing methods."),
("LangChain with pgvector",
"LangChain provides a pgvector integration through the PGVector vector store. "
"It supports similarity search, MMR search, and can be used as a retriever in RAG pipelines."),
("Hybrid Search",
"Hybrid search combines vector similarity search with traditional keyword search (BM25). "
"pgvector can be combined with PostgreSQL full-text search for better recall."),
("Amazon Aurora",
"Amazon Aurora is a MySQL and PostgreSQL-compatible relational database built for the cloud. "
"It delivers up to 3x the throughput of standard PostgreSQL."),
("Aurora ML",
"Aurora ML enables machine learning inference directly within SQL queries using AWS services "
"like SageMaker and Bedrock, without needing to export data."),
("Transformer Architecture",
"The Transformer architecture uses self-attention mechanisms to process input sequences in parallel. "
"It forms the basis of modern LLMs like GPT and BERT."),
("pgvector Distance Operators",
"pgvector provides <=> (cosine distance), <-> (L2 distance), <#> (inner product), "
"and <+> (L1 distance) operators for similarity calculations."),
]
# Embed "title. content" for every document and store the vectors.
texts = [f"{title}. {content}" for title, content in documents]
embeddings = get_embeddings_batch(texts)
with conn.cursor() as cur:
    cur.executemany(
        "INSERT INTO knowledge_base (title, content, embedding) VALUES (%s, %s, %s)",
        [
            (title, content, np.array(emb))
            for (title, content), emb in zip(documents, embeddings)
        ],
    )
# psycopg2 runs statements inside a transaction unless autocommit is enabled;
# commit so the knowledge base is visible to other connections (the chatbot
# script reuses this table).
conn.commit()
RAG 查询
python
def rag_query(conn, question: str, top_k: int = 3) -> str:
    """Answer ``question`` using retrieved knowledge-base entries as context.

    Args:
        conn: Open database connection.
        question: The user question; it is embedded for retrieval and also
            included in the prompt.
        top_k: Number of knowledge-base rows to retrieve.

    Returns:
        The value returned by ``chat`` for the assembled prompt.
    """
    vectors = get_embeddings_batch([question])
    q_vec = np.array(vectors[0])

    # Step 1: similarity search for the top_k closest documents.
    retrieval_sql = """
SELECT title, content, 1 - (embedding <=> %s::vector) AS similarity
FROM knowledge_base
ORDER BY embedding <=> %s::vector
LIMIT %s;
"""
    with conn.cursor() as cur:
        cur.execute(retrieval_sql, (q_vec, q_vec, top_k))
        sources = cur.fetchall()

    # Step 2: join the hits into one context string (similarity is not used).
    snippets = []
    for title, content, _similarity in sources:
        snippets.append(f"[{title}] {content}")
    context = "\n\n".join(snippets)

    # Step 3: build the prompt and ask the LLM.
    prompt = f"""Based on the following reference materials, answer the question in Chinese.
If the references don't contain relevant information, say so.
References:
{context}
Question: {question}"""
    return chat(prompt)
# Ask the same question with retrieval, then without it, for comparison.
question = "pgvector 是什么?它能做什么?"
rag_answer = rag_query(conn, question)
print(rag_answer)
# Without RAG: send the question straight to the LLM.
print("---\n无 RAG:")
plain_answer = chat(question)
print(plain_answer)
测试结果:
问题: pgvector 是什么?它能做什么?
检索到:
- [pgvector Overview] 相似度: 0.8512
- [LangChain with pgvector] 相似度: 0.7232
- [Hybrid Search] 相似度: 0.6375
回答: pgvector 是一个开源的 PostgreSQL 扩展,它为 PostgreSQL 添加了向量相似性搜索功能。
它支持存储高达 16,000 维的向量,并提供 HNSW 和 IVFFlat 索引方法...
RAG vs 无 RAG 对比:
问题: "What is pgvector?"
[有 RAG]: pgvector 是一个开源的 PostgreSQL 扩展,添加了向量相似性搜索功能...
[无 RAG]: pgvector is an open-source extension for PostgreSQL that adds support
for vector similarity search... (英文回答,信息较泛)
电影推荐系统
用 Python API 调用替代 Aurora ML 的 SQL 内推理,实现电影推荐和评论摘要。原始 Workshop 使用 aws_bedrock.invoke_model_get_embeddings() 在 SQL 中直接生成嵌入,我们改用 Python 先调用 API 生成嵌入再存入数据库。
数据准备
python
from shared.db import get_connection, execute_sql
from shared.embeddings import get_embeddings_batch
from shared.llm import chat
import numpy as np
conn = get_connection()
# Drop the child table first: reviews.movie_id references movies(id), so on a
# re-run PostgreSQL refuses to drop movies while reviews still exists.
execute_sql(conn, """
DROP TABLE IF EXISTS reviews;
DROP TABLE IF EXISTS movies;
CREATE TABLE movies (
    id serial PRIMARY KEY,
    title varchar(200),
    year int,
    genre varchar(50),
    overview text,
    embedding vector(1024)
);
CREATE TABLE reviews (
    id serial PRIMARY KEY,
    movie_id int REFERENCES movies(id),
    review_text text
);
""")
movies = [
("Inception", 2010, "Sci-Fi", "A thief who enters the dreams of others to steal their secrets is offered a chance to have his criminal record erased."),
("The Shawshank Redemption", 1994, "Drama", "Two imprisoned men bond over a number of years, finding solace and eventual redemption through acts of common decency."),
("Parasite", 2019, "Thriller", "Greed and class discrimination threaten the newly formed symbiotic relationship between the wealthy Park family and the destitute Kim clan."),
("The Grand Budapest Hotel", 2014, "Comedy", "A writer encounters the owner of an aging high-class hotel, who tells him of his early years serving as a lobby boy."),
("Interstellar", 2014, "Sci-Fi", "A team of explorers travel through a wormhole in space in an attempt to ensure humanity's survival."),
("Arrival", 2016, "Sci-Fi", "A linguist works with the military to communicate with alien lifeforms after twelve mysterious spacecraft appear around the world."),
]
reviews = {
"Inception": [
"Mind-bending masterpiece! Nolan outdid himself with the dream layers.",
"Great visuals but confusing plot. Had to watch twice to understand.",
],
"Interstellar": [
"Visually stunning and emotionally powerful. The docking scene is cinema at its finest.",
"The science is mostly accurate and the story is deeply moving.",
],
}
# Insert the movies (without embeddings) and remember each generated id,
# then insert the reviews that belong to them.
movie_ids = {}
with conn.cursor() as cur:
    for title, year, genre, overview in movies:
        cur.execute(
            "INSERT INTO movies (title, year, genre, overview) VALUES (%s,%s,%s,%s) RETURNING id",
            (title, year, genre, overview)
        )
        movie_ids[title] = cur.fetchone()[0]
    for title, revs in reviews.items():
        mid = movie_ids[title]
        cur.executemany(
            "INSERT INTO reviews (movie_id, review_text) VALUES (%s, %s)",
            [(mid, r) for r in revs],
        )

# Embed the overviews, then attach each vector to its row. The update is keyed
# by primary key rather than title, so it targets exactly the inserted row.
overviews = [overview for _title, _year, _genre, overview in movies]
embeddings = get_embeddings_batch(overviews)
with conn.cursor() as cur:
    for (title, _year, _genre, _overview), emb in zip(movies, embeddings):
        cur.execute(
            "UPDATE movies SET embedding = %s WHERE id = %s",
            (np.array(emb), movie_ids[title])
        )
# psycopg2 runs statements inside a transaction unless autocommit is enabled;
# commit so the inserted rows and embeddings persist.
conn.commit()
推荐搜索
python
def find_similar_movies(conn, movie_title: str, top_k: int = 3):
    """Return the ``top_k`` movies whose overview embedding is closest to ``movie_title``'s.

    Args:
        conn: Open database connection.
        movie_title: Title of the reference movie; it is excluded from the
            results.
        top_k: Maximum number of rows to return.

    Returns:
        Rows of ``(title, year, genre, similarity)``.
    """
    # The reference embedding is fetched by a subquery on the title and
    # compared to every other row with <=>.
    sql = """
SELECT m.title, m.year, m.genre,
1 - (m.embedding <=> (SELECT embedding FROM movies WHERE title = %s)) AS similarity
FROM movies m
WHERE m.title != %s
ORDER BY m.embedding <=> (SELECT embedding FROM movies WHERE title = %s)
LIMIT %s;
"""
    params = (movie_title,) * 3 + (top_k,)
    with conn.cursor() as cur:
        cur.execute(sql, params)
        matches = cur.fetchall()
    return matches
def search_by_description(conn, query: str, top_k: int = 3):
    """Search movies by a natural-language description.

    Args:
        conn: Open database connection.
        query: Description to embed and compare against movie embeddings.
        top_k: Maximum number of rows to return.

    Returns:
        Rows of ``(title, year, genre, similarity)``, nearest first.
    """
    vectors = get_embeddings_batch([query])
    q_vec = np.array(vectors[0])
    sql = """
SELECT title, year, genre,
1 - (embedding <=> %s::vector) AS similarity
FROM movies
ORDER BY embedding <=> %s::vector
LIMIT %s;
"""
    with conn.cursor() as cur:
        cur.execute(sql, (q_vec, q_vec, top_k))
        matches = cur.fetchall()
    return matches
def summarize_reviews(conn, movie_title: str) -> str:
    """Summarize the stored reviews of ``movie_title`` with the LLM.

    Args:
        conn: Open database connection.
        movie_title: Title whose reviews are looked up.

    Returns:
        The LLM summary, or the fixed "no reviews" message when the movie
        has no reviews.
    """
    sql = """
SELECT r.review_text FROM reviews r
JOIN movies m ON r.movie_id = m.id
WHERE m.title = %s;
"""
    with conn.cursor() as cur:
        cur.execute(sql, (movie_title,))
        rows = cur.fetchall()
    review_lines = [f'- "{row[0]}"' for row in rows]
    if len(review_lines) == 0:
        return "暂无评论"
    review_text = "\n".join(review_lines)
    prompt = f"请用中文简洁概括以下影评的核心观点:\n{review_text}"
    return chat(prompt)
# Demo: similar movies, description search and review summary.
# Ranks come from enumerate(); the literal {1} previously printed "#1" on
# every line.
print("查找与「Inception」相似的电影:")
for rank, (title, year, genre, sim) in enumerate(find_similar_movies(conn, "Inception"), 1):
    print(f" #{rank} {title} ({year}) [{genre}] 相似度: {sim:.4f}")
print('\n按描述搜索 "space exploration and humanity\'s future":')
description_hits = search_by_description(conn, "space exploration and humanity's future")
for rank, (title, year, genre, sim) in enumerate(description_hits, 1):
    print(f" #{rank} {title} ({year}) [{genre}] 相似度: {sim:.4f}")
print("\n评论摘要(Inception):")
print(f" {summarize_reviews(conn, 'Inception')}")
测试结果:
查找与「Inception」相似的电影:
#1 Parasite (2019) [Thriller] 相似度: 0.5802
#2 The Shawshank Redemption (1994) [Drama] 相似度: 0.5799
#3 The Grand Budapest Hotel (2014) [Comedy] 相似度: 0.5766
按描述搜索 "space exploration and humanity's future":
#1 Interstellar (2014) [Sci-Fi] 相似度: 0.6166 ← 精准命中!
#2 Arrival (2016) [Sci-Fi] 相似度: 0.5613
评论摘要(Inception):
原始: "Mind-bending masterpiece! Nolan outdid himself with the dream layers."
"Great visuals but confusing plot. Had to watch twice to understand."
AI 摘要: 诺兰烧脑神作,梦境层层嵌套;视觉惊艳但剧情晦涩,需二刷才懂。
AI 聊天机器人
构建基于知识库的 AI 聊天机器人,支持多轮对话和流式输出。维护对话历史,每轮对话先检索知识库获取上下文,结合历史构建 prompt。
核心代码
python
import sys
from shared.db import get_connection, execute_sql
from shared.embeddings import get_embeddings_batch
from shared.llm import chat_stream
import numpy as np
# 复用RAG的知识库
conn = get_connection()
def retrieve_context(conn, question: str, top_k: int = 3) -> list[dict]:
    """Fetch the knowledge-base entries most relevant to ``question``.

    Args:
        conn: Open database connection.
        question: Text to embed and search with.
        top_k: Maximum number of entries to return.

    Returns:
        A list of dicts with keys ``title``, ``content`` and ``similarity``,
        nearest first.
    """
    vectors = get_embeddings_batch([question])
    q_vec = np.array(vectors[0])
    sql = """
SELECT title, content, 1 - (embedding <=> %s::vector) AS similarity
FROM knowledge_base
ORDER BY embedding <=> %s::vector
LIMIT %s;
"""
    with conn.cursor() as cur:
        cur.execute(sql, (q_vec, q_vec, top_k))
        rows = cur.fetchall()
    hits = []
    for title, content, similarity in rows:
        hits.append({"title": title, "content": content, "similarity": similarity})
    return hits
def chatbot_turn(conn, question: str, history: list[dict], top_k: int = 3):
    """Run one chat turn: retrieve context, stream the answer, record the turn.

    Args:
        conn: Open database connection.
        question: The user's message for this turn.
        history: Earlier turns as ``{"role": "user"/"assistant", "content": ...}``
            dicts. It is extended in place with this turn's question and answer.
        top_k: Number of knowledge-base entries to retrieve.

    Returns:
        A ``(full_response, sources)`` tuple: the complete answer text and the
        retrieved entries.
    """
    # 1. Retrieve the relevant context.
    sources = retrieve_context(conn, question, top_k)
    snippets = [f"[{s['title']}] {s['content']}" for s in sources]
    context = "\n\n".join(snippets)

    # 2. Build the system prompt around the retrieved references.
    system = f"""You are a knowledgeable AI assistant. Answer questions based on the provided references.
Answer in Chinese when the user asks in Chinese.
References:
{context}"""

    # 3. Stream the answer to stdout while collecting it.
    pieces = []
    for token in chat_stream(question, messages=history, system=system):
        print(token, end="", flush=True)
        pieces.append(token)
    print()  # newline after the streamed answer
    full_response = "".join(pieces)

    # 4. Record this turn in the conversation history.
    history.extend(
        [
            {"role": "user", "content": question},
            {"role": "assistant", "content": full_response},
        ]
    )
    return full_response, sources
# 交互式聊天
def interactive_chat():
    """Play a fixed three-question conversation through ``chatbot_turn``.

    The questions are scripted (no input is read); each turn shares one
    history list, and the retrieved source titles are printed after each
    answer.
    """
    history = []
    print("=== AI 聊天机器人(输入 quit 退出)===")
    scripted_questions = (
        "What is Amazon Aurora?",
        "How does it compare to regular PostgreSQL?",
        "Can I use it for vector search?",
    )
    for q in scripted_questions:
        print(f"\n[User] {q}")
        print("[Bot] ", end="")
        _answer, sources = chatbot_turn(conn, q, history)
        titles = ', '.join(s['title'] for s in sources)
        print(f" (检索到: {titles})")


interactive_chat()
测试结果:
[Turn 1] User: What is Amazon Aurora?
检索到: Amazon Aurora, Aurora ML, Transformer Architecture
Bot: Amazon Aurora 是一种兼容 MySQL 和 PostgreSQL 的关系型数据库,专为云环境构建...
[Turn 2] User: How does it compare to regular PostgreSQL?
检索到: pgvector Extension, pgvector Distance Operators, Amazon Aurora
Bot: Aurora 的吞吐量是标准 PostgreSQL 的 3 倍...
[Turn 3] User: Can I use it for vector search?
Bot: 是的,您可以将 Amazon Aurora 用于向量搜索...
RAG vs 无 RAG 对比:
Q: "What is Aurora ML?"
[RAG]: 根据知识库中的信息,Aurora ML 是 Amazon Aurora 的一项功能,
支持在数据库内进行机器学习推理... (基于事实)
[No RAG]: Aurora ML can refer to a few different things depending on context...
(泛化回答,不够精确)
语义缓存聊天机器人
用 pgvector 表替代 Redis/Valkey 实现语义缓存,减少 LLM 调用。将历史问答对存入 pgvector 表,新问题先搜索缓存表,语义相似度超过阈值则直接返回缓存答案。
核心代码
python
import time
from shared.db import get_connection, execute_sql
from shared.embeddings import get_embeddings_batch
from shared.llm import chat
import numpy as np
conn = get_connection()
# 缓存表:存储问答对及其嵌入
execute_sql(conn, """
DROP TABLE IF EXISTS cache;
CREATE TABLE cache (
id serial PRIMARY KEY,
question text,
answer text,
embedding vector(1024),
created_at timestamp default now()
);
""")
def query_with_cache(conn, question: str, threshold: float = 0.90):
    """Answer ``question``, reusing a cached answer when a similar one exists.

    Steps:
        1. Embed the question.
        2. Find the most similar cached question.
        3. If its similarity is >= ``threshold``, return the cached answer
           without calling the LLM.
        4. Otherwise call the LLM and store the new question/answer pair.

    Args:
        conn: Open database connection.
        question: The user question.
        threshold: Minimum cosine similarity for a cache hit.

    Returns:
        On a hit: ``(cached_answer, True, similarity)``.
        On a miss: ``(answer, False, (nearest_similarity, llm_seconds))``,
        where ``nearest_similarity`` is 0 when the cache is empty.
    """
    q_emb = np.array(get_embeddings_batch([question])[0])

    # Step 1: look up the nearest cached question.
    with conn.cursor() as cur:
        cur.execute("""
SELECT question, answer, 1 - (embedding <=> %s::vector) AS similarity
FROM cache
ORDER BY embedding <=> %s::vector
LIMIT 1;
""", (q_emb, q_emb))
        cache_hit = cur.fetchone()

    # Step 2: cache hit -> return the stored answer.
    if cache_hit and cache_hit[2] >= threshold:
        _cached_q, cached_a, sim = cache_hit
        return cached_a, True, sim

    # Step 3: cache miss -> call the LLM and time it.
    t0 = time.time()
    answer = chat(question)
    elapsed = time.time() - t0

    # Step 4: store the new pair.
    with conn.cursor() as cur:
        cur.execute(
            "INSERT INTO cache (question, answer, embedding) VALUES (%s, %s, %s)",
            (question, answer, q_emb)
        )
    # psycopg2 runs statements inside a transaction unless autocommit is
    # enabled; commit so the cache entry outlives this connection.
    conn.commit()

    actual_sim = cache_hit[2] if cache_hit else 0
    return answer, False, (actual_sim, elapsed)
# Exercise the semantic cache with three differently-worded questions.
print("=== 语义缓存测试 ===\n")
threshold = 0.90
test_questions = [
    "What are the best hotels in Tokyo?",
    "Recommend good accommodations in Tokyo",  # similar meaning, different wording
    "Top places to stay in Tokyo for tourists",  # another variation
]
for q in test_questions:
    answer, cached, detail = query_with_cache(conn, q, threshold)
    print(f' "{q}"')
    if cached:
        # On a hit, detail is the similarity of the matched cache entry.
        print(f" → 缓存命中! 相似度: {detail:.4f}")
    else:
        # On a miss, detail is (nearest cached similarity, LLM seconds).
        sim, elapsed = detail
        print(f" → 缓存未命中 | 最近缓存相似度: {sim:.4f} | LLM 耗时: {elapsed:.2f}s")
    print(f" 回答: {answer[:80]}...\n")

# Threshold tuning: measure how similar two paraphrases really are.
print("=== 阈值调优 ===")
q1_emb = np.array(get_embeddings_batch(["What are the best hotels in Tokyo?"])[0])
q2_emb = np.array(get_embeddings_batch(["Recommend good accommodations in Tokyo"])[0])
# Client-side cosine *similarity*. The previous expression subtracted this
# from 1, which yields the cosine distance under a "similarity" name.
actual_sim = float(np.dot(q1_emb, q2_emb) / (np.linalg.norm(q1_emb) * np.linalg.norm(q2_emb)))
# The reported figure comes from pgvector's cosine distance operator.
with conn.cursor() as cur:
    cur.execute("SELECT %s::vector <=> %s::vector AS dist", (q1_emb, q2_emb))
    cosine_dist = cur.fetchone()[0]
actual_cosine_sim = 1 - cosine_dist
print(f' "What are the best hotels in Tokyo?" vs "Recommend good accommodations in Tokyo"')
print(f" 实际相似度: {actual_cosine_sim:.4f}")
for t in [0.80, 0.85, 0.90, 0.95]:
    status = "命中" if actual_cosine_sim >= t else "未命中"
    print(f" 阈值 {t:.2f}: {status}")
测试结果:
=== 语义缓存测试 ===
"What are the best hotels in Tokyo?"
→ 缓存未命中 | 最近缓存相似度: 0.0000 | LLM 耗时: 4.83s
"Recommend good accommodations in Tokyo"
→ 缓存未命中 | 最近缓存相似度: 0.7556 | LLM 耗时: 2.51s (阈值 0.90 过高)
=== 阈值调优 ===
"What are the best hotels in Tokyo?" vs "Recommend good accommodations in Tokyo"
实际相似度: 0.7556
阈值 0.80: 未命中
阈值 0.85: 未命中
阈值 0.90: 未命中
阈值 0.95: 未命中
结论如下
- 当前阈值 0.90 过高,语义相似的问题相似度只有 0.76
- 生产环境建议阈值设为 0.70-0.80,可显著提高命中率
- 缓存命中时响应时间接近 0(省去 LLM 调用),成本节省明显
- pgvector 表做缓存完全可行,无需额外引入 Redis