PostgreSQL性能优化实战——索引、SQL与参数调优
1. 引言
数据库性能问题80%来自SQL和索引设计,20%来自配置参数。
2. 慢查询分析
2.1 开启慢查询日志
# postgresql.conf -- logging settings used for slow-query analysis
log_min_duration_statement = 1000 # in milliseconds: log statements that run for 1 second or longer
log_line_prefix = '%t [%p] %u@%d ' # timestamp, [process ID], user@database
log_statement = 'ddl' # log DDL statements
log_checkpoints = on # log each checkpoint
log_lock_waits = on # log sessions that wait a long time to acquire a lock
2.2 分析慢查询
sql
-- List statements that have been running for more than 500 ms, longest first.
-- now() - query_start is an interval, and round() has no interval overload,
-- so the elapsed time is first converted to seconds with extract(epoch ...),
-- scaled to milliseconds, and cast to numeric for round(numeric, integer).
SELECT pid, usename, datname,
    round((1000 * extract(epoch FROM now() - query_start))::numeric, 2) AS duration_ms,
    query
FROM pg_stat_activity
WHERE state = 'active'
    AND now() - query_start > interval '500 milliseconds'
ORDER BY duration_ms DESC;
-- Statistics for past statements come from the pg_stat_statements extension.
-- NOTE: the module must also be loaded via shared_preload_libraries.
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- Top 10 statements by mean execution time.
SELECT
    queryid,
    query,
    calls,
    total_exec_time,
    mean_exec_time,
    max_exec_time,
    rows,
    shared_blks_hit + shared_blks_read AS total_blocks
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 10;
3. 执行计划解读
sql
-- Get the execution plan. ANALYZE actually runs the statement and reports
-- real row counts; BUFFERS and TIMING add buffer usage and per-node timing.
EXPLAIN (ANALYZE, BUFFERS, TIMING, FORMAT TEXT)
SELECT *
FROM orders
WHERE customer_id = 12345
    AND order_date > '2024-01-01';

-- How to read the main plan nodes:
--   Seq Scan          : full table scan (inefficient on large tables)
--   Index Scan        : index scan (efficient)
--   Bitmap Index Scan : bitmap index scan (suits multiple conditions)
--   Nested Loop       : nested-loop join
--   Hash Join         : hash join (suits joins between large tables)
--   Merge Join        : merge join (suits inputs that are already sorted)
4. 索引优化实战
4.1 索引类型选择
| 索引类型 | 适用场景 | 示例 |
|---|---|---|
| B-Tree | 等值、范围查询、排序 | CREATE INDEX idx_name ON table(col) |
| Hash | 等值查询 | CREATE INDEX idx_hash ON table USING HASH(col) |
| GIN | 数组、全文检索、JSONB | CREATE INDEX idx_gin ON table USING GIN(jsonb_col) |
| BRIN | 超大表、线性相关数据 | CREATE INDEX idx_brin ON table USING BRIN(created_at) |
4.2 复合索引最佳实践
sql
-- Bad example: poor column order in a composite index
CREATE INDEX idx_wrong ON orders (order_date, customer_id);
-- Query: WHERE customer_id = 123 AND order_date > '2024-01-01'
-- Only the order_date range limits how much of this index is scanned; the
-- customer_id equality is merely checked against each entry inside that
-- range, so entries for every customer after the date are still read.
-- Good example: equality columns first, range columns last
CREATE INDEX idx_correct ON orders (customer_id, order_date);
-- Covering index: INCLUDE stores extra columns in the index so that queries
-- needing only these columns can avoid visiting the table
CREATE INDEX idx_covering ON orders (customer_id, order_date)
INCLUDE (total_amount, status);
4.3 索引维护
sql
-- Find indexes that have not been scanned since statistics were last reset.
-- pg_stat_user_indexes names its columns relname / indexrelname
-- (tablename / indexname belong to the pg_indexes view), so they are
-- aliased here to keep the intended output column names.
SELECT schemaname,
    relname AS tablename,
    indexrelname AS indexname,
    idx_scan,
    idx_tup_read
FROM pg_stat_user_indexes
WHERE idx_scan = 0 AND idx_tup_read = 0;
-- Find duplicate indexes: same table, key columns, collations, operator
-- classes and sort options.
-- Index expressions (indexprs) and partial-index predicates (indpred) are
-- part of the grouping key as well; without them, expression indexes and
-- partial indexes on the same table would be reported as duplicates.
-- The vector columns are grouped by their text form so the query does not
-- depend on grouping support for the catalog vector types.
SELECT pg_size_pretty(sum(pg_relation_size(indexrelid))::bigint) AS size,
    array_agg(indexrelid::regclass) AS indexes
FROM pg_index
GROUP BY indrelid,
    indkey::text,
    indcollation::text,
    indclass::text,
    indoption::text,
    coalesce(indexprs::text, ''),
    coalesce(indpred::text, '')
HAVING count(*) > 1;
-- Rebuild indexes to reduce bloat; CONCURRENTLY rebuilds without blocking
-- writes to the table while the rebuild runs
REINDEX INDEX CONCURRENTLY idx_name;
REINDEX TABLE CONCURRENTLY table_name;
5. SQL优化技巧
5.1 避免常见低效写法
sql
-- Inefficient: wrapping the indexed column in a function keeps a plain
-- index on email from being used
SELECT *
FROM users
WHERE lower(email) = 'admin@example.com';

-- Efficient: compare the bare column
-- (returns the same rows only if email is stored in lower case)
SELECT *
FROM users
WHERE email = 'admin@example.com';

-- Or keep the lower(email) predicate and create a matching expression index
CREATE INDEX idx_lower_email
    ON users (lower(email));
-- Inefficient: implicit type conversion applied to the indexed column.
-- order_id is an integer column. A quoted literal such as '12345' is NOT a
-- problem in PostgreSQL: it is an untyped literal that is resolved to
-- integer when the statement is parsed. A numeric literal is different:
-- there is no integer = numeric operator, so order_id itself is implicitly
-- cast to numeric and a plain index on order_id cannot be used.
SELECT * FROM orders WHERE order_id = 12345.0;
-- Efficient: compare with a value of the column's own type
SELECT * FROM orders WHERE order_id = 12345;
-- Inefficient: SELECT * fetches every column
SELECT *
FROM orders
WHERE customer_id = 100;

-- Efficient: list only the columns that are needed
SELECT
    order_id,
    order_date,
    total_amount
FROM orders
WHERE customer_id = 100;
5.2 分页优化
sql
-- Inefficient: classic OFFSET paging (slow when the offset is large)
SELECT *
FROM orders
ORDER BY id
LIMIT 10 OFFSET 100000;

-- Efficient: keyset pagination -- continue after a known id instead of
-- skipping rows. 100000 stands for the last id seen on the previous page.
SELECT *
FROM orders
WHERE id > 100000
ORDER BY id
LIMIT 10;
-- Efficient: deferred join -- page through the id list first, then fetch
-- the full rows for just those 10 ids.
-- The outer ORDER BY is required: the ordering inside the subquery only
-- decides which ids are picked and does not guarantee the order of the
-- joined result.
SELECT o.*
FROM orders AS o
INNER JOIN (
    SELECT id
    FROM orders
    ORDER BY id
    LIMIT 10 OFFSET 100000
) AS t ON o.id = t.id
ORDER BY o.id;
5.3 批量操作优化
sql
-- Multi-row INSERT: several rows in a single statement
INSERT INTO logs (user_id, action, created_at)
VALUES
    (1, 'login', now()),
    (2, 'click', now()),
    (3, 'purchase', now());

-- COPY for very fast bulk loading
COPY users FROM '/data/users.csv' WITH (FORMAT csv, HEADER true, DELIMITER ',');
-- Bulk UPDATE: stage the new values in a temporary table, then apply them
-- to the target with a single joined UPDATE
CREATE TEMP TABLE temp_updates AS
SELECT
    id,
    new_value
FROM source_table;

UPDATE target AS t
SET value = tu.new_value
FROM temp_updates AS tu
WHERE t.id = tu.id;
6. 内存参数调优
ini
# Assumed server: 32 GB RAM, 8 CPU cores
# Shared memory (core parameter)
shared_buffers = 8GB # shared cache for data blocks
huge_pages = try # use huge pages when available
# Working memory -- work_mem applies to each sort/hash operation, not to each
# connection, so a single query can use several multiples of it
work_mem = 64MB # sorts / hash tables
maintenance_work_mem = 2GB # VACUUM / index creation
temp_buffers = 32MB # temporary tables
# Write / I/O related
wal_buffers = 32MB
effective_io_concurrency = 200 # can be raised on SSD storage
# Planner cost estimates
effective_cache_size = 24GB # planner's estimate of cache available to a query (including the OS cache)
random_page_cost = 1.1 # lower to 1.1 on SSD storage
cpu_tuple_cost = 0.01
7. 连接池配置(PgBouncer示例)
ini
# pgbouncer.ini
[databases]
mydb = host=localhost port=5432 dbname=mydb
[pgbouncer]
listen_addr = 0.0.0.0
listen_port = 6432
auth_type = md5
auth_file = /etc/pgbouncer/userlist.txt
# Transaction-level pooling. PgBouncer treats # and ; as comment markers
# only at the start of a line, so this note must not trail the value.
pool_mode = transaction
max_client_conn = 1000
default_pool_size = 50
reserve_pool_size = 10
8. 性能监控指标
sql
-- Table cache hit ratio in percent (should be above 99)
SELECT
    round(
        100 * sum(heap_blks_hit)
            / nullif(sum(heap_blks_hit + heap_blks_read), 0),
        2
    ) AS cache_hit_ratio
FROM pg_statio_user_tables;

-- Index cache hit ratio in percent
SELECT
    round(
        100 * sum(idx_blks_hit)
            / nullif(sum(idx_blks_hit + idx_blks_read), 0),
        2
    ) AS idx_hit_ratio
FROM pg_statio_user_indexes;
-- Transaction commit ratio in percent for the current database.
-- xact_commit and xact_rollback are bigint counters, so the multiplier is
-- written as 100.0 to force numeric division; with integer division the
-- fractional part is discarded before round() is applied.
SELECT round(100.0 * xact_commit / nullif(xact_commit + xact_rollback, 0), 2) AS commit_ratio
FROM pg_stat_database
WHERE datname = current_database();
-- Checkpoint activity (checkpoints should not occur more than once every
-- 5 minutes). req_ratio is the percentage of checkpoints that were
-- requested rather than triggered by the timer.
-- 100.0 forces numeric division; the counters are bigint, and integer
-- division would discard the fractional part before round() is applied.
-- NOTE: PostgreSQL 17 moved these counters to pg_stat_checkpointer
-- (num_timed, num_requested); adjust the query on that version or newer.
SELECT checkpoints_timed, checkpoints_req,
    round(checkpoints_req * 100.0 / nullif(checkpoints_timed + checkpoints_req, 0), 2) AS req_ratio
FROM pg_stat_bgwriter;