Redis服务配置知识点详解
一、Redis数据持久化
1.1 RDB持久化机制
知识点概述 :
RDB(Redis Database)持久化是将内存中的数据生成快照保存到磁盘的过程。它通过fork子进程的方式创建数据快照,生成.rdb文件。
配置语法:
bash
# redis.conf 配置
save 900 1 # 900秒内至少有1个key改变
save 300 10 # 300秒内至少有10个key改变
save 60 10000 # 60秒内至少有10000个key改变
dbfilename dump.rdb # RDB文件名
dir ./data # 文件保存目录
rdbcompression yes # 是否压缩
rdbchecksum yes # 是否校验和
案例代码:
python
import redis
import time

# Shared connection used by the demo functions below.
r = redis.Redis(host='localhost', port=6379, db=0, password='yourpassword')


def manual_rdb_save():
    """Trigger an RDB snapshot by hand and report persistence status.

    Uses BGSAVE (asynchronous, performed by a forked child) instead of
    SAVE, which would block the server while the snapshot is written.
    """
    try:
        result = r.execute_command('BGSAVE')
        print(f"BGSAVE触发结果: {result}")
        # Confirm via the persistence section of INFO.
        info = r.info('persistence')
        print(f"RDB上次保存时间: {info['rdb_last_save_time']}")
        print(f"RDB是否正在保存: {info['rdb_bgsave_in_progress']}")
    except Exception as e:
        print(f"RDB保存失败: {e}")


def simulate_data_persistence():
    """Write 10k keys and verify they are covered by an RDB snapshot."""
    # 1. Write test data.
    for i in range(10000):
        key = f"test:key:{i}"
        value = f"value_{i}_" + "x" * 100  # pad the value to a realistic size
        r.set(key, value)
        if i % 1000 == 0:
            print(f"已写入 {i} 条数据")
            # Kick off a snapshot every 1000 writes.  BGSAVE answers with
            # "Background save already in progress" if the previous fork
            # has not finished yet, so tolerate that specific error
            # instead of aborting the whole demo.
            try:
                r.execute_command('BGSAVE')
            except redis.exceptions.ResponseError:
                pass
    # 2. Verify the data is present.
    print("\n验证持久化数据:")
    last_key = f"test:key:{9999}"
    if r.exists(last_key):
        print(f"最后一条数据已持久化: {last_key}")
        print(f"数据大小: {r.memory_usage(last_key)} bytes")
    # 3. Report which file the snapshot is written to.
    config = r.config_get('dbfilename')
    print(f"RDB文件名: {config['dbfilename']}")


if __name__ == "__main__":
    manual_rdb_save()
    simulate_data_persistence()
1.2 RDB数据恢复案例
知识点概述 :
RDB文件用于Redis重启时的数据恢复,启动时系统会自动读取dump.rdb文件加载数据。注意:如果同时开启了AOF,Redis启动时会优先使用AOF文件恢复数据,而不是RDB文件。
案例代码:
python
import redis
import os
import shutil
import time
class RDBRecoveryDemo:
    """Walk through RDB backup, a simulated crash, and data recovery."""

    def __init__(self, host='localhost', port=6379, password=None):
        # Remember connection settings so the post-restart reconnect uses
        # the same credentials (the original reconnected without the
        # password and would fail against an auth-protected server).
        self.host = host
        self.port = port
        self.password = password
        self.client = redis.Redis(host=host, port=port, password=password, decode_responses=True)
        self.backup_dir = '/tmp/redis_backup'

    def prepare_test_data(self):
        """Flush the DB and load one sample key of every major type."""
        print("1. 准备测试数据...")
        test_data = {
            # String
            'str:name': 'Redis Demo',
            'str:version': '7.0',
            # List
            'list:cities': ['北京', '上海', '广州', '深圳'],
            # Hash
            'hash:user:1001': {'name': '张三', 'age': '28', 'city': '北京'},
            'hash:user:1002': {'name': '李四', 'age': '32', 'city': '上海'},
            # Set
            'set:tags': {'redis', 'database', 'cache', 'nosql'},
            # Sorted set
            'zset:scores': {'player1': 100, 'player2': 85, 'player3': 95}
        }
        # Start from a clean database.
        self.client.flushdb()
        # Dispatch on the key prefix to pick the matching write command.
        for key, value in test_data.items():
            if key.startswith('str:'):
                self.client.set(key, value)
            elif key.startswith('list:'):
                self.client.rpush(key, *value)
            elif key.startswith('hash:'):
                self.client.hset(key, mapping=value)
            elif key.startswith('set:'):
                self.client.sadd(key, *value)
            elif key.startswith('zset:'):
                self.client.zadd(key, {k: v for k, v in value.items()})
        print("测试数据准备完成")
        self._print_data_stats()

    def _print_data_stats(self):
        """Print every key with a type-appropriate dump of its value."""
        print("\n当前数据统计:")
        keys = self.client.keys('*')
        print(f"总键数: {len(keys)}")
        for key in keys:
            key_type = self.client.type(key)
            if key_type == 'string':
                print(f" {key}: {self.client.get(key)}")
            elif key_type == 'list':
                print(f" {key}: {self.client.lrange(key, 0, -1)}")
            elif key_type == 'hash':
                print(f" {key}: {self.client.hgetall(key)}")
            elif key_type == 'set':
                print(f" {key}: {self.client.smembers(key)}")
            elif key_type == 'zset':
                print(f" {key}: {self.client.zrange(key, 0, -1, withscores=True)}")

    def backup_rdb_file(self):
        """Force a snapshot and copy the RDB file to the backup directory."""
        print("\n2. 备份RDB文件...")
        # Resolve the RDB file location from the live configuration.
        rdb_filename = self.client.config_get('dbfilename')['dbfilename']
        rdb_dir = self.client.config_get('dir')['dir']
        rdb_path = os.path.join(rdb_dir, rdb_filename)
        # Trigger a snapshot and poll until the background save finishes
        # (a fixed sleep can race a slow fork on a large dataset).
        self.client.execute_command('BGSAVE')
        deadline = time.time() + 30
        while time.time() < deadline:
            if not self.client.info('persistence').get('rdb_bgsave_in_progress'):
                break
            time.sleep(0.2)
        # Create the backup directory on first use.
        if not os.path.exists(self.backup_dir):
            os.makedirs(self.backup_dir)
        backup_path = os.path.join(self.backup_dir, f"dump_{int(time.time())}.rdb")
        if os.path.exists(rdb_path):
            shutil.copy2(rdb_path, backup_path)
            print(f"RDB文件已备份到: {backup_path}")
            print(f"文件大小: {os.path.getsize(backup_path)} bytes")
        else:
            print("RDB文件不存在")

    def simulate_crash_and_recover(self):
        """Restart the server and verify the data loaded from dump.rdb."""
        print("\n3. 模拟崩溃和数据恢复...")
        print("正在停止Redis服务...")
        os.system("redis-cli shutdown")
        time.sleep(2)
        print("正在启动Redis服务...")
        os.system("redis-server /etc/redis/redis.conf")
        time.sleep(3)
        # Reconnect with the original credentials, not an anonymous session.
        self.client = redis.Redis(host=self.host, port=self.port,
                                  password=self.password, decode_responses=True)
        print("\n4. 验证恢复的数据:")
        self._print_data_stats()

    def verify_data_integrity(self):
        """Spot-check that expected keys exist and kept their types."""
        print("\n5. 验证数据完整性...")
        expected_keys = ['str:name', 'hash:user:1001', 'set:tags']
        for key in expected_keys:
            if self.client.exists(key):
                print(f"✓ {key} 存在")
            else:
                print(f"✗ {key} 丢失")
        # Deep-check one hash.
        key = 'hash:user:1001'
        if self.client.exists(key):
            data_type = self.client.type(key)
            if data_type == 'hash':
                data = self.client.hgetall(key)
                print(f"✓ {key} 类型正确: {data_type}, 数据: {data}")
            else:
                print(f"✗ {key} 类型错误: {data_type}")

    def run_recovery_demo(self):
        """Run prepare → backup → crash/recover → verify."""
        try:
            self.prepare_test_data()
            self.backup_rdb_file()
            self.simulate_crash_and_recover()
            self.verify_data_integrity()
        except Exception as e:
            print(f"恢复演示失败: {e}")
if __name__ == "__main__":
    # Script entry point: run the full RDB recovery walk-through.
    RDBRecoveryDemo(password='yourpassword').run_recovery_demo()
1.3 AOF持久化机制
知识点概述 :
AOF(Append Only File)以日志形式记录每个写操作,追加到文件中。恢复时重新执行这些命令。
配置语法:
bash
# redis.conf
appendonly yes # 开启AOF
appendfilename "appendonly.aof" # AOF文件名
appendfsync everysec # 同步频率:always|everysec|no
no-appendfsync-on-rewrite no # 重写时是否不执行fsync
auto-aof-rewrite-percentage 100 # 增长率触发重写
auto-aof-rewrite-min-size 64mb # 最小大小触发重写
aof-load-truncated yes # 加载截断的AOF文件
aof-use-rdb-preamble yes # 使用RDB作为AOF的前缀
案例代码:
python
import redis
import time
import threading
class AOFDemo:
    """Demonstrate AOF status, growth monitoring, rewrite and verification."""

    def __init__(self, host='localhost', port=6379, password=None):
        self.client = redis.Redis(host=host, port=port, password=password, decode_responses=True)
        # Polled by the monitor thread; initialised here so run_aof_demo
        # can always clear it, even if monitoring was never started.
        self.monitoring = False

    def check_aof_status(self):
        """Print the AOF-related fields of INFO persistence."""
        info = self.client.info('persistence')
        print("=== AOF状态 ===")
        print(f"AOF开启: {info['aof_enabled']}")
        print(f"AOF文件大小: {info.get('aof_current_size', 0)} bytes")
        print(f"AOF基础文件大小: {info.get('aof_base_size', 0)} bytes")
        print(f"AOF同步策略: {self.client.config_get('appendfsync')['appendfsync']}")
        print(f"待处理的重写操作: {info.get('aof_rewrite_in_progress', False)}")

    def simulate_write_operations(self):
        """Issue a scripted mix of write commands that land in the AOF."""
        print("\n=== 模拟写操作 ===")
        operations = [
            ("set", "user:1", "张三"),
            ("set", "user:2", "李四"),
            ("hset", "user:1:info", "age", "25"),
            ("hset", "user:1:info", "city", "北京"),
            ("lpush", "recent_users", "user:1", "user:2"),
            ("sadd", "active_users", "user:1"),
            ("zadd", "scores", {"user:1": 95, "user:2": 87})
        ]
        for i, op in enumerate(operations, 1):
            try:
                # Dispatch on the command name stored in op[0].
                if op[0] == "set":
                    self.client.set(op[1], op[2])
                    print(f"[{i}] SET {op[1]} = {op[2]}")
                elif op[0] == "hset":
                    self.client.hset(op[1], op[2], op[3])
                    print(f"[{i}] HSET {op[1]} {op[2]} = {op[3]}")
                elif op[0] == "lpush":
                    self.client.lpush(op[1], *op[2:])
                    print(f"[{i}] LPUSH {op[1]} {op[2:]}")
                elif op[0] == "sadd":
                    self.client.sadd(op[1], *op[2:])
                    print(f"[{i}] SADD {op[1]} {op[2:]}")
                elif op[0] == "zadd":
                    self.client.zadd(op[1], op[2])
                    print(f"[{i}] ZADD {op[1]} {op[2]}")
                time.sleep(0.5)  # space writes out so the monitor shows growth
            except Exception as e:
                print(f"操作失败: {e}")

    def monitor_aof_file(self):
        """Start a daemon thread that reports AOF size every 2 seconds."""
        def _monitor():
            while self.monitoring:
                info = self.client.info('persistence')
                current_size = info.get('aof_current_size', 0)
                base_size = info.get('aof_base_size', 0)
                print(f"\rAOF当前大小: {current_size/1024:.2f}KB, "
                      f"基础大小: {base_size/1024:.2f}KB", end="")
                time.sleep(2)

        self.monitoring = True
        monitor_thread = threading.Thread(target=_monitor)
        monitor_thread.daemon = True
        monitor_thread.start()
        return monitor_thread

    def manual_aof_rewrite(self):
        """Trigger BGREWRITEAOF and poll until the rewrite completes."""
        print("\n=== 手动触发AOF重写 ===")
        try:
            result = self.client.execute_command('BGREWRITEAOF')
            print(f"AOF重写触发结果: {result}")
            # Poll for up to 10 seconds for the rewrite to finish.
            for i in range(10):
                info = self.client.info('persistence')
                if info.get('aof_rewrite_in_progress') == 1:
                    print(f"AOF重写进行中... ({i+1}/10)")
                else:
                    print("AOF重写完成!")
                    new_info = self.client.info('persistence')
                    print(f"重写后大小: {new_info.get('aof_current_size', 0)} bytes")
                    break
                time.sleep(1)
        except Exception as e:
            print(f"AOF重写失败: {e}")

    def verify_aof_commands(self):
        """Show per-command call counts as indirect evidence of AOF writes."""
        print("\n=== 验证AOF命令记录 ===")
        cmd_stats = self.client.info('commandstats')
        print("命令执行统计:")
        for cmd, stats in cmd_stats.items():
            if cmd.startswith('cmdstat_'):
                cmd_name = cmd[8:]  # strip the 'cmdstat_' prefix
                calls = stats['calls']
                if int(calls) > 0:
                    print(f" {cmd_name}: {stats}")

    def run_aof_demo(self):
        """Run the whole AOF demonstration end to end."""
        monitor_thread = None
        try:
            # 1. Initial state.
            self.check_aof_status()
            # 2. Start the size monitor.
            monitor_thread = self.monitor_aof_file()
            # 3. Generate write traffic.
            self.simulate_write_operations()
            # 4. Compact the AOF.
            time.sleep(2)
            self.manual_aof_rewrite()
            # 5. Verify via command stats.
            self.verify_aof_commands()
        except Exception as e:
            print(f"AOF演示失败: {e}")
        finally:
            # Always stop the monitor thread, even when a step failed;
            # the original left it running on any exception.
            self.monitoring = False
            if monitor_thread is not None:
                monitor_thread.join(timeout=1)
if __name__ == "__main__":
    # Script entry point: run the full AOF demonstration.
    AOFDemo(password='yourpassword').run_aof_demo()
1.4 AOF数据恢复案例
知识点概述 :
AOF文件损坏时的修复和恢复策略。
案例代码:
python
import os
import shutil
import subprocess
import tempfile
import time

import redis
class AOFRecoveryDemo:
    """Demonstrate corrupting an AOF file, repairing it and recovering."""

    def __init__(self, host='localhost', port=6379, password=None):
        # Keep the credentials so the post-restart reconnect can reuse them
        # (the original hard-coded the password on reconnect).
        self.host = host
        self.port = port
        self.password = password
        self.client = redis.Redis(host=host, port=port, password=password, decode_responses=True)
        self.aof_path = self._get_aof_path()

    def _get_aof_path(self):
        """Resolve the AOF file path from the server configuration."""
        aof_filename = self.client.config_get('appendfilename')['appendfilename']
        aof_dir = self.client.config_get('dir')['dir']
        return os.path.join(aof_dir, aof_filename)

    def create_test_scenario(self):
        """Generate a mixed write workload so the AOF has real content."""
        print("=== 创建测试数据 ===")
        # Start from an empty database.
        self.client.flushdb()
        # Simulate business traffic: a counter, a queue and a config hash.
        for i in range(100):
            self.client.incr('counter')
            self.client.rpush('list:queue', f"item_{i}")
            self.client.hset('hash:config', f"key_{i}", f"value_{i}")
            # Periodically refresh the counter TTL.
            if i % 10 == 0:
                self.client.expire('counter', 3600)
            # Occasional delete so the AOF also records removals.
            if i % 20 == 0:
                self.client.lpop('list:queue')
            if i % 50 == 0:
                print(f"已完成 {i} 次操作")
        # Verify the resulting data.
        print(f"最终计数器值: {self.client.get('counter')}")
        print(f"队列长度: {self.client.llen('list:queue')}")
        print(f"配置项数量: {self.client.hlen('hash:config')}")
        # Compact the AOF so it starts from a clean rewritten base.
        self.client.execute_command('BGREWRITEAOF')
        time.sleep(2)

    def simulate_aof_corruption(self):
        """Append garbage bytes to the AOF to simulate a partial write."""
        print("\n=== 模拟AOF文件损坏 ===")
        # Keep a backup copy before damaging the file (copy with shutil
        # instead of shelling out to `cp`).
        backup_path = self.aof_path + ".backup"
        shutil.copy2(self.aof_path, backup_path)
        # Corrupt the file: append invalid data at the end.
        with open(self.aof_path, 'ab') as f:
            f.write(b"INVALID COMMAND DATA\x00\x01\x02")
        print(f"AOF文件已损坏: {self.aof_path}")
        print(f"备份文件: {backup_path}")

    def repair_aof_file(self):
        """Check and repair the AOF with the redis-check-aof tool."""
        print("\n=== 修复AOF文件 ===")
        try:
            # Dry-run check first.
            check_cmd = f"redis-check-aof {self.aof_path}"
            result = subprocess.run(check_cmd.split(), capture_output=True, text=True)
            print("检查结果:", result.stdout)
            # --fix truncates the file at the first corrupt entry; the tool
            # asks for confirmation, which we answer with 'y' on stdin.
            fix_cmd = f"redis-check-aof --fix {self.aof_path}"
            print(f"执行修复命令: {fix_cmd}")
            process = subprocess.Popen(
                fix_cmd.split(),
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            stdout, stderr = process.communicate(input='y\n')
            print("修复输出:", stdout)
        except Exception as e:
            print(f"修复失败: {e}")

    def recover_from_aof(self):
        """Restart Redis so it replays the (repaired) AOF, then inspect."""
        print("\n=== 从AOF恢复数据 ===")
        print("停止Redis服务...")
        os.system("redis-cli shutdown")
        time.sleep(2)
        print("启动Redis服务...")
        os.system("redis-server /etc/redis/redis.conf")
        time.sleep(3)
        # Reconnect with the credentials given at construction time.
        self.client = redis.Redis(host=self.host, port=self.port,
                                  password=self.password, decode_responses=True)
        # Inspect the recovered data.
        print("\n恢复后的数据:")
        print(f"计数器值: {self.client.get('counter')}")
        print(f"队列前5个元素: {self.client.lrange('list:queue', 0, 5)}")
        print(f"配置项示例: {self.client.hgetall('hash:config')}")

    def verify_recovery(self):
        """Check that the expected keys survived with the right types."""
        print("\n=== 验证恢复结果 ===")
        expected_checks = [
            ('counter', 'string'),
            ('list:queue', 'list'),
            ('hash:config', 'hash')
        ]
        for key, expected_type in expected_checks:
            if self.client.exists(key):
                actual_type = self.client.type(key)
                if actual_type == expected_type:
                    print(f"✓ {key}: 存在且类型正确 ({actual_type})")
                else:
                    print(f"✗ {key}: 类型错误 (期望: {expected_type}, 实际: {actual_type})")
            else:
                print(f"✗ {key}: 不存在")
        # Overall key count.
        print(f"\n键总数: {len(self.client.keys('*'))}")

    def run_recovery_demo(self):
        """Full corruption/repair/recovery walk-through."""
        try:
            # 1. Populate the AOF.
            self.create_test_scenario()
            # 2. Keep a known-good copy.
            shutil.copy2(self.aof_path, self.aof_path + ".good")
            # 3. Corrupt the live file.
            self.simulate_aof_corruption()
            # 4. A restart should now fail to load the AOF.
            print("\n尝试重启Redis(预期失败)...")
            os.system("redis-server /etc/redis/redis.conf")
            time.sleep(2)
            # 5. Repair the file.
            self.repair_aof_file()
            # 6. Recover the data.
            self.recover_from_aof()
            # 7. Verify the result.
            self.verify_recovery()
        except Exception as e:
            print(f"恢复演示失败: {e}")
if __name__ == "__main__":
    # Script entry point: run the AOF corruption/repair walk-through.
    AOFRecoveryDemo(password='yourpassword').run_recovery_demo()
二、Redis线程模型
2.1 事件驱动模型
知识点概述 :
Redis基于Reactor模式实现的事件驱动模型,使用单线程处理网络IO请求。
案例代码:
python
import redis
import time
import threading
from concurrent.futures import ThreadPoolExecutor
class RedisThreadModelDemo:
    """Inspect the single-threaded, event-driven (Reactor) model of Redis."""

    def __init__(self, host='localhost', port=6379, password=None):
        self.client = redis.Redis(
            host=host,
            port=port,
            password=password,
            decode_responses=True,
            socket_keepalive=True,
            socket_connect_timeout=5
        )

    def analyze_thread_model(self):
        """Print server/process facts relevant to the threading model."""
        print("=== Redis线程模型分析 ===")
        info = self.client.info()
        print(f"Redis版本: {info['redis_version']}")
        print(f"进程ID: {info['process_id']}")
        print(f"TCP端口: {info['tcp_port']}")
        print(f"已连接客户端数: {info['connected_clients']}")
        print(f"线程数: {self._get_server_threads()}")
        # Event-loop level statistics.
        print(f"总连接数: {info['total_connections_received']}")
        print(f"拒绝连接数: {info.get('rejected_connections', 0)}")

    def _get_server_threads(self):
        """Return CPU-time counters as a proxy for server thread activity."""
        try:
            cpu_info = self.client.info('cpu')
            return {
                'used_cpu_sys': cpu_info.get('used_cpu_sys', 0),
                'used_cpu_user': cpu_info.get('used_cpu_user', 0),
                'used_cpu_sys_children': cpu_info.get('used_cpu_sys_children', 0),
                'used_cpu_user_children': cpu_info.get('used_cpu_user_children', 0)
            }
        except Exception:  # narrowed from a bare except
            return "线程信息不可用"

    def simulate_concurrent_requests(self):
        """Fan pipelined requests out of a thread pool and time them."""
        print("\n=== 模拟并发请求 ===")

        def send_request(request_id):
            """Send one pipelined batch of commands and time it."""
            try:
                start = time.time()
                # Several commands batched in a pipeline to mimic real work.
                pipe = self.client.pipeline()
                pipe.set(f"key:{request_id}", f"value:{request_id}")
                pipe.get(f"key:{request_id}")
                pipe.incr("global:counter")
                pipe.lpush("request:queue", request_id)
                pipe.execute()
                elapsed = time.time() - start
                return f"请求 {request_id} 完成, 耗时: {elapsed:.4f}秒"
            except Exception as e:
                return f"请求 {request_id} 失败: {e}"

        # Drive concurrency from a client-side thread pool.
        with ThreadPoolExecutor(max_workers=10) as executor:
            futures = [executor.submit(send_request, i) for i in range(20)]
            for future in futures:
                print(future.result())
        # Server-side processing statistics.
        cmd_stats = self.client.info('commandstats')
        print("\n命令处理统计:")
        for cmd, stats in cmd_stats.items():
            if cmd.startswith('cmdstat_'):
                calls = stats['calls']
                if int(calls) > 0:
                    print(f" {cmd}: {stats}")

    def monitor_event_loop(self):
        """Inspect LATENCY data and connection counters."""
        print("\n=== 事件循环监控 ===")
        try:
            # Latest latency samples (requires latency-monitor-threshold > 0).
            latency_stats = self.client.execute_command('LATENCY', 'LATEST')
            if latency_stats:
                print("最新延迟统计:")
                for stat in latency_stats:
                    print(f" {stat}")
            history = self.client.execute_command('LATENCY', 'HISTORY', 'command')
            if history:
                print("\n命令延迟历史:")
                for entry in history[-5:]:  # show the five most recent
                    print(f" {entry}")
        except redis.exceptions.ResponseError:
            print("延迟监控未启用")
        # maxclients / connected_clients live in the "clients" section of
        # INFO, not in "stats" — the original read the wrong section and
        # always printed N/A.
        stats = self.client.info('stats')
        clients = self.client.info('clients')
        print(f"\n文件描述符统计:")
        print(f" 最大文件描述符: {clients.get('maxclients', 'N/A')}")
        print(f" 当前连接数: {clients.get('connected_clients', 'N/A')}")
        print(f" 总连接数: {stats.get('total_connections_received', 'N/A')}")

    def demonstrate_non_blocking(self):
        """Contrast a slow background task with fast foreground PINGs."""
        print("\n=== 非阻塞操作演示 ===")

        def long_running_operation():
            """Stand-in for an expensive client-side task."""
            time.sleep(2)
            return "长时间操作完成"

        def quick_operation():
            return self.client.ping()

        result_container = []

        def async_task():
            result_container.append(long_running_operation())

        # Run the slow task on a background thread (threading is already
        # imported at module level)...
        thread = threading.Thread(target=async_task)
        thread.start()
        # ...while the main thread keeps issuing fast commands.
        for i in range(5):
            print(f"主线程执行快速操作 {i+1}: {quick_operation()}")
            time.sleep(0.2)
        thread.join()
        print(f"后台任务结果: {result_container[0]}")
        # Summarise why single-threaded Redis still scales.
        print("\n=== Redis单线程模型说明 ===")
        print("""
Redis虽然是单线程处理命令,但:
1. 使用I/O多路复用技术(epoll/kqueue)处理并发连接
2. 持久化操作由子进程处理,不阻塞主线程
3. 异步删除操作(UNLINK)可以避免大key阻塞
4. 每个命令执行都是原子的,无需加锁
""")

    def run_demo(self):
        """Execute all demonstrations in order."""
        self.analyze_thread_model()
        self.simulate_concurrent_requests()
        self.monitor_event_loop()
        self.demonstrate_non_blocking()
if __name__ == "__main__":
    # Script entry point: run the thread-model demonstration.
    RedisThreadModelDemo(password='yourpassword').run_demo()
2.2 IO多线程
知识点概述 :
Redis 6.0引入的多线程IO,用于处理网络数据的读写,但命令执行仍然是单线程。
配置语法:
bash
# redis.conf
io-threads 4 # IO线程数(通常设置为CPU核心数)
io-threads-do-reads yes # 读操作也使用IO线程
案例代码:
python
import redis
import time
import threading
import random
from concurrent.futures import ThreadPoolExecutor
class IOMultiThreadDemo:
    """Explore the Redis 6.0 threaded-I/O feature (io-threads)."""

    def __init__(self, host='localhost', port=6379, password=None):
        self.client = redis.Redis(
            host=host,
            port=port,
            password=password,
            decode_responses=True,
            socket_keepalive=True,
            health_check_interval=30
        )

    def check_io_threads_config(self):
        """Print the current io-threads settings and basic server facts."""
        print("=== IO线程配置检查 ===")
        io_threads = self.client.config_get('io-threads')
        io_threads_do_reads = self.client.config_get('io-threads-do-reads')
        print(f"IO线程数: {io_threads.get('io-threads', '未配置')}")
        print(f"IO线程处理读操作: {io_threads_do_reads.get('io-threads-do-reads', '未配置')}")
        info = self.client.info('server')
        print(f"Redis版本: {info['redis_version']}")
        print(f"架构: {info['arch_bits']}位")
        print(f"进程ID: {info['process_id']}")

    def configure_io_threads(self, thread_count=4, enable_reads=True):
        """Try to set io-threads at runtime (requires CONFIG SET rights)."""
        print(f"\n=== 配置IO线程 ===")
        print(f"设置线程数: {thread_count}")
        print(f"启用读线程: {enable_reads}")
        try:
            self.client.config_set('io-threads', str(thread_count))
            self.client.config_set('io-threads-do-reads', 'yes' if enable_reads else 'no')
            # Read the value back to confirm it stuck.
            new_threads = self.client.config_get('io-threads')
            print(f"配置成功: IO线程数 = {new_threads['io-threads']}")
        except redis.exceptions.ResponseError as e:
            print(f"配置失败: {e}")
            print("注意: IO线程配置可能需要重启Redis服务生效")

    def benchmark_with_different_threads(self):
        """Measure pipeline throughput at several client concurrency levels."""
        print("\n=== IO线程性能对比测试 ===")

        def execute_operations(count=1000):
            """Queue a mixed batch of commands in one pipeline."""
            pipe = self.client.pipeline()
            for i in range(count):
                # Rotate through five operation types.
                op_type = i % 5
                key = f"bench:key:{i}"
                if op_type == 0:
                    pipe.set(key, f"value_{i}")
                elif op_type == 1:
                    pipe.get(key)
                elif op_type == 2:
                    pipe.hset(f"bench:hash:{i%10}", f"field_{i}", f"value_{i}")
                elif op_type == 3:
                    pipe.lpush(f"bench:list:{i%5}", f"item_{i}")
                elif op_type == 4:
                    pipe.sadd(f"bench:set:{i%3}", f"member_{i}")
            results = pipe.execute()
            return len(results)

        # Test increasing client-side concurrency.
        for concurrency in [1, 5, 10, 20]:
            print(f"\n并发级别: {concurrency}")
            with ThreadPoolExecutor(max_workers=concurrency) as executor:
                start_time = time.time()
                futures = [executor.submit(execute_operations, 500)
                           for _ in range(concurrency)]
                # Wait for all workers and total their operation counts.
                total_ops = sum(future.result() for future in futures)
                elapsed = time.time() - start_time
                ops_per_sec = total_ops / elapsed
                print(f" 总操作数: {total_ops}")
                print(f" 耗时: {elapsed:.2f}秒")
                print(f" 吞吐量: {ops_per_sec:.0f} ops/s")

    def monitor_io_threads_usage(self):
        """Print CPU and network-I/O counters from INFO."""
        print("\n=== IO线程监控 ===")
        cpu_info = self.client.info('cpu')
        print("CPU使用统计:")
        print(f" 系统CPU时间: {cpu_info.get('used_cpu_sys', 0):.2f}秒")
        print(f" 用户CPU时间: {cpu_info.get('used_cpu_user', 0):.2f}秒")
        print(f" 子进程系统CPU: {cpu_info.get('used_cpu_sys_children', 0):.2f}秒")
        print(f" 子进程用户CPU: {cpu_info.get('used_cpu_user_children', 0):.2f}秒")
        stats = self.client.info('stats')
        print("\n网络IO统计:")
        print(f" 总输入流量: {stats.get('total_net_input_bytes', 0) / 1024 / 1024:.2f} MB")
        print(f" 总输出流量: {stats.get('total_net_output_bytes', 0) / 1024 / 1024:.2f} MB")
        print(f" 瞬时输入流量: {stats.get('instantaneous_input_kbps', 0):.2f} KB/s")
        print(f" 瞬时输出流量: {stats.get('instantaneous_output_kbps', 0):.2f} KB/s")

    def demonstrate_mixed_workload(self):
        """Run IO-heavy and CPU-heavy workers concurrently."""
        print("\n=== 混合工作负载演示 ===")

        def heavy_io_workload(worker_id, count=100):
            """Worker dominated by Redis round-trips."""
            for i in range(count):
                # Read a large value.
                key = f"large:key:{i % 10}"
                value = self.client.get(key)
                # Write a TTL'd value.
                new_key = f"worker:{worker_id}:{i}"
                self.client.setex(new_key, 60, f"data_{i}" * 100)
                # List push.
                self.client.lpush(f"worker:{worker_id}:list", i)
                if i % 10 == 0:
                    print(f" 工作器 {worker_id} 完成 {i} 次操作")

        def cpu_intensive_workload(worker_id, count=50):
            """Worker dominated by client-side computation."""
            for i in range(count):
                # Client-side number crunching.
                data = [random.random() for _ in range(1000)]
                sorted_data = sorted(data)
                avg = sum(sorted_data) / len(sorted_data)
                # A single cheap Redis call per iteration.
                self.client.set(f"cpu:worker:{worker_id}:{i}", str(avg))
                if i % 10 == 0:
                    print(f" CPU工作器 {worker_id} 完成 {i} 次计算")

        # Seed the large values read by the IO workers.
        print("准备测试数据...")
        for i in range(10):
            self.client.set(f"large:key:{i}", "x" * 1024 * 100)  # 100KB string
        # Run both workload types side by side.
        with ThreadPoolExecutor(max_workers=8) as executor:
            io_futures = [executor.submit(heavy_io_workload, i, 50) for i in range(4)]
            cpu_futures = [executor.submit(cpu_intensive_workload, i, 25) for i in range(4, 8)]
            for future in io_futures + cpu_futures:
                future.result()
        print("混合负载演示完成")

    def analyze_io_threads_efficiency(self):
        """Summarise command timing and suggest an io-threads setting."""
        # Imported locally: the module only imports `os` inside the
        # __main__ guard, so without this the method raises NameError when
        # the class is imported as a library.
        import os
        print("\n=== IO线程效率分析 ===")
        io_threads = self.client.config_get('io-threads')
        current_threads = int(io_threads.get('io-threads', 1))
        cmd_stats = self.client.info('commandstats')
        # Aggregate call counts and total microseconds across commands.
        total_calls = 0
        total_usec = 0
        for cmd, stats in cmd_stats.items():
            if cmd.startswith('cmdstat_'):
                total_calls += int(stats['calls'])
                total_usec += int(stats['usec'])
        if total_calls > 0:
            avg_usec = total_usec / total_calls
            print(f"命令总数: {total_calls}")
            print(f"总耗时: {total_usec / 1000000:.2f}秒")
            print(f"平均命令耗时: {avg_usec:.2f}微秒")
        clients = self.client.info('clients')
        print(f"\n连接统计:")
        print(f" 已连接客户端: {clients['connected_clients']}")
        print(f" 最大连接数: {clients.get('maxclients', 'N/A')}")
        print(f" 阻塞客户端: {clients.get('blocked_clients', 0)}")
        # Simple heuristic recommendation based on CPU core count.
        print(f"\nIO线程优化建议:")
        if current_threads == 1:
            print(" - 当前使用单线程IO,建议根据CPU核心数启用多线程IO")
            print(f" - 推荐设置: io-threads {os.cpu_count() or 4}")
        elif current_threads > os.cpu_count():
            print(f" - IO线程数({current_threads})超过CPU核心数({os.cpu_count()}),可能降低效率")
        else:
            print(f" - IO线程配置合理")

    def run_demo(self):
        """Run every IO-thread demonstration in sequence."""
        print("=" * 50)
        print("Redis IO多线程演示")
        print("=" * 50)
        self.check_io_threads_config()
        # self.configure_io_threads(4, True)  # left disabled to avoid mutating config
        self.benchmark_with_different_threads()
        self.monitor_io_threads_usage()
        self.demonstrate_mixed_workload()
        self.analyze_io_threads_efficiency()
if __name__ == "__main__":
    import os  # bound at module scope for analyze_io_threads_efficiency
    IOMultiThreadDemo(password='yourpassword').run_demo()
三、Redis高级特性
3.1 过期数据淘汰策略
知识点概述 :
Redis的内存淘汰策略和过期键删除策略。
配置语法:
bash
# redis.conf
maxmemory 2gb # 最大内存限制
maxmemory-policy allkeys-lru # 内存淘汰策略
maxmemory-samples 5 # 淘汰采样数量
策略类型:
- noeviction:不淘汰,写入时返回错误
- allkeys-lru:所有key中淘汰最近最少使用的
- volatile-lru:设置过期时间的key中淘汰最近最少使用的
- allkeys-lfu:所有key中淘汰最不经常使用的(Redis 4.0+)
- volatile-lfu:设置过期时间的key中淘汰最不经常使用的(Redis 4.0+)
- allkeys-random:所有key中随机淘汰
- volatile-random:设置过期时间的key中随机淘汰
- volatile-ttl:淘汰剩余时间最短的key
案例代码:
python
import redis
import time
import random
import string
class ExpirationEvictionDemo:
    """Demonstrate key expiration and maxmemory eviction policies."""

    def __init__(self, host='localhost', port=6379, password=None):
        self.client = redis.Redis(
            host=host,
            port=port,
            password=password,
            decode_responses=True
        )

    def check_memory_policy(self):
        """Print maxmemory settings and current memory usage."""
        print("=== 内存策略检查 ===")
        maxmemory = self.client.config_get('maxmemory')
        policy = self.client.config_get('maxmemory-policy')
        samples = self.client.config_get('maxmemory-samples')
        print(f"最大内存: {int(maxmemory['maxmemory']) / 1024 / 1024:.0f} MB")
        print(f"淘汰策略: {policy['maxmemory-policy']}")
        print(f"采样数量: {samples['maxmemory-samples']}")
        info = self.client.info('memory')
        print(f"当前内存使用: {info['used_memory_human']}")
        print(f"内存峰值: {info['used_memory_peak_human']}")
        print(f"内存碎片率: {info['mem_fragmentation_ratio']}")

    def demonstrate_expiration(self):
        """Show EXPIRE / SETEX / EXPIREAT and watch the keys expire."""
        print("\n=== 过期机制演示 ===")
        print("1. 设置带过期时间的key:")
        # EXPIRE: second-granularity TTL applied after the write.
        self.client.set('key:expire:1', 'value1')
        self.client.expire('key:expire:1', 5)
        ttl = self.client.ttl('key:expire:1')
        print(f" key:expire:1 过期时间: {ttl}秒")
        # SETEX: value and TTL set in one command.
        self.client.setex('key:expire:2', 10, 'value2')
        ttl = self.client.ttl('key:expire:2')
        print(f" key:expire:2 过期时间: {ttl}秒")
        # EXPIREAT: absolute unix-timestamp deadline.
        expire_at = int(time.time()) + 15
        self.client.set('key:expire:3', 'value3')
        self.client.expireat('key:expire:3', expire_at)
        ttl = self.client.ttl('key:expire:3')
        print(f" key:expire:3 过期时间戳: {expire_at}, TTL: {ttl}秒")
        # Poll existence once per second to watch the keys die.
        print("\n2. 监控过期过程:")
        for i in range(7):
            exists1 = self.client.exists('key:expire:1')
            exists2 = self.client.exists('key:expire:2')
            exists3 = self.client.exists('key:expire:3')
            print(f" 第{i}秒: key1={'存在' if exists1 else '过期'}, "
                  f"key2={'存在' if exists2 else '过期'}, "
                  f"key3={'存在' if exists3 else '过期'}")
            time.sleep(1)

    def demonstrate_volatile_keys(self):
        """Create persistent and TTL'd keys and show keyspace stats."""
        print("\n=== Volatile键管理 ===")
        print("创建测试键:")
        # Keys with no TTL.
        self.client.set('persistent:key1', 'persistent value 1')
        self.client.set('persistent:key2', 'persistent value 2')
        print(" persistent:key1/2 - 持久键")
        # Keys with a random TTL between 30 and 60 seconds.
        for i in range(10):
            key = f'volatile:key:{i}'
            self.client.setex(key, random.randint(30, 60), f'volatile value {i}')
        print(" 10个volatile键 - 带过期时间")
        # Keyspace summary per database.
        info = self.client.info('keyspace')
        print(f"\n键空间信息:")
        for db, stats in info.items():
            if db.startswith('db'):
                print(f" {db}: {stats}")
        expired_keys = self.client.info('stats')['expired_keys']
        print(f"历史过期键总数: {expired_keys}")

    def simulate_memory_pressure(self):
        """Write large values to push past maxmemory and trigger eviction."""
        print("\n=== 内存压力模拟 ===")
        maxmemory = int(self.client.config_get('maxmemory')['maxmemory'])
        policy = self.client.config_get('maxmemory-policy')['maxmemory-policy']
        print(f"当前配置: 最大内存={maxmemory/1024/1024:.0f}MB, 策略={policy}")
        if maxmemory == 0:
            # maxmemory 0 means "unlimited": nothing can ever be evicted.
            print("未设置内存限制,跳过内存压力测试")
            return
        # How much headroom is left before eviction kicks in?
        info = self.client.info('memory')
        used_memory = info['used_memory']
        available = maxmemory - used_memory
        print(f"已用内存: {used_memory/1024/1024:.2f}MB")
        print(f"可用内存: {available/1024/1024:.2f}MB")
        if available <= 0:
            print("内存已满,直接测试淘汰行为")

        def create_large_key(key_prefix, size_mb):
            """Write one random string of size_mb megabytes."""
            data = ''.join(random.choices(string.ascii_letters, k=size_mb * 1024 * 1024))
            key = f"{key_prefix}:{int(time.time())}"
            self.client.set(key, data)
            return key

        print("\n开始写入大键测试淘汰...")
        created_keys = []
        try:
            for i in range(5):
                key = create_large_key(f"large:test:{policy}", 10)  # 10MB each
                created_keys.append(key)
                print(f" 已创建: {key}")
                # Watch the eviction counter move.
                evicted_keys = self.client.info('stats')['evicted_keys']
                print(f" 淘汰键数: {evicted_keys}")
                time.sleep(1)
        except redis.exceptions.ResponseError as e:
            # noeviction answers writes with an OOM error instead of evicting.
            if "OOM" in str(e):
                print(f" 内存不足错误: {e}")
            else:
                print(f" 错误: {e}")

    def test_different_policies(self):
        """Cycle through the eviction policies and observe the survivors."""
        print("\n=== 策略效果测试 ===")
        scenarios = [
            ('volatile-lru', '最近最少使用的过期键'),
            ('allkeys-lru', '最近最少使用的所有键'),
            ('volatile-random', '随机过期键'),
            ('allkeys-random', '随机所有键'),
            ('volatile-ttl', '剩余时间最短的过期键'),
            ('noeviction', '不淘汰,返回错误')
        ]
        for policy, description in scenarios:
            print(f"\n测试策略: {policy}")
            print(f"描述: {description}")
            try:
                original_policy = self.client.config_get('maxmemory-policy')['maxmemory-policy']
            except redis.exceptions.ResponseError as e:
                print(f" 测试失败: {e}")
                continue
            try:
                self.client.config_set('maxmemory-policy', policy)
                self.client.flushdb()
                # Create keys with two distinct access patterns.
                for i in range(20):
                    if i < 10:
                        # Hot keys: read repeatedly so LRU favours them.
                        self.client.setex(f"hot:key:{i}", 60, f"hot value {i}")
                        for _ in range(i + 1):
                            self.client.get(f"hot:key:{i}")
                    else:
                        # Cold keys: written once, never read again.
                        self.client.setex(f"cold:key:{i}", 60, f"cold value {i}")
                    time.sleep(0.1)
                # Trigger eviction and inspect what survived.
                self.simulate_memory_pressure()
                remaining_keys = self.client.keys('*')
                print(f" 剩余键数: {len(remaining_keys)}")
                print(f" 剩余键示例: {remaining_keys[:5]}")
            except redis.exceptions.ResponseError as e:
                print(f" 测试失败: {e}")
            finally:
                # Always restore the original policy, even when the test
                # raised — the original left the server policy changed.
                try:
                    self.client.config_set('maxmemory-policy', original_policy)
                except redis.exceptions.ResponseError:
                    pass

    def monitor_expiration_events(self):
        """Subscribe to keyspace notifications and watch keys expire."""
        print("\n=== 过期事件监控 ===")
        pubsub = None
        try:
            # 'Ex' = keyevent notifications for expired events.
            self.client.config_set('notify-keyspace-events', 'Ex')
            print("已启用过期事件通知")
            pubsub = self.client.pubsub()
            pubsub.psubscribe('__keyevent@0__:expired')
            # Listen on a daemon thread (threading is not imported at
            # module level in this snippet, so import it locally).
            import threading

            def listen_events():
                for message in pubsub.listen():
                    if message['type'] == 'pmessage':
                        print(f" 键过期: {message['data']}")

            listener = threading.Thread(target=listen_events, daemon=True)
            listener.start()
            # Create short-lived keys and wait for them to expire.
            print("创建过期键...")
            for i in range(5):
                key = f"monitor:key:{i}"
                self.client.setex(key, 2, f"value {i}")
                print(f" 已创建: {key} (2秒后过期)")
            time.sleep(5)
        except redis.exceptions.ResponseError as e:
            print(f"事件监控失败: {e}")
        finally:
            if pubsub is not None:
                # Close the subscription so the daemon listener can exit.
                pubsub.close()

    def cleanup(self):
        """Flush the test database."""
        print("\n=== 清理测试数据 ===")
        self.client.flushdb()
        print("测试数据已清理")

    def run_demo(self):
        """Run all expiration/eviction demonstrations, then clean up."""
        try:
            self.check_memory_policy()
            self.demonstrate_expiration()
            self.demonstrate_volatile_keys()
            self.simulate_memory_pressure()
            self.test_different_policies()
            self.monitor_expiration_events()
        finally:
            self.cleanup()
if __name__ == "__main__":
    # Script entry point: run the expiration/eviction demonstration.
    ExpirationEvictionDemo(password='yourpassword').run_demo()
3.2 listpack数据结构
知识点概述 :
listpack是Redis用于存储小列表、小哈希等小型集合的紧凑数据结构,自Redis 7.0起全面取代了此前的ziplist编码,能显著节省内存。
配置语法:
bash
# redis.conf
hash-max-listpack-entries 512 # hash使用listpack的最大元素数
hash-max-listpack-value 64 # hash使用listpack的最大value长度
list-max-listpack-size -2 # list使用listpack的大小限制
set-max-intset-entries 512 # set使用intset的最大元素数
zset-max-listpack-entries 128 # zset使用listpack的最大元素数
zset-max-listpack-value 64 # zset使用listpack的最大value长度
案例代码:
python
import redis
import sys
import random
import string
class ListpackDemo:
"""listpack数据结构演示"""
def __init__(self, host='localhost', port=6379, password=None):
self.client = redis.Redis(
host=host,
port=port,
password=password,
decode_responses=True
)
def check_listpack_config(self):
"""检查listpack相关配置"""
print("=== listpack配置检查 ===")
configs = [
'hash-max-listpack-entries',
'hash-max-listpack-value',
'list-max-listpack-size',
'set-max-intset-entries',
'zset-max-listpack-entries',
'zset-max-listpack-value'
]
for config in configs:
value = self.client.config_get(config)
print(f"{config}: {value[config]}")
def demonstrate_hash_encoding(self):
"""演示hash的编码转换"""
print("\n=== Hash编码转换演示 ===")
# 获取当前阈值
max_entries = int(self.client.config_get('hash-max-listpack-entries')['hash-max-listpack-entries'])
max_value = int(self.client.config_get('hash-max-listpack-value')['hash-max-listpack-value'])
print(f"listpack阈值: entries={max_entries}, value_size={max_value}")
# 1. 小hash(使用listpack)
print("\n1. 创建小hash (listpack编码):")
small_hash = "hash:small"
for i in range(10): # 小于阈值
field = f"field{i}"
value = f"value{i}"
self.client.hset(small_hash, field, value)
encoding = self.client.object("encoding", small_hash)
print(f" 编码方式: {encoding}")
print(f" 元素数量: {self.client.hlen(small_hash)}")
print(f" 内存占用: {self.client.memory_usage(small_hash)} bytes")
# 2. 大hash(转换为hashtable)
print("\n2. 创建大hash (超出entries阈值):")
large_hash = "hash:large"
for i in range(max_entries + 50): # 超出阈值
field = f"field{i}"
value = f"value{i}"
self.client.hset(large_hash, field, value)
if i % 100 == 0 and i > 0:
print(f" 已添加 {i} 个字段...")
encoding = self.client.object("encoding", large_hash)
print(f" 编码方式: {encoding}")
print(f" 元素数量: {self.client.hlen(large_hash)}")
print(f" 内存占用: {self.client.memory_usage(large_hash)} bytes")
# 3. 大value hash
print("\n3. 创建大value hash (超出value阈值):")
value_hash = "hash:largevalue"
for i in range(10):
field = f"field{i}"
# 创建超过阈值的value
value = ''.join(random.choices(string.ascii_letters, k=max_value + 100))
self.client.hset(value_hash, field, value)
encoding = self.client.object("encoding", value_hash)
print(f" 编码方式: {encoding}")
print(f" 内存占用: {self.client.memory_usage(value_hash)} bytes")
# 比较内存使用
print("\n4. 内存使用对比:")
print(f" listpack hash占用: {self.client.memory_usage('hash:small')} bytes")
print(f" hashtable hash占用: {self.client.memory_usage('hash:large')} bytes")
# 计算平均每元素内存
small_avg = self.client.memory_usage('hash:small') / self.client.hlen('hash:small')
large_avg = self.client.memory_usage('hash:large') / self.client.hlen('hash:large')
print(f" listpack平均: {small_avg:.1f} bytes/元素")
print(f" hashtable平均: {large_avg:.1f} bytes/元素")
print(f" 内存节省: {(1 - small_avg/large_avg)*100:.1f}%")
def demonstrate_list_encoding(self):
    """Show how a Redis list changes encoding as it grows past the
    list-max-listpack-size threshold."""
    print("\n=== List编码转换演示 ===")
    cfg = self.client.config_get('list-max-listpack-size')
    print(f"list-max-listpack-size: {cfg['list-max-listpack-size']}")

    def report(key):
        # Print encoding, length and memory footprint for one list key.
        print(f" 编码方式: {self.client.object('encoding', key)}")
        print(f" 长度: {self.client.llen(key)}")
        print(f" 内存占用: {self.client.memory_usage(key)} bytes")

    # A small list of tiny items stays in the compact listpack form.
    print("\n1. 创建小list (listpack编码):")
    compact_key = "list:small"
    for idx in range(10):
        self.client.lpush(compact_key, f"item{idx}")
    report(compact_key)

    # Many bulky elements force a conversion (quicklist on modern servers).
    print("\n2. 创建大list (超出阈值):")
    bulky_key = "list:large"
    for idx in range(1000):
        self.client.lpush(bulky_key, f"item{idx}" * 100)
    report(bulky_key)
def demonstrate_zset_encoding(self):
    """Show zset encoding conversion (listpack -> skiplist).

    Conversion happens when either the member count exceeds
    zset-max-listpack-entries or a single member is longer than
    zset-max-listpack-value.
    """
    print("\n=== ZSet编码转换演示 ===")
    # Read the server-side conversion thresholds.
    max_entries = int(self.client.config_get('zset-max-listpack-entries')['zset-max-listpack-entries'])
    max_value = int(self.client.config_get('zset-max-listpack-value')['zset-max-listpack-value'])
    print(f"zset listpack阈值: entries={max_entries}, value_size={max_value}")
    # 1. A small zset stays listpack-encoded.
    print("\n1. 创建小zset (listpack编码):")
    small_zset = "zset:small"
    for i in range(50):  # below the entries threshold
        score = i
        member = f"member{i}"
        self.client.zadd(small_zset, {member: score})
    encoding = self.client.object("encoding", small_zset)
    print(f" 编码方式: {encoding}")
    print(f" 元素数量: {self.client.zcard(small_zset)}")
    print(f" 内存占用: {self.client.memory_usage(small_zset)} bytes")
    # 2. Exceeding the entries threshold converts it to skiplist.
    print("\n2. 创建大zset (超出entries阈值):")
    large_zset = "zset:large"
    for i in range(max_entries + 50):
        score = i
        member = f"member{i}"
        self.client.zadd(large_zset, {member: score})
        if i % 100 == 0 and i > 0:
            print(f" 已添加 {i} 个成员...")
    encoding = self.client.object("encoding", large_zset)
    print(f" 编码方式: {encoding}")
    print(f" 元素数量: {self.client.zcard(large_zset)}")
    print(f" 内存占用: {self.client.memory_usage(large_zset)} bytes")
    # 3. An over-long member also forces skiplist encoding.
    print("\n3. 创建大value zset (超出value阈值):")
    value_zset = "zset:largevalue"
    for i in range(50):
        score = i
        # BUG FIX: members must be unique per iteration; previously every
        # iteration wrote the identical 'xxx...' member, so the zset ended
        # up holding a single element instead of 50.
        member = 'x' * (max_value + 100) + str(i)
        self.client.zadd(value_zset, {member: score})
    encoding = self.client.object("encoding", value_zset)
    print(f" 编码方式: {encoding}")
    print(f" 内存占用: {self.client.memory_usage(value_zset)} bytes")
def demonstrate_set_encoding(self):
    """Show how set encoding depends on member types and count."""
    print("\n=== Set编码转换演示 ===")
    threshold = int(self.client.config_get('set-max-intset-entries')['set-max-intset-entries'])
    print(f"set-max-intset-entries: {threshold}")

    def show(key, with_count=False):
        # Report encoding (plus optional cardinality) and memory cost.
        print(f" 编码方式: {self.client.object('encoding', key)}")
        if with_count:
            print(f" 元素数量: {self.client.scard(key)}")
        print(f" 内存占用: {self.client.memory_usage(key)} bytes")

    # All-integer members allow the compact intset encoding.
    print("\n1. 创建整数set (intset编码):")
    numeric_key = "set:int"
    for n in range(100):
        self.client.sadd(numeric_key, n)
    show(numeric_key, with_count=True)

    # String members can never use intset.
    print("\n2. 创建字符串set (hashtable编码):")
    text_key = "set:string"
    for n in range(100):
        self.client.sadd(text_key, f"string{n}")
    show(text_key)

    # Adding a single string to an integer set triggers conversion.
    print("\n3. 创建混合set (混合类型):")
    mixed_key = "set:mixed"
    for n in range(50):
        self.client.sadd(mixed_key, n)
        if n == 25:
            self.client.sadd(mixed_key, "trigger")
    show(mixed_key)
def optimize_memory_usage(self):
    """Compare the memory footprint of different storage strategies.

    WARNING: flushes the current database between measurements.
    """
    print("\n=== 内存优化演示 ===")

    def used_bytes():
        # Current used_memory as reported by INFO memory.
        return self.client.info('memory')['used_memory']

    # Strategy 1: one hash per user vs. three standalone keys per user.
    print("\n1. Hash vs 独立Key:")
    baseline = used_bytes()
    for n in range(1000):
        self.client.set(f"user:{n}:name", f"user{n}")
        self.client.set(f"user:{n}:age", n)
        self.client.set(f"user:{n}:city", "Beijing")
    keys_mem = used_bytes() - baseline
    print(f" 独立key占用: {keys_mem/1024:.2f}KB")

    self.client.flushdb()
    baseline = used_bytes()
    for n in range(1000):
        self.client.hset(f"user:{n}", mapping={
            'name': f"user{n}",
            'age': n,
            'city': "Beijing"
        })
    hash_mem = used_bytes() - baseline
    print(f" hash占用: {hash_mem/1024:.2f}KB")
    print(f" 内存节省: {(1 - hash_mem/keys_mem)*100:.1f}%")

    # Strategy 2: integer values benefit from the int encoding.
    print("\n2. 整数编码优化:")
    baseline = used_bytes()
    for n in range(1000):
        self.client.set(f"str:num:{n}", str(n))
    str_mem = used_bytes() - baseline

    self.client.flushdb()
    baseline = used_bytes()
    for n in range(1000):
        self.client.set(f"int:num:{n}", n)
    int_mem = used_bytes() - baseline
    print(f" 字符串数字占用: {str_mem/1024:.2f}KB")
    print(f" 整数占用: {int_mem/1024:.2f}KB")
    print(f" 内存节省: {(1 - int_mem/str_mem)*100:.1f}%")

    # Strategy 3: tuning advice for listpack thresholds.
    print("\n3. listpack优化建议:")
    print("""
根据数据特征调整阈值:
- 如果元素都很小,可以增大阈值
- 如果元素大小不一,保持默认值
- 监控object encoding确认编码方式
- 使用MEMORY USAGE命令分析内存
""")
def cleanup(self):
    """Remove every demo key by flushing the current database."""
    self.client.flushdb()
    print("\n测试数据已清理")
def run_demo(self):
    """Run every encoding demo in order; cleanup always executes."""
    steps = (
        self.check_listpack_config,
        self.demonstrate_hash_encoding,
        self.demonstrate_list_encoding,
        self.demonstrate_zset_encoding,
        self.demonstrate_set_encoding,
        self.optimize_memory_usage,
    )
    try:
        for step in steps:
            step()
    finally:
        self.cleanup()
if __name__ == "__main__":
    # Entry point: run the listpack demo against the local server.
    ListpackDemo(password='yourpassword').run_demo()
3.3 碎片整理
知识点概述 :
Redis内存碎片整理功能,用于优化内存使用。
配置语法:
bash
# redis.conf
activedefrag yes # 启用主动碎片整理
active-defrag-ignore-bytes 100mb # 碎片字节阈值
active-defrag-threshold-lower 10 # 碎片百分比下阈值
active-defrag-threshold-upper 100 # 碎片百分比上阈值
active-defrag-cycle-min 25 # 最小CPU时间占比
active-defrag-cycle-max 75 # 最大CPU时间占比
active-defrag-max-scan-fields 1000 # 最大扫描字段数
案例代码:
python
import redis
import time
import random
import string
class DefragmentationDemo:
    """Demonstrates Redis active defragmentation: create fragmentation,
    monitor it, and trigger/configure the defragmenter."""

    def __init__(self, host='localhost', port=6379, password=None):
        # decode_responses=True so INFO/CONFIG results come back as str.
        self.client = redis.Redis(
            host=host,
            port=port,
            password=password,
            decode_responses=True
        )

    def check_defrag_config(self):
        """Print the current values of all active-defrag related settings."""
        print("=== 碎片整理配置检查 ===")
        configs = [
            'activedefrag',
            'active-defrag-ignore-bytes',
            'active-defrag-threshold-lower',
            'active-defrag-threshold-upper',
            'active-defrag-cycle-min',
            'active-defrag-cycle-max',
            'active-defrag-max-scan-fields'
        ]
        for config in configs:
            value = self.client.config_get(config)
            print(f"{config}: {value[config]}")

    def create_fragmentation(self):
        """Generate memory fragmentation by churning keys of varied sizes."""
        print("\n=== 创建内存碎片场景 ===")
        # 1. Write keys of several sizes, randomly deleting ~30% of them,
        #    leaving differently-sized holes for the allocator.
        print("1. 创建和删除不同大小的键...")
        for size in [100, 1000, 10000, 100000]:
            for i in range(100):
                key = f"frag:key:{size}:{i}"
                value = 'x' * size
                self.client.set(key, value)
                if random.random() < 0.3:
                    self.client.delete(key)
                if i % 20 == 0:
                    print(f" 已处理 {size}字节大小的键 {i}/100")
        # 2. Repeatedly resize existing values so allocations move between
        #    allocator size classes.
        print("\n2. 频繁修改键(改变大小)...")
        for i in range(50):
            key = f"frag:modify:{i}"
            self.client.set(key, 'x' * 5000)
        for i in range(100):
            for j in range(50):
                key = f"frag:modify:{j}"
                new_size = random.randint(100, 10000)
                self.client.set(key, 'x' * new_size)
            if i % 10 == 0:
                print(f" 已完成 {i} 轮修改")
        # 3. Push/pop list nodes of random sizes.
        print("\n3. 列表操作...")
        list_key = "frag:list"
        for i in range(1000):
            size = random.randint(100, 5000)
            self.client.lpush(list_key, 'x' * size)
            if random.random() < 0.4:
                self.client.rpop(list_key)
            if i % 200 == 0:
                print(f" 已处理 {i} 个列表操作")

    def monitor_fragmentation(self):
        """Sample INFO memory 10 times and classify the fragmentation ratio."""
        print("\n=== 内存碎片监控 ===")
        for i in range(10):
            info = self.client.info('memory')
            used_memory = info['used_memory']
            rss = info.get('used_memory_rss', 0)
            if rss > 0:
                frag_ratio = info['mem_fragmentation_ratio']
                print(f"\n第{i+1}次检查:")
                print(f" 已用内存: {used_memory/1024/1024:.2f} MB")
                print(f" RSS内存: {rss/1024/1024:.2f} MB")
                print(f" 碎片率: {frag_ratio:.2f}")
                # A ratio below 1 usually means part of the dataset was
                # swapped out by the OS.
                if frag_ratio < 1:
                    print(f" 状态: 使用swap或内存不足")
                elif frag_ratio < 1.5:
                    print(f" 状态: 正常")
                elif frag_ratio < 2:
                    print(f" 状态: 轻度碎片")
                else:
                    print(f" 状态: 严重碎片")
            else:
                # RSS may be unavailable on some platforms.
                print(f"RSS信息不可用,使用其他指标:")
                print(f" 已用内存: {used_memory/1024/1024:.2f} MB")
            time.sleep(2)

    def manual_defrag(self):
        """Attempt an on-demand purge of allocator caches (MEMORY PURGE)."""
        print("\n=== 手动碎片整理 ===")
        try:
            activedefrag = self.client.config_get('activedefrag')['activedefrag']
            print(f"自动碎片整理: {'启用' if activedefrag == 'yes' else '禁用'}")
            # MEMORY PURGE asks the allocator (jemalloc) to release dirty
            # pages; it is not a full defrag pass but can lower RSS.
            # (redis-py splits the space-separated command name itself.)
            print("\n执行 MEMORY PURGE...")
            result = self.client.execute_command('MEMORY PURGE')
            print(f"结果: {result}")
            time.sleep(2)
            info = self.client.info('memory')
            print(f"整理后内存: {info['used_memory']/1024/1024:.2f} MB")
            print(f"整理后碎片率: {info.get('mem_fragmentation_ratio', 'N/A')}")
        except redis.exceptions.ResponseError as e:
            # MEMORY PURGE is only supported with the jemalloc allocator.
            print(f"手动整理失败: {e}")

    def configure_defrag(self):
        """Enable active defrag and lower its trigger thresholds."""
        print("\n=== 配置碎片整理 ===")
        try:
            self.client.config_set('activedefrag', 'yes')
            print("已启用主动碎片整理")
            self.client.config_set('active-defrag-ignore-bytes', '10mb')
            self.client.config_set('active-defrag-threshold-lower', '20')
            self.client.config_set('active-defrag-threshold-upper', '30')
            print("已设置碎片阈值:")
            print(f" ignore-bytes: {self.client.config_get('active-defrag-ignore-bytes')['active-defrag-ignore-bytes']}")
            print(f" lower: {self.client.config_get('active-defrag-threshold-lower')['active-defrag-threshold-lower']}%")
            print(f" upper: {self.client.config_get('active-defrag-threshold-upper')['active-defrag-threshold-upper']}%")
        except redis.exceptions.ResponseError as e:
            # Fails when the server was built without active-defrag support.
            print(f"配置失败: {e}")

    def simulate_long_running(self):
        """Keep churning keys while watching the defrag hit/miss counters."""
        print("\n=== 模拟长时间运行 ===")

        def get_defrag_stats():
            # Counters are absent until defrag has run at least once,
            # hence the .get(..., 0) defaults.
            info = self.client.info('stats')
            return {
                'defrag_hits': info.get('active_defrag_hits', 0),
                'defrag_misses': info.get('active_defrag_misses', 0),
                'defrag_key_hits': info.get('active_defrag_key_hits', 0),
                'defrag_key_misses': info.get('active_defrag_key_misses', 0)
            }
        initial_stats = get_defrag_stats()
        print(f"初始整理统计:")
        for k, v in initial_stats.items():
            print(f" {k}: {v}")
        print("\n持续产生碎片操作...")
        for i in range(50):
            # Short-lived keys (60 s TTL) of random size.
            for j in range(10):
                key = f"temp:{i}:{j}"
                self.client.setex(key, 60, 'x' * random.randint(1000, 10000))
            # Resize a small working set of persistent keys.
            for j in range(5):
                key = f"existing:{j}"
                if not self.client.exists(key):
                    self.client.set(key, 'x' * 5000)
                else:
                    new_size = random.randint(100, 10000)
                    self.client.set(key, 'x' * new_size)
            if i % 10 == 0:
                # Print only the counters that moved since the baseline.
                current_stats = get_defrag_stats()
                print(f" 第{i}轮后:")
                for k, v in current_stats.items():
                    diff = v - initial_stats[k]
                    if diff > 0:
                        print(f" {k}: +{diff}")
            time.sleep(1)

    def analyze_fragmentation(self):
        """Summarise memory stats and give defrag recommendations."""
        print("\n=== 碎片分析 ===")
        info = self.client.info('memory')
        print("内存使用详情:")
        print(f" 已用内存: {info['used_memory_human']}")
        print(f" 峰值内存: {info['used_memory_peak_human']}")
        print(f" RSS: {info.get('used_memory_rss_human', 'N/A')}")
        print(f" 数据集大小: {info.get('used_memory_dataset_human', 'N/A')}")
        print(f" 碎片率: {info.get('mem_fragmentation_ratio', 'N/A')}")
        # "Wasted" memory = RSS held by the process beyond live data.
        if 'used_memory_rss' in info and info['used_memory'] > 0:
            wasted = info['used_memory_rss'] - info['used_memory']
            wasted_percent = (wasted / info['used_memory_rss']) * 100
            print(f" 浪费内存: {wasted/1024/1024:.2f} MB ({wasted_percent:.1f}%)")
        frag_ratio = info.get('mem_fragmentation_ratio', 0)
        if frag_ratio > 1.5:
            print("\n建议:")
            print(" 1. 启用主动碎片整理")
            print(" 2. 调整碎片整理阈值")
            print(" 3. 考虑重启实例(维护窗口)")
        elif frag_ratio > 1:
            print("\n内存状况良好")

    def cleanup(self):
        """Delete every key this demo created (frag:*, temp:*, existing:*)."""
        for key in self.client.scan_iter("frag:*"):
            self.client.delete(key)
        for key in self.client.scan_iter("temp:*"):
            self.client.delete(key)
        for key in self.client.scan_iter("existing:*"):
            self.client.delete(key)
        print("\n测试数据已清理")

    def run_demo(self):
        """Run all steps in order; cleanup always runs, even on error."""
        try:
            self.check_defrag_config()
            self.create_fragmentation()
            self.monitor_fragmentation()
            self.manual_defrag()
            self.configure_defrag()
            self.simulate_long_running()
            self.analyze_fragmentation()
        finally:
            self.cleanup()
if __name__ == "__main__":
    # Entry point: run the defragmentation demo against the local server.
    DefragmentationDemo(password='yourpassword').run_demo()
3.4 SLOWLOG慢日志
知识点概述 :
记录执行时间超过阈值的命令,用于性能分析。
配置语法:
bash
# redis.conf
slowlog-log-slower-than 10000 # 慢查询阈值(微秒)
slowlog-max-len 128 # 慢日志最大长度
案例代码:
python
import redis
import time
import random
class SlowLogDemo:
    """SLOWLOG demo: configure the slow log, generate slow commands, then
    inspect, analyse and reset the log.

    Every method talks to a live Redis server.
    """

    def __init__(self, host='localhost', port=6379, password=None):
        self.client = redis.Redis(
            host=host,
            port=port,
            password=password,
            decode_responses=True,
            socket_keepalive=True
        )

    def check_slowlog_config(self):
        """Print the slow-log threshold (microseconds) and max length."""
        print("=== 慢日志配置检查 ===")
        slowlog_threshold = self.client.config_get('slowlog-log-slower-than')
        slowlog_max_len = self.client.config_get('slowlog-max-len')
        print(f"慢查询阈值: {slowlog_threshold['slowlog-log-slower-than']} 微秒")
        print(f"慢日志最大长度: {slowlog_max_len['slowlog-max-len']}")
        threshold_us = int(slowlog_threshold['slowlog-log-slower-than'])
        # A negative threshold disables slow logging entirely.
        if threshold_us < 0:
            print(" 状态: 禁用慢日志")
        else:
            print(f" 阈值: {threshold_us/1000:.2f} 毫秒")

    def configure_slowlog(self, threshold=10000, max_len=128):
        """Set slowlog-log-slower-than and slowlog-max-len.

        Args:
            threshold: commands slower than this many microseconds are logged.
            max_len: maximum number of entries kept in the slow log.
        """
        print(f"\n=== 配置慢日志 ===")
        print(f"设置阈值: {threshold} 微秒 ({threshold/1000} 毫秒)")
        # BUG FIX: previously printed a hard-coded literal ("71,610")
        # instead of the actual max_len argument.
        print(f"设置最大长度: {max_len}")
        try:
            self.client.config_set('slowlog-log-slower-than', str(threshold))
            self.client.config_set('slowlog-max-len', str(max_len))
            print("配置成功")
        except Exception as e:
            print(f"配置失败: {e}")

    def generate_slow_commands(self):
        """Create bulk data, then run commands likely to exceed the threshold."""
        print("\n=== 生成慢命令 ===")
        print("1. 创建测试数据...")
        for i in range(10000):
            key = f"slow:test:{i}"
            self.client.set(key, f"value_{i}" * 100)  # sizeable values
            if i % 1000 == 0:
                print(f" 已创建 {i} 个键")
        # (description, zero-arg callable) pairs executed in order.
        # NOTE: SORT takes a single key, so 'slow:test:*' is treated as a
        # literal (normally missing) key name, not a pattern.
        slow_commands = [
            ("KEYS *", lambda: self.client.keys('*')),
            ("SORT", lambda: self.client.sort('slow:test:*', alpha=True)),
            ("LRANGE大列表", lambda: self._create_large_list()),
            ("ZRANGE大zset", lambda: self._create_large_zset()),
            ("多个命令管道", lambda: self._pipeline_operations()),
            ("大key删除", lambda: self._delete_large_key())
        ]
        for desc, cmd_func in slow_commands:
            try:
                print(f"\n执行: {desc}")
                start = time.time()
                result = cmd_func()
                elapsed = (time.time() - start) * 1000  # milliseconds
                print(f" 耗时: {elapsed:.2f} 毫秒")
                if desc == "KEYS *" and result:
                    print(f" 返回键数: {len(result)}")
            except Exception as e:
                print(f" 失败: {e}")

    def _create_large_list(self):
        """Build a 5000-element list of large items, then LRANGE it all."""
        list_key = "slow:large:list"
        for i in range(5000):
            self.client.rpush(list_key, f"item_{i}" * 100)
        return self.client.lrange(list_key, 0, -1)

    def _create_large_zset(self):
        """Build a 5000-member zset, then fetch it all with scores."""
        zset_key = "slow:large:zset"
        for i in range(5000):
            self.client.zadd(zset_key, {f"member_{i}": i})
        return self.client.zrange(zset_key, 0, -1, withscores=True)

    def _pipeline_operations(self):
        """Queue 1000 SET+GET pairs into a single pipeline round-trip."""
        pipe = self.client.pipeline()
        for i in range(1000):
            pipe.set(f"pipe:key:{i}", f"value_{i}")
            pipe.get(f"pipe:key:{i}")
        return pipe.execute()

    def _delete_large_key(self):
        """Create then DEL a 10MB string (DEL on huge values can be slow)."""
        large_key = "slow:large:string"
        self.client.set(large_key, "x" * 10_000_000)  # 10MB
        return self.client.delete(large_key)

    def view_slowlog(self):
        """Print every entry currently in the slow log."""
        print("\n=== 查看慢日志 ===")
        slowlog_len = self.client.slowlog_len()
        print(f"当前慢日志数量: {slowlog_len}")
        if slowlog_len == 0:
            print("没有慢日志记录")
            return
        slowlogs = self.client.slowlog_get()
        print("\n慢日志详情:")
        for i, log in enumerate(slowlogs, 1):
            log_id = log['id']
            timestamp = log['start_time']
            duration = log['duration']  # microseconds
            # NOTE(review): assumes log['command'] is a list of argument
            # strings; some redis-py versions return it as one joined
            # string - verify against the installed client version.
            command = ' '.join(log['command'])
            # Client fields exist only on newer server versions.
            client_addr = log.get('client_addr', 'unknown')
            client_name = log.get('client_name', 'unknown')
            time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))
            print(f"\n{i}. 日志ID: {log_id}")
            print(f" 时间: {time_str}")
            print(f" 耗时: {duration} 微秒 ({duration/1000:.2f} 毫秒)")
            print(f" 命令: {command[:100]}{'...' if len(command) > 100 else ''}")
            print(f" 客户端: {client_addr} ({client_name})")

    def analyze_slowlog(self):
        """Aggregate slow-log entries: totals, slowest entry, per-type stats."""
        print("\n=== 慢日志分析 ===")
        slowlogs = self.client.slowlog_get()
        if not slowlogs:
            print("没有慢日志可分析")
            return
        total_duration = 0
        command_stats = {}
        slowest_log = None
        slowest_duration = 0
        for log in slowlogs:
            duration = log['duration']  # microseconds
            total_duration += duration
            # Track the single slowest entry.
            if duration > slowest_duration:
                slowest_duration = duration
                slowest_log = log
            # Group statistics by the command name (first argument).
            cmd_type = log['command'][0] if log['command'] else 'unknown'
            if cmd_type not in command_stats:
                command_stats[cmd_type] = {
                    'count': 0,
                    'total_duration': 0,
                    'max_duration': 0
                }
            command_stats[cmd_type]['count'] += 1
            command_stats[cmd_type]['total_duration'] += duration
            command_stats[cmd_type]['max_duration'] = max(
                command_stats[cmd_type]['max_duration'],
                duration
            )
        print(f"慢日志统计:")
        print(f" 总记录数: {len(slowlogs)}")
        print(f" 总耗时: {total_duration/1000:.2f} 毫秒")
        print(f" 平均耗时: {total_duration/len(slowlogs)/1000:.2f} 毫秒")
        if slowest_log:
            cmd = ' '.join(slowest_log['command'])
            print(f"\n最慢命令:")
            print(f" 耗时: {slowest_duration/1000:.2f} 毫秒")
            print(f" 命令: {cmd[:200]}")
        print("\n按命令类型统计:")
        # Most expensive command types first.
        for cmd_type, stats in sorted(
            command_stats.items(),
            key=lambda x: x[1]['total_duration'],
            reverse=True
        ):
            avg_duration = stats['total_duration'] / stats['count']
            print(f" {cmd_type}:")
            print(f" 次数: {stats['count']}")
            print(f" 总耗时: {stats['total_duration']/1000:.2f} 毫秒")
            print(f" 平均耗时: {avg_duration/1000:.2f} 毫秒")
            print(f" 最大耗时: {stats['max_duration']/1000:.2f} 毫秒")

    def reset_slowlog(self):
        """Clear the slow log (SLOWLOG RESET)."""
        print("\n=== 重置慢日志 ===")
        self.client.slowlog_reset()
        print("慢日志已重置")

    def demonstrate_slowlog_usage(self):
        """Walk through three practical slow-log scenarios."""
        print("\n=== 慢日志使用场景 ===")
        scenarios = [
            ("1. 性能问题排查", self._performance_troubleshooting),
            ("2. 命令优化验证", self._command_optimization),
            ("3. 客户端行为分析", self._client_analysis)
        ]
        for title, scenario_func in scenarios:
            print(f"\n{title}")
            scenario_func()

    def _performance_troubleshooting(self):
        """Scenario: locate the cause of a slowdown via the slow log."""
        print(" 场景: 发现Redis响应变慢,通过慢日志定位问题")
        self.client.slowlog_reset()
        # Deliberately run commands that are discouraged in production.
        print(" 执行可能导致问题的命令...")
        self.client.keys('*')  # avoid KEYS in production
        self.client.sort('slow:test:*', alpha=True)
        slowlogs = self.client.slowlog_get(5)  # 5 most recent entries
        if slowlogs:
            print(" 发现的慢命令:")
            for log in slowlogs:
                cmd = ' '.join(log['command'][:2])  # command + first arg
                duration = log['duration']
                print(f" - {cmd} 耗时: {duration/1000:.2f}毫秒")

    def _command_optimization(self):
        """Scenario: compare KEYS (blocking) with SCAN (incremental)."""
        print(" 场景: 优化前后的性能对比")
        self.client.slowlog_reset()
        print(" 优化前: 使用KEYS命令")
        self.client.keys('slow:test:*')
        print(" 优化后: 使用SCAN命令")
        # BUG FIX: the original compared a str cursor ('0') against the
        # int 0 and only worked by accident; use a proper int do/while.
        cursor = 0
        while True:
            cursor, keys = self.client.scan(cursor, match='slow:test:*', count=100)
            if cursor == 0:
                break
        slowlogs = self.client.slowlog_get(10)
        for log in slowlogs:
            cmd = ' '.join(log['command'])
            if 'KEYS' in cmd or 'SCAN' in cmd:
                duration = log['duration']
                print(f" {cmd[:50]} 耗时: {duration/1000:.2f}毫秒")

    def _client_analysis(self):
        """Scenario: analyse per-client command patterns from the log."""
        print(" 场景: 分析不同客户端的命令模式")
        self.client.slowlog_reset()
        print(" 模拟多个客户端的操作...")
        # Client A: bulk writes.
        for i in range(100):
            self.client.set(f"clientA:key:{i}", "x" * 1000)
        # Client B: bulk random reads.
        for i in range(100):
            self.client.get(f"clientA:key:{random.randint(0, 99)}")
        # Client C: mixed hash/zset work through a pipeline.
        pipe = self.client.pipeline()
        for i in range(50):
            pipe.hset(f"clientC:hash:{i}", f"field{i}", f"value{i}")
            pipe.zadd(f"clientC:zset:{i%10}", {f"member{i}": i})
        pipe.execute()
        # Simplified analysis by command name; a real investigation
        # would also group by client_addr.
        slowlogs = self.client.slowlog_get()
        client_commands = {}
        for log in slowlogs:
            cmd_type = log['command'][0]
            if cmd_type not in client_commands:
                client_commands[cmd_type] = 0
            client_commands[cmd_type] += 1
        print(" 命令分布:")
        for cmd, count in client_commands.items():
            print(f" {cmd}: {count}次")

    def cleanup(self):
        """Delete every key created by this demo."""
        print("\n=== 清理测试数据 ===")
        for key in self.client.scan_iter("slow:*"):
            self.client.delete(key)
        for key in self.client.scan_iter("pipe:*"):
            self.client.delete(key)
        for key in self.client.scan_iter("client*:*"):
            self.client.delete(key)
        print("测试数据已清理")

    def run_demo(self):
        """Run the whole demo; cleanup always executes."""
        try:
            self.check_slowlog_config()
            self.configure_slowlog(5000, 128)  # 5 ms threshold
            self.generate_slow_commands()
            self.view_slowlog()
            self.analyze_slowlog()
            self.demonstrate_slowlog_usage()
            self.reset_slowlog()
        finally:
            self.cleanup()
if __name__ == "__main__":
    # Entry point: run the slow-log demo against the local server.
    SlowLogDemo(password='yourpassword').run_demo()
3.5 延迟监控
知识点概述 :
Redis内置的延迟监控功能,可以监控各种操作的延迟。
配置语法:
bash
# redis.conf
latency-monitor-threshold 100 # 延迟监控阈值(毫秒)
案例代码:
python
import redis
import time
import random
import threading
class LatencyMonitoringDemo:
    """Demo of the built-in latency monitor (LATENCY LATEST / HISTORY /
    DOCTOR / RESET): configure it, generate spikes, then inspect them."""

    def __init__(self, host='localhost', port=6379, password=None):
        # decode_responses=True so INFO/CONFIG results come back as str.
        self.client = redis.Redis(
            host=host,
            port=port,
            password=password,
            decode_responses=True,
            socket_keepalive=True
        )

    def check_latency_config(self):
        """Print latency-monitor-threshold; 0 means monitoring is off."""
        print("=== 延迟监控配置检查 ===")
        threshold = self.client.config_get('latency-monitor-threshold')
        print(f"延迟监控阈值: {threshold['latency-monitor-threshold']} 毫秒")
        if int(threshold['latency-monitor-threshold']) == 0:
            print(" 状态: 禁用")
        else:
            print(" 状态: 启用")

    def configure_latency_monitor(self, threshold_ms=100):
        """Enable latency monitoring for events slower than threshold_ms."""
        print(f"\n=== 配置延迟监控 ===")
        print(f"设置阈值: {threshold_ms} 毫秒")
        try:
            self.client.config_set('latency-monitor-threshold', str(threshold_ms))
            print("配置成功")
        except Exception as e:
            print(f"配置失败: {e}")

    def generate_latency_events(self):
        """Run a battery of operations that tend to register latency spikes."""
        print("\n=== 生成延迟事件 ===")
        # (description, zero-arg callable) pairs executed in order.
        latency_commands = [
            ("慢查询命令", self._slow_command),
            ("大key操作", self._large_key_operation),
            ("阻塞命令", self._blocking_command),
            ("fork操作", self._fork_operation),
            ("过期清理", self._expire_operation)
        ]
        for desc, cmd_func in latency_commands:
            print(f"\n执行: {desc}")
            start = time.time()
            result = cmd_func()
            elapsed = (time.time() - start) * 1000
            print(f" 耗时: {elapsed:.2f} 毫秒")

    def _slow_command(self):
        """Seed 1000 keys then run KEYS, an O(N) blocking scan."""
        for i in range(1000):
            self.client.set(f"latency:test:{i}", "x" * 1000)
        self.client.keys('latency:test:*')
        return "完成"

    def _large_key_operation(self):
        """Create and read back a 10MB string value."""
        large_key = "latency:large:string"
        self.client.set(large_key, "x" * 10_000_000)  # 10MB
        self.client.get(large_key)
        self.client.strlen(large_key)
        return "完成"

    def _blocking_command(self):
        """BLPOP on a missing key; returns None after the 1s timeout."""
        result = self.client.blpop("nonexistent:list", timeout=1)
        return result

    def _fork_operation(self):
        """Trigger BGSAVE, which forks a child process."""
        self.client.execute_command('BGSAVE')
        time.sleep(0.5)
        return "完成"

    def _expire_operation(self):
        """Create 10k keys with a 1s TTL, wait for them to expire, purge."""
        for i in range(10000):
            key = f"latency:expire:{i}"
            self.client.setex(key, 1, f"value_{i}")
        time.sleep(2)
        self.client.execute_command('MEMORY PURGE')
        return "完成"

    def view_latency_events(self):
        """Print LATENCY LATEST: one entry per recorded event class."""
        print("\n=== 查看延迟事件 ===")
        try:
            event_types = self.client.execute_command('LATENCY', 'LATEST')
            if not event_types:
                print("没有延迟事件记录")
                return
            print("最新延迟事件:")
            for event in event_types:
                # Reply format: [event-name, unix-ts, latest-ms, max-ms]
                event_name = event[0]
                timestamp = event[1]
                latest_latency = event[2]
                max_latency = event[3]
                time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp))
                print(f"\n 事件类型: {event_name}")
                print(f" 发生时间: {time_str}")
                print(f" 最新延迟: {latest_latency} 毫秒")
                print(f" 最大延迟: {max_latency} 毫秒")
        except redis.exceptions.ResponseError as e:
            print(f"获取延迟事件失败: {e}")

    def analyze_event_history(self, event_name='command'):
        """Print LATENCY HISTORY for one event plus simple statistics."""
        print(f"\n=== 分析事件历史: {event_name} ===")
        try:
            history = self.client.execute_command('LATENCY', 'HISTORY', event_name)
            if not history:
                print(f"没有 {event_name} 事件的历史记录")
                return
            print(f"事件 {event_name} 的历史记录:")
            timestamps = []
            latencies = []
            for entry in history:
                # Each entry is a [unix-ts, latency-ms] pair.
                timestamp = entry[0]
                latency = entry[1]
                time_str = time.strftime('%H:%M:%S', time.localtime(timestamp))
                timestamps.append(time_str)
                latencies.append(latency)
                print(f" {time_str} - {latency} 毫秒")
            if latencies:
                avg_latency = sum(latencies) / len(latencies)
                max_latency = max(latencies)
                min_latency = min(latencies)
                print(f"\n统计信息:")
                print(f" 记录数: {len(latencies)}")
                print(f" 平均延迟: {avg_latency:.2f} 毫秒")
                print(f" 最大延迟: {max_latency} 毫秒")
                print(f" 最小延迟: {min_latency} 毫秒")
        except redis.exceptions.ResponseError as e:
            print(f"获取历史失败: {e}")

    def monitor_latency_graph(self):
        """Render recent latency history as an ASCII bar chart per event."""
        print("\n=== 延迟监控图表 ===")

        def generate_ascii_chart(data, height=10):
            """Build a height-row chart with one column per sample."""
            if not data:
                return "无数据"
            max_val = max(data)
            if max_val == 0:
                return "所有延迟为0"
            chart = []
            # Walk thresholds from the top row down; draw a block wherever
            # the sample reaches that row's threshold.
            for i in range(height, 0, -1):
                threshold = (max_val * i) / height
                line = f"{threshold:6.1f} ms |"
                for val in data:
                    if val >= threshold:
                        line += "█"
                    else:
                        line += " "
                chart.append(line)
            chart.append(" " + "-" * len(data))
            chart.append("时间 " + "".join(str(i % 10) for i in range(len(data))))
            return "\n".join(chart)
        try:
            event_types = self.client.execute_command('LATENCY', 'LATEST')
            for event in event_types:
                event_name = event[0]
                print(f"\n事件类型: {event_name}")
                history = self.client.execute_command('LATENCY', 'HISTORY', event_name)
                if history:
                    # Chart only the 20 most recent samples.
                    recent_latencies = [entry[1] for entry in history[-20:]]
                    chart = generate_ascii_chart(recent_latencies)
                    print(chart)
        except Exception as e:
            print(f"生成图表失败: {e}")

    def doctor_latency(self):
        """Print the human-readable LATENCY DOCTOR advice."""
        print("\n=== 延迟诊断 ===")
        try:
            result = self.client.execute_command('LATENCY', 'DOCTOR')
            print(result)
        except redis.exceptions.ResponseError as e:
            print(f"诊断失败: {e}")

    def reset_latency_events(self):
        """Clear all recorded latency events (LATENCY RESET)."""
        print("\n=== 重置延迟事件 ===")
        try:
            self.client.execute_command('LATENCY', 'RESET')
            print("延迟事件已重置")
        except Exception as e:
            print(f"重置失败: {e}")

    def simulate_various_latencies(self):
        """Run each latency scenario (command/fork/expire/AOF) in turn."""
        print("\n=== 模拟各种延迟场景 ===")
        scenarios = [
            ("命令延迟", self._simulate_command_latency),
            ("fork延迟", self._simulate_fork_latency),
            ("过期延迟", self._simulate_expire_latency),
            ("AOF延迟", self._simulate_aof_latency)
        ]
        for desc, scenario_func in scenarios:
            print(f"\n模拟: {desc}")
            scenario_func()

    def _simulate_command_latency(self):
        """Issue a random mix of write/read commands, ~10 per second."""
        for i in range(10):
            cmd = random.choice(['set', 'get', 'hset', 'lpush'])
            if cmd == 'set':
                self.client.set(f"lat:cmd:{i}", "x" * random.randint(100, 10000))
            elif cmd == 'get':
                self.client.get(f"lat:cmd:{random.randint(0, 9)}")
            elif cmd == 'hset':
                self.client.hset(f"lat:hash:{i%5}", f"field{i}", f"value{i}")
            else:
                self.client.lpush(f"lat:list:{i%3}", f"item{i}")
            time.sleep(0.1)

    def _simulate_fork_latency(self):
        """Force two fork-heavy background jobs: RDB save and AOF rewrite."""
        self.client.execute_command('BGSAVE')
        self.client.execute_command('BGREWRITEAOF')
        time.sleep(1)

    def _simulate_expire_latency(self):
        """Create 5000 keys with 1-3s TTLs, then wait for them to expire."""
        for i in range(5000):
            key = f"lat:exp:sim:{i}"
            self.client.setex(key, random.randint(1, 3), f"value_{i}")
        time.sleep(4)

    def _simulate_aof_latency(self):
        """Temporarily switch appendfsync to 'always' and hammer writes."""
        # Remember the current fsync policy so it can be restored.
        old_fsync = self.client.config_get('appendfsync')['appendfsync']
        self.client.config_set('appendfsync', 'always')
        for i in range(1000):
            self.client.set(f"lat:aof:{i}", f"value_{i}")
        self.client.config_set('appendfsync', old_fsync)

    def continuous_monitoring(self, duration=30):
        """Poll LATENCY LATEST from a daemon thread while generating load."""
        print(f"\n=== 持续监控 {duration} 秒 ===")

        def monitor():
            start_time = time.time()
            while time.time() - start_time < duration:
                try:
                    latest = self.client.execute_command('LATENCY', 'LATEST')
                    if latest:
                        # latest[0][2] is the most recent latency in ms.
                        print(f"\r当前延迟: {latest[0][2]} ms", end="")
                    else:
                        print(f"\r无延迟事件", end="")
                    time.sleep(2)
                except Exception:
                    # Swallow transient errors so the monitor keeps running.
                    pass
        monitor_thread = threading.Thread(target=monitor)
        monitor_thread.daemon = True
        monitor_thread.start()
        # Generate latency while the monitor thread is sampling.
        self.simulate_various_latencies()
        monitor_thread.join()

    def cleanup(self):
        """Delete every latency:* and lat:* demo key."""
        print("\n=== 清理测试数据 ===")
        for key in self.client.scan_iter("latency:*"):
            self.client.delete(key)
        for key in self.client.scan_iter("lat:*"):
            self.client.delete(key)
        print("测试数据已清理")

    def run_demo(self):
        """Run the whole demo; cleanup and reset always execute."""
        try:
            self.check_latency_config()
            self.configure_latency_monitor(50)  # 50 ms threshold
            self.generate_latency_events()
            self.view_latency_events()
            self.analyze_event_history('command')
            self.monitor_latency_graph()
            self.doctor_latency()
            self.continuous_monitoring(20)
        finally:
            self.cleanup()
            self.reset_latency_events()
if __name__ == "__main__":
    # Entry point: run the latency-monitoring demo against the local server.
    LatencyMonitoringDemo(password='yourpassword').run_demo()
3.6 SSL/TLS配置
知识点概述 :
Redis支持SSL/TLS加密通信,保证数据传输安全。
配置语法:
bash
# redis.conf
tls-port 6380 # TLS端口(必须与普通port不同,示例代码中使用6380)
tls-cert-file /path/to/redis.crt # 证书文件
tls-key-file /path/to/redis.key # 私钥文件
tls-ca-cert-file /path/to/ca.crt # CA证书
tls-auth-clients yes # 验证客户端证书
tls-protocols "TLSv1.2 TLSv1.3" # 支持的TLS协议
tls-ciphers "DEFAULT:!MEDIUM" # 加密套件
tls-prefer-server-ciphers yes # 优先使用服务器加密套件
tls-session-caching yes # 启用会话缓存
tls-session-cache-size 20480 # 会话缓存大小
案例代码:
python
import os
import ssl
import tempfile
import time
from pathlib import Path

import redis
class SSLTLSDemo:
"""SSL/TLS配置演示"""
def __init__(self, host='localhost', port=6380, password=None):
    # Connection parameters; 6380 is the TLS port used in the examples,
    # while plain connections fall back to 6379.
    self.host = host
    self.port = port
    self.password = password
    # Scratch directory for the self-signed demo certificates.
    self.cert_dir = tempfile.mkdtemp()
def generate_test_certificates(self):
    """Generate a throw-away CA plus server cert/key pair via openssl.

    Returns:
        dict with 'cert_file', 'key_file' and 'ca_file' paths.

    NOTE(review): shells out via os.system with interpolated paths; the
    paths come from tempfile.mkdtemp() so they are trusted here, but
    subprocess.run([...], shell=False) would be the safer pattern.
    Requires the openssl binary on PATH; command failures are ignored.
    """
    print("=== 生成测试证书 ===")
    # Production deployments should use proper CA-signed certificates;
    # these self-signed ones are for demonstration only.
    cert_file = os.path.join(self.cert_dir, 'redis.crt')
    key_file = os.path.join(self.cert_dir, 'redis.key')
    ca_file = os.path.join(self.cert_dir, 'ca.crt')
    # Shell recipe: CA key+cert, server key, CSR, then sign the CSR.
    commands = f"""
# 生成CA私钥
openssl genrsa -out {ca_file.replace('.crt', '.key')} 2048
# 生成CA证书
openssl req -x509 -new -nodes -key {ca_file.replace('.crt', '.key')} -sha256 -days 365 -out {ca_file} -subj "/CN=Redis Test CA"
# 生成服务器私钥
openssl genrsa -out {key_file} 2048
# 生成证书签名请求
openssl req -new -key {key_file} -out {cert_file.replace('.crt', '.csr')} -subj "/CN=localhost"
# 使用CA签名服务器证书
openssl x509 -req -in {cert_file.replace('.crt', '.csr')} -CA {ca_file} -CAkey {ca_file.replace('.crt', '.key')} -CAcreateserial -out {cert_file} -days 365 -sha256
"""
    print("生成证书...")
    # Execute each non-comment line of the recipe.
    for cmd in commands.strip().split('\n'):
        cmd = cmd.strip()
        if cmd and not cmd.startswith('#'):
            print(f"执行: {cmd}")
            os.system(cmd)
    return {
        'cert_file': cert_file,
        'key_file': key_file,
        'ca_file': ca_file
    }
def create_ssl_client(self, cert_files=None, verify_mode=ssl.CERT_REQUIRED):
    """Build a redis client; TLS-enabled only when cert_files is given.

    Args:
        cert_files: dict with 'ca_file', 'cert_file', 'key_file' paths,
            or None for a plain (non-TLS) connection on port 6379.
        verify_mode: ssl verification mode for the TLS context.
    """
    if not cert_files:
        # Plain TCP connection on the default non-TLS port.
        return redis.Redis(
            host=self.host,
            port=6379,
            password=self.password,
            decode_responses=True,
        )
    context = ssl.create_default_context(
        purpose=ssl.Purpose.SERVER_AUTH,
        cafile=cert_files['ca_file'],
    )
    # Self-signed demo cert has CN=localhost only; skip hostname checks.
    context.check_hostname = False
    context.verify_mode = verify_mode
    if verify_mode != ssl.CERT_NONE:
        # Present a client certificate for mutual TLS.
        context.load_cert_chain(
            certfile=cert_files['cert_file'],
            keyfile=cert_files['key_file'],
        )
    return redis.Redis(
        host=self.host,
        port=self.port,
        password=self.password,
        ssl=True,
        ssl_context=context,
        decode_responses=True,
    )
def demonstrate_ssl_connection(self):
    """Exercise plain, verified-TLS and unverified-TLS connections,
    then run basic operations and a pipeline over TLS."""
    print("\n=== SSL连接演示 ===")
    cert_files = self.generate_test_certificates()
    # BUG FIX: pre-bind every client name. The original referenced them
    # in the finally block, which raised NameError (masking the real
    # error) whenever an early connection attempt failed.
    plain_client = ssl_client = ssl_client_noverify = None
    try:
        # 1. Plain, unencrypted connection.
        print("\n1. 普通连接测试:")
        plain_client = self.create_ssl_client(None)
        plain_client.ping()
        print(" 普通连接成功")
        # 2. TLS with server-certificate verification.
        print("\n2. SSL连接测试:")
        ssl_client = self.create_ssl_client(cert_files, ssl.CERT_REQUIRED)
        ssl_client.ping()
        print(" SSL连接成功")
        # 3. TLS without certificate verification.
        print("\n3. SSL连接(不验证证书):")
        ssl_client_noverify = self.create_ssl_client(cert_files, ssl.CERT_NONE)
        ssl_client_noverify.ping()
        print(" SSL连接成功(无验证)")
        # 4. Round-trip a value over the encrypted channel.
        print("\n4. SSL下基本操作测试:")
        test_key = "ssl:test:key"
        test_value = "SSL Connection Test"
        ssl_client.set(test_key, test_value)
        retrieved = ssl_client.get(test_key)
        print(f" 写入: {test_key} = {test_value}")
        print(f" 读取: {retrieved}")
        print(f" 匹配: {'✓' if retrieved == test_value else '✗'}")
        # 5. Pipelining also works over TLS.
        print("\n5. SSL管道测试:")
        pipe = ssl_client.pipeline()
        for i in range(10):
            pipe.set(f"ssl:pipe:{i}", f"value{i}")
            pipe.get(f"ssl:pipe:{i}")
        results = pipe.execute()
        print(f" 管道执行成功,返回{len(results)}个结果")
    except redis.exceptions.ConnectionError as e:
        print(f"连接失败: {e}")
        print("请确保Redis服务器已配置SSL并正确启动")
    finally:
        # Close only the clients that were actually created.
        for client in (plain_client, ssl_client, ssl_client_noverify):
            if client is None:
                continue
            try:
                client.close()
            except Exception:
                pass
def configure_redis_ssl(self):
    """Print a sample redis.conf TLS section and a matching launch command.

    Fix: the original sample repeated the ``tls-cert-file`` /
    ``tls-key-file`` directives a second time under a trailing
    "TLS specific settings" heading; the misleading duplicate block has
    been removed.  Purely informational — nothing is applied to a server.
    """
    print("\n=== Redis SSL配置示例 ===")
    ssl_config = """
# Redis SSL/TLS Configuration
port 6379
tls-port 6380
# TLS certificate configuration
tls-cert-file /etc/ssl/redis/redis.crt
tls-key-file /etc/ssl/redis/redis.key
tls-ca-cert-file /etc/ssl/redis/ca.crt
# TLS authentication
tls-auth-clients yes
tls-replication yes
tls-cluster yes
# TLS protocols and ciphers
tls-protocols "TLSv1.2 TLSv1.3"
tls-ciphers "DEFAULT:!MEDIUM"
tls-prefer-server-ciphers yes
# TLS session caching
tls-session-caching yes
tls-session-cache-size 20480
tls-session-cache-timeout 300
"""
    print(ssl_config)
    print("\n启动Redis服务器:")
    print("redis-server /path/to/redis.conf --tls-port 6380 \\")
    print(" --tls-cert-file /etc/ssl/redis/redis.crt \\")
    print(" --tls-key-file /etc/ssl/redis/redis.key \\")
    print(" --tls-ca-cert-file /etc/ssl/redis/ca.crt")
def test_ssl_performance(self):
    """Compare round-trip latency of plain vs. TLS connections.

    Runs 1000 SET+GET pairs over each connection type and reports the
    elapsed time plus the relative TLS overhead.
    """
    print("\n=== SSL性能测试 ===")
    cert_files = self.generate_test_certificates()

    def timed_roundtrips(client, prefix):
        # 1000 SET+GET pairs against *client*; returns elapsed seconds.
        began = time.time()
        for n in range(1000):
            client.set(f"{prefix}:{n}", "x" * 100)
            client.get(f"{prefix}:{n}")
        return time.time() - began

    # Baseline: plain TCP.
    print("\n1. 普通连接性能测试:")
    plain_client = self.create_ssl_client(None)
    plain_time = timed_roundtrips(plain_client, "perf:plain")
    print(f" 1000次操作耗时: {plain_time:.2f}秒")
    print(f" 平均: {plain_time*1000:.2f}毫秒/操作")

    # Same workload over verified TLS.
    print("\n2. SSL连接性能测试:")
    ssl_client = self.create_ssl_client(cert_files, ssl.CERT_REQUIRED)
    ssl_time = timed_roundtrips(ssl_client, "perf:ssl")
    print(f" 1000次操作耗时: {ssl_time:.2f}秒")
    print(f" 平均: {ssl_time*1000:.2f}毫秒/操作")

    print(f"\n性能对比:")
    print(f" SSL相对普通连接慢: {(ssl_time/plain_time - 1)*100:.1f}%")
    plain_client.close()
    ssl_client.close()
def demonstrate_ssl_security(self):
    """Print an overview of the security properties TLS adds to Redis.

    Output-only: lists the guarantees, then contrasts what tcpdump sees
    on a plain vs. an encrypted port.
    """
    print("\n=== SSL安全特性 ===")
    feature_table = (
        ("1. 数据加密", "防止中间人窃听,所有传输数据加密"),
        ("2. 身份验证", "服务器身份验证,防止伪造服务器"),
        ("3. 数据完整性", "防止数据在传输过程中被篡改"),
        ("4. 客户端认证", "可选的双向认证,验证客户端身份"),
        ("5. 前向安全性", "使用DHE/ECDHE密钥交换,保证前向安全"),
    )
    for title, detail in feature_table:
        print(f"{title}:")
        print(f" 描述: {detail}")

    # Illustrative packet-capture comparison (not executed).
    for line in (
        "\n安全演示:",
        " 未加密时,可以使用tcpdump捕获明文数据:",
        " $ sudo tcpdump -i lo -A port 6379",
        " 可以看到类似: 'SET user:password secret123'",
        "\n 使用SSL后,所有数据加密:",
        " $ sudo tcpdump -i lo -A port 6380",
        " 看到的是加密数据,无法直接读取",
    ):
        print(line)
def ssl_best_practices(self):
    """Print a checklist of TLS deployment best practices for Redis.

    Output-only: four topics, each with a bulleted list of items.
    """
    print("\n=== SSL最佳实践 ===")
    checklist = (
        ("证书管理", (
            "使用可信CA签发的证书",
            "定期轮换证书(建议每3-6个月)",
            "保护好私钥文件权限(600)",
            "使用证书吊销列表(CRL)或OCSP",
        )),
        ("协议配置", (
            "禁用不安全的协议版本(SSLv2, SSLv3, TLSv1.0)",
            "使用强加密套件",
            "启用前向安全性",
            "配置合适的会话缓存",
        )),
        ("性能优化", (
            "使用会话重用减少握手开销",
            "调整会话缓存大小",
            "考虑使用硬件加速(如Intel AES-NI)",
            "监控SSL连接性能",
        )),
        ("监控和告警", (
            "监控证书过期时间",
            "监控SSL连接错误",
            "记录SSL握手失败",
            "设置证书过期告警",
        )),
    )
    for topic, items in checklist:
        print(f"\n{topic}:")
        for entry in items:
            print(f" • {entry}")
def cleanup(self):
    """Delete the temporary certificate directory and all demo keys.

    Fix: the bare ``except:`` was narrowed to ``except Exception`` so
    KeyboardInterrupt/SystemExit are no longer swallowed; the key
    deletion remains deliberately best-effort.
    """
    import shutil  # local import kept: only needed on this cleanup path
    print("\n=== 清理测试数据 ===")
    # Remove the self-signed certificates created for the demo.
    if os.path.exists(self.cert_dir):
        shutil.rmtree(self.cert_dir)
        print(f"已删除临时证书目录: {self.cert_dir}")
    # Best-effort deletion of demo keys over a plain connection.
    try:
        plain_client = redis.Redis(
            host=self.host,
            port=6379,
            password=self.password,
            decode_responses=True
        )
        for key in plain_client.scan_iter("ssl:*"):
            plain_client.delete(key)
        for key in plain_client.scan_iter("perf:*"):
            plain_client.delete(key)
        plain_client.close()
    except Exception:
        pass
def run_demo(self):
    """Run every SSL/TLS demonstration step, always cleaning up at the end."""
    steps = (
        self.configure_redis_ssl,
        self.demonstrate_ssl_connection,
        self.test_ssl_performance,
        self.demonstrate_ssl_security,
        self.ssl_best_practices,
    )
    try:
        for step in steps:
            step()
    except Exception as e:
        print(f"演示失败: {e}")
    finally:
        self.cleanup()
if __name__ == "__main__":
    # Demo entry point; replace 'yourpassword' with the server's requirepass.
    demo = SSLTLSDemo(password='yourpassword')
    demo.run_demo()
3.7 ACL访问控制列表
知识点概述:
Redis 6.0引入的ACL功能,提供细粒度的权限控制。
配置语法:
bash
# redis.conf
aclfile /etc/redis/users.acl # ACL配置文件
# 或在命令行中创建用户
ACL SETUSER username on >password ~* +@all
案例代码:
python
import redis
import time
import hashlib
class ACLDemo:
    """Walkthrough of Redis 6.0+ ACL (access-control-list) features.

    Creates users with different permission profiles, exercises them,
    inspects the ACL log, saves/loads the ACL file, and cleans up.
    """
    def __init__(self, host='localhost', port=6379, admin_password=None):
        # Administrative connection (authenticates as the default user).
        self.admin = redis.Redis(
            host=host,
            port=port,
            password=admin_password,
            decode_responses=True
        )
        # Kept so per-user test connections can target the same server.
        self.host = host
        self.port = port
    def check_acl_status(self):
        """Print the ACL file path, known users, and the current user's rules."""
        print("=== ACL状态检查 ===")
        # Where (if anywhere) ACL rules are persisted on disk.
        aclfile = self.admin.config_get('aclfile')
        print(f"ACL配置文件: {aclfile['aclfile']}")
        # All user names known to the server.
        users = self.admin.acl_users()
        print(f"当前用户: {users}")
        # Identity of this connection.
        whoami = self.admin.acl_whoami()
        print(f"当前用户身份: {whoami}")
        # Full rule set for the current user.
        rules = self.admin.acl_getuser(whoami)
        print(f"当前用户权限:")
        for key, value in rules.items():
            print(f" {key}: {value}")
    def create_users(self):
        """Create four users with distinct permission profiles."""
        print("\n=== 创建用户 ===")
        # Drop pre-existing demo users (keep 'default' and 'admin').
        try:
            for user in self.admin.acl_users():
                if user not in ['default', 'admin']:
                    self.admin.acl_deluser(user)
                    print(f"已删除用户: {user}")
        except:
            pass
        # 1. Read-only user: GET/PING only, restricted to cache:* keys.
        print("\n1. 创建只读用户:")
        readonly_pass = "readonly123"
        self.admin.acl_setuser(
            'readonly',
            enabled=True,
            passwords=[f'+{readonly_pass}'],
            commands=['+get', '+ping'],
            keys=['cache:*']
        )
        print(f" 用户: readonly")
        print(f" 密码: {readonly_pass}")
        print(f" 权限: 只能执行GET/PING命令,只能访问cache:*键")
        # 2. Read-write user: command categories, restricted key patterns.
        print("\n2. 创建读写用户:")
        readwrite_pass = "readwrite123"
        self.admin.acl_setuser(
            'readwrite',
            enabled=True,
            passwords=[f'+{readwrite_pass}'],
            commands=['+@read', '+@write', '+@set', '+@hash', '+@list'],
            keys=['app:*', 'user:*']
        )
        print(f" 用户: readwrite")
        print(f" 密码: {readwrite_pass}")
        print(f" 权限: 读写权限,只能访问app:*和user:*键")
        # 3. Administrator: all commands on all keys.
        print("\n3. 创建管理员用户:")
        admin_pass = "admin123"
        self.admin.acl_setuser(
            'admin',
            enabled=True,
            passwords=[f'+{admin_pass}'],
            commands=['+@all'],
            keys=['*']
        )
        print(f" 用户: admin")
        print(f" 密码: {admin_pass}")
        print(f" 权限: 所有命令,所有键")
        # 4. Restricted user: counter ops allowed, destructive commands denied.
        print("\n4. 创建受限用户:")
        restricted_pass = "restricted123"
        self.admin.acl_setuser(
            'restricted',
            enabled=True,
            passwords=[f'+{restricted_pass}'],
            commands=['+get', '+set', '+incr', '+decr', '-del', '-flushdb', '-flushall'],
            keys=['counter:*']
        )
        print(f" 用户: restricted")
        print(f" 密码: {restricted_pass}")
        print(f" 权限: 只能操作计数器,不能删除")
        # Confirm all users now exist.
        users = self.admin.acl_users()
        print(f"\n当前用户列表: {users}")
    def test_user_permissions(self):
        """Connect as each user and verify allowed vs. denied operations."""
        print("\n=== 测试用户权限 ===")
        def create_user_connection(username, password):
            # Fresh connection authenticated as the given ACL user.
            return redis.Redis(
                host=self.host,
                port=self.port,
                username=username,
                password=password,
                decode_responses=True
            )
        # Seed keys covering each user's key-pattern scope.
        self.admin.set('app:config', 'app_value')
        self.admin.set('cache:data', 'cache_value')
        self.admin.set('counter:visits', '100')
        self.admin.set('user:admin', 'admin_data')
        # 1. Read-only user: GET on cache:* allowed; SET and app:* denied.
        print("\n1. 只读用户测试:")
        readonly_client = create_user_connection('readonly', 'readonly123')
        try:
            # Allowed operation.
            value = readonly_client.get('cache:data')
            print(f" ✓ GET cache:data: {value}")
            # Denied: SET is not in the user's command list.
            try:
                readonly_client.set('cache:new', 'value')
                print(f" ✗ 应该禁止SET操作")
            except redis.exceptions.ResponseError as e:
                print(f" ✓ 正确拒绝: {e}")
            try:
                readonly_client.get('app:config')  # key outside allowed pattern
                print(f" ✗ 应该禁止访问app:*")
            except redis.exceptions.ResponseError as e:
                print(f" ✓ 正确拒绝: {e}")
        finally:
            readonly_client.close()
        # 2. Read-write user: app:* allowed; cache:* denied.
        print("\n2. 读写用户测试:")
        readwrite_client = create_user_connection('readwrite', 'readwrite123')
        try:
            # Allowed operations.
            readwrite_client.set('app:new', 'new_value')
            print(f" ✓ SET app:new 成功")
            value = readwrite_client.get('app:new')
            print(f" ✓ GET app:new: {value}")
            # Denied: key outside allowed pattern.
            try:
                readwrite_client.set('cache:data', 'modified')
                print(f" ✗ 应该禁止访问cache:*")
            except redis.exceptions.ResponseError as e:
                print(f" ✓ 正确拒绝: {e}")
        finally:
            readwrite_client.close()
        # 3. Restricted user: INCR/SET allowed on counter:*; DEL denied.
        print("\n3. 受限用户测试:")
        restricted_client = create_user_connection('restricted', 'restricted123')
        try:
            # Allowed operations.
            restricted_client.incr('counter:visits')
            print(f" ✓ INCR counter:visits 成功")
            value = restricted_client.get('counter:visits')
            print(f" ✓ GET counter:visits: {value}")
            # SET is explicitly allowed for this user.
            try:
                restricted_client.set('counter:visits', '0')
                print(f" ✓ SET counter:visits 允许(在允许命令列表中)")
            except redis.exceptions.ResponseError as e:
                print(f" ✗ SET应该允许: {e}")
            # DEL is explicitly denied.
            try:
                restricted_client.delete('counter:visits')
                print(f" ✗ 应该禁止DELETE操作")
            except redis.exceptions.ResponseError as e:
                print(f" ✓ 正确拒绝: {e}")
        finally:
            restricted_client.close()
        # 4. Administrator: everything allowed, including ACL introspection.
        print("\n4. 管理员用户测试:")
        admin_client = create_user_connection('admin', 'admin123')
        try:
            admin_client.set('any:key', 'any_value')
            print(f" ✓ SET any:key 成功")
            admin_client.get('any:key')
            print(f" ✓ GET any:key 成功")
            admin_client.delete('any:key')
            print(f" ✓ DELETE any:key 成功")
            # ACL management commands work for the admin user.
            users = admin_client.acl_users()
            print(f" ✓ ACL USERS: {users}")
        finally:
            admin_client.close()
    def manage_acls(self):
        """Inspect, modify, disable/re-enable, and delete users at runtime."""
        print("\n=== ACL管理 ===")
        # 1. Dump each demo user's full rule set.
        print("\n1. 查看用户详情:")
        for username in ['readonly', 'readwrite', 'restricted', 'admin']:
            try:
                user_info = self.admin.acl_getuser(username)
                print(f"\n用户: {username}")
                for key, value in user_info.items():
                    print(f" {key}: {value}")
            except:
                pass
        # 2. Extend the readonly user's key patterns.
        print("\n2. 修改用户权限:")
        print(" 为只读用户添加对app:*的读权限")
        self.admin.acl_setuser(
            'readonly',
            keys=['cache:*', 'app:*']  # add app:* pattern
        )
        # Confirm the change took effect.
        user_info = self.admin.acl_getuser('readonly')
        print(f" 更新后的key模式: {user_info['keys']}")
        # 3. Disable the user; authentication should then fail.
        print("\n3. 临时禁用用户:")
        self.admin.acl_setuser('readonly', enabled=False)
        print(" 用户readonly已禁用")
        try:
            readonly_client = redis.Redis(
                host=self.host,
                port=self.port,
                username='readonly',
                password='readonly123',
                decode_responses=True
            )
            readonly_client.ping()
            print(" ✗ 用户应该被禁用")
        except redis.exceptions.ConnectionError as e:
            # AuthenticationError is a ConnectionError subclass in redis-py.
            print(f" ✓ 连接失败: {e}")
        # Re-enable so later steps still work.
        self.admin.acl_setuser('readonly', enabled=True)
        print(" 用户readonly已重新启用")
        # 4. Delete one user entirely.
        print("\n4. 删除用户:")
        self.admin.acl_deluser('restricted')
        print(" 用户restricted已删除")
        # Confirm deletion.
        users = self.admin.acl_users()
        print(f" 当前用户: {users}")
    def acl_log(self):
        """Generate some ACL violations, then dump and reset the ACL log."""
        print("\n=== ACL日志 ===")
        print("生成ACL错误日志...")
        # Authentication failure: wrong password.
        try:
            bad_client = redis.Redis(
                host=self.host,
                port=self.port,
                username='readonly',
                password='wrong_password',
                decode_responses=True
            )
            bad_client.ping()
        except:
            pass
        # Authorization failure: SET is denied for the readonly user.
        try:
            readonly_client = redis.Redis(
                host=self.host,
                port=self.port,
                username='readonly',
                password='readonly123',
                decode_responses=True
            )
            readonly_client.set('cache:test', 'value')
        except:
            pass
        # Dump the server-side ACL log.
        acl_log = self.admin.acl_log()
        print(f"ACL日志条目数: {len(acl_log)}")
        for i, entry in enumerate(acl_log, 1):
            # NOTE(review): entry key names ('datetime', 'username', ...)
            # depend on the redis-py version's ACL LOG parsing — confirm
            # against the client version in use.
            print(f"\n日志 {i}:")
            print(f" 时间: {entry['datetime']}")
            print(f" 用户名: {entry['username']}")
            print(f" 上下文: {entry['context']}")
            print(f" 原因: {entry['reason']}")
            print(f" 命令: {' '.join(entry['command'])}")
            print(f" 客户端: {entry['client-info']}")
        # Clear the log for the next run.
        self.admin.acl_log_reset()
        print("\nACL日志已清空")
    def save_load_acl(self):
        """Persist ACL rules to the configured aclfile and load them back."""
        print("\n=== 保存/加载ACL ===")
        # 1. ACL SAVE requires 'aclfile' to be configured server-side.
        try:
            self.admin.acl_save()
            print("ACL配置已保存到文件")
        except redis.exceptions.ResponseError as e:
            print(f"保存失败: {e}")
            print("需要在redis.conf中配置aclfile")
        # 2. Reload rules from the file.
        try:
            self.admin.acl_load()
            print("ACL配置已从文件加载")
        except redis.exceptions.ResponseError as e:
            print(f"加载失败: {e}")
        # 3. Show the file contents when it is locally readable.
        # NOTE(review): relies on the module-level 'os' import done in the
        # __main__ guard; also assumes the script runs on the Redis host.
        aclfile = self.admin.config_get('aclfile')['aclfile']
        if aclfile and os.path.exists(aclfile):
            print(f"\nACL文件内容 ({aclfile}):")
            with open(aclfile, 'r') as f:
                for line in f:
                    print(f" {line.strip()}")
    def acl_categories(self):
        """Print the common @category command groups usable in ACL rules."""
        print("\n=== ACL命令类别 ===")
        categories = [
            ("@admin", "管理命令(ACL, CONFIG, DEBUG等)"),
            ("@dangerous", "危险命令(FLUSHALL, SHUTDOWN等)"),
            ("@keyspace", "键空间命令(DEL, EXISTS, EXPIRE等)"),
            ("@read", "读命令(GET, HGET, LRANGE等)"),
            ("@write", "写命令(SET, HSET, LPUSH等)"),
            ("@set", "集合操作(SADD, SREM, SISMEMBER等)"),
            ("@hash", "哈希操作(HSET, HGET, HDEL等)"),
            ("@list", "列表操作(LPUSH, LPOP, LRANGE等)"),
            ("@string", "字符串操作(SET, GET, INCR等)"),
            ("@pubsub", "发布订阅命令"),
            ("@transaction", "事务命令(MULTI, EXEC等)"),
            ("@connection", "连接命令(AUTH, PING等)"),
            ("@slow", "慢命令(KEYS, SORT等)")
        ]
        print("命令类别说明:")
        for category, description in categories:
            print(f" {category:15} - {description}")
    def acl_best_practices(self):
        """Print ACL hardening recommendations and example rules."""
        print("\n=== ACL最佳实践 ===")
        practices = [
            {
                "title": "用户管理",
                "items": [
                    "遵循最小权限原则",
                    "为不同应用创建不同用户",
                    "定期审查用户权限",
                    "禁用默认用户或修改其密码"
                ]
            },
            {
                "title": "密码策略",
                "items": [
                    "使用强密码",
                    "定期更换密码",
                    "不要在代码中硬编码密码",
                    "使用环境变量或密钥管理服务"
                ]
            },
            {
                "title": "命令限制",
                "items": [
                    "限制危险命令(FLUSHALL, SHUTDOWN等)",
                    "限制慢命令(KEYS, SORT等)",
                    "使用命令类别简化权限管理",
                    "监控ACL日志发现异常"
                ]
            },
            {
                "title": "密钥模式",
                "items": [
                    "使用有意义的key前缀",
                    "限制应用只能访问特定模式",
                    "避免使用过于宽泛的模式",
                    "考虑使用多级模式"
                ]
            }
        ]
        for practice in practices:
            print(f"\n{practice['title']}:")
            for item in practice['items']:
                print(f" • {item}")
        # Example ACL-file rules for typical roles.
        print("\n示例ACL规则:")
        print('''
# 应用用户
user app1 on +@read +@write -@dangerous ~app1:* >strong_password
# 只读监控用户
user monitor on +@read -@dangerous +ping +info ~* >monitor_pass
# 管理员用户
user admin on +@all ~* >admin_pass
# 定时任务用户
user cron on +get +set +incr +decr ~counter:* ~temp:* >cron_pass
''')
    def cleanup(self):
        """Delete the demo users and wipe the keys created by the demo."""
        print("\n=== 清理测试数据 ===")
        # Remove all users this demo created.
        for user in ['readonly', 'readwrite', 'restricted', 'admin']:
            try:
                self.admin.acl_deluser(user)
                print(f"已删除用户: {user}")
            except:
                pass
        # NOTE(review): this deletes every key in the database; the
        # 'default' membership check looks like user-list logic applied
        # to key names and is effectively a no-op — confirm intent.
        for key in self.admin.scan_iter("*"):
            if key not in ['default']:
                self.admin.delete(key)
        print("测试数据已清理")
    def run_demo(self):
        """Run every ACL demonstration step, always cleaning up at the end."""
        try:
            self.check_acl_status()
            self.acl_categories()
            self.create_users()
            self.test_user_permissions()
            self.manage_acls()
            self.acl_log()
            self.save_load_acl()
            self.acl_best_practices()
        finally:
            self.cleanup()
if __name__ == "__main__":
    # 'os' is used by save_load_acl when inspecting the ACL file on disk.
    import os
    demo = ACLDemo(admin_password='yourpassword')
    demo.run_demo()
四、监控与管理
4.1 RedisInsight可视化监控
知识点概述:
RedisInsight是Redis官方提供的图形化管理工具。
案例代码:
python
# 注意:RedisInsight是一个GUI工具,这里提供API调用示例
import redis
import json
import requests
from datetime import datetime
class RedisInsightDemo:
    """Monitoring walkthrough mirroring what RedisInsight displays.

    Collects INFO sections, slow-log entries, large keys, client lists,
    dashboard-style aggregates, and a Prometheus-text rendering.
    """
    def __init__(self, host='localhost', port=6379, password=None):
        # Single shared connection used by every collector below.
        self.client = redis.Redis(
            host=host,
            port=port,
            password=password,
            decode_responses=True
        )
    def collect_metrics(self):
        """Print server/memory/client/stats/keyspace info; return raw sections."""
        print("=== 收集监控指标 ===")
        # 1. INFO server section.
        server_info = self.client.info('server')
        print("\n1. 服务器信息:")
        print(f" Redis版本: {server_info['redis_version']}")
        print(f" OS: {server_info['os']}")
        print(f" 进程ID: {server_info['process_id']}")
        print(f" TCP端口: {server_info['tcp_port']}")
        print(f" 运行时间: {server_info['uptime_in_seconds']}秒")
        # 2. INFO memory section.
        memory_info = self.client.info('memory')
        print("\n2. 内存信息:")
        print(f" 已用内存: {memory_info['used_memory_human']}")
        print(f" 峰值内存: {memory_info['used_memory_peak_human']}")
        print(f" 内存碎片率: {memory_info['mem_fragmentation_ratio']}")
        print(f" 最大内存: {memory_info.get('maxmemory_human', '无限制')}")
        # 3. INFO clients section.
        clients_info = self.client.info('clients')
        print("\n3. 客户端信息:")
        print(f" 已连接客户端: {clients_info['connected_clients']}")
        print(f" 最大连接数: {clients_info.get('maxclients', 'N/A')}")
        print(f" 阻塞客户端: {clients_info.get('blocked_clients', 0)}")
        # 4. INFO stats section.
        stats_info = self.client.info('stats')
        print("\n4. 统计信息:")
        print(f" 总连接数: {stats_info['total_connections_received']}")
        print(f" 总命令数: {stats_info['total_commands_processed']}")
        print(f" 瞬时ops: {stats_info['instantaneous_ops_per_sec']}")
        print(f" 网络输入: {stats_info.get('total_net_input_bytes_human', 'N/A')}")
        print(f" 网络输出: {stats_info.get('total_net_output_bytes_human', 'N/A')}")
        # 5. INFO keyspace section, with a key-count rollup.
        keyspace_info = self.client.info('keyspace')
        print("\n5. 键空间信息:")
        total_keys = 0
        for db, stats in keyspace_info.items():
            if db.startswith('db'):
                print(f" {db}: {stats}")
                # NOTE(review): assumes each dbN value is the raw
                # "keys=N,expires=M,..." string; recent redis-py versions
                # parse it into a dict, which would break split(',').
                parts = stats.split(',')
                for part in parts:
                    if 'keys=' in part:
                        total_keys += int(part.split('=')[1])
        print(f" 总键数: {total_keys}")
        return {
            'timestamp': datetime.now().isoformat(),
            'server': server_info,
            'memory': memory_info,
            'clients': clients_info,
            'stats': stats_info,
            'keyspace': keyspace_info
        }
    def generate_performance_report(self):
        """Print command/CPU/persistence/replication stats; return command list."""
        print("\n=== 生成性能报告 ===")
        # 1. Per-command call counts and timings (INFO commandstats).
        cmd_stats = self.client.info('commandstats')
        print("命令执行统计:")
        cmd_list = []
        for cmd, stats in cmd_stats.items():
            if cmd.startswith('cmdstat_'):
                cmd_name = cmd[8:]  # strip the 'cmdstat_' prefix
                calls = int(stats['calls'])
                usec = int(stats['usec'])
                usec_per_call = float(stats['usec_per_call'])
                cmd_list.append({
                    'command': cmd_name,
                    'calls': calls,
                    'total_time': usec,
                    'avg_time': usec_per_call
                })
                print(f" {cmd_name}:")
                print(f" 调用次数: {calls}")
                print(f" 总耗时: {usec/1000000:.2f}秒")
                print(f" 平均耗时: {usec_per_call:.2f}微秒")
        # 2. CPU time consumed by the server and its children.
        cpu_info = self.client.info('cpu')
        print("\nCPU使用情况:")
        print(f" 系统CPU: {cpu_info['used_cpu_sys']:.2f}秒")
        print(f" 用户CPU: {cpu_info['used_cpu_user']:.2f}秒")
        print(f" 子进程系统CPU: {cpu_info.get('used_cpu_sys_children', 0):.2f}秒")
        print(f" 子进程用户CPU: {cpu_info.get('used_cpu_user_children', 0):.2f}秒")
        # 3. RDB/AOF persistence status.
        persistence_info = self.client.info('persistence')
        print("\n持久化信息:")
        if persistence_info.get('rdb_last_save_time'):
            last_save = datetime.fromtimestamp(persistence_info['rdb_last_save_time'])
            print(f" 最后RDB保存: {last_save}")
            print(f" RDB文件大小: {persistence_info.get('rdb_current_size', 0) / 1024 / 1024:.2f}MB")
        if persistence_info.get('aof_enabled'):
            print(f" AOF开启: 是")
            print(f" AOF文件大小: {persistence_info.get('aof_current_size', 0) / 1024 / 1024:.2f}MB")
        # 4. Replication role and offsets.
        replication_info = self.client.info('replication')
        if replication_info.get('role'):
            print(f"\n复制角色: {replication_info['role']}")
            if replication_info['role'] == 'master':
                print(f" 从节点数: {replication_info.get('connected_slaves', 0)}")
            else:
                print(f" 主节点: {replication_info.get('master_host', 'N/A')}:{replication_info.get('master_port', 'N/A')}")
                print(f" 复制偏移量: {replication_info.get('master_repl_offset', 0)}")
        return cmd_list
    def monitor_slow_queries(self):
        """Print slow-log configuration and up to 10 recent entries."""
        print("\n=== 监控慢查询 ===")
        # Threshold (microseconds) and ring-buffer length.
        slowlog_config = self.client.config_get('slowlog-log-slower-than')
        slowlog_max_len = self.client.config_get('slowlog-max-len')
        print(f"慢查询阈值: {slowlog_config['slowlog-log-slower-than']} 微秒")
        print(f"慢日志最大长度: {slowlog_max_len['slowlog-max-len']}")
        # Most recent slow entries.
        slowlogs = self.client.slowlog_get(10)
        if not slowlogs:
            print("没有慢查询记录")
        else:
            print(f"\n最近{len(slowlogs)}条慢查询:")
            for i, log in enumerate(slowlogs, 1):
                duration = log['duration']
                command = ' '.join(log['command'])
                timestamp = log['start_time']
                time_str = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
                print(f"\n{i}. 时间: {time_str}")
                print(f" 耗时: {duration} 微秒 ({duration/1000:.2f} 毫秒)")
                # Truncate very long commands for readability.
                print(f" 命令: {command[:100]}{'...' if len(command) > 100 else ''}")
        return slowlogs
    def analyze_memory_usage(self):
        """Report memory usage and scan the keyspace for keys over 1MB."""
        print("\n=== 内存使用分析 ===")
        memory_info = self.client.info('memory')
        # Usage percentage only meaningful when maxmemory is configured.
        used_memory = memory_info['used_memory']
        maxmemory = memory_info.get('maxmemory', 0)
        if maxmemory > 0:
            usage_percent = (used_memory / maxmemory) * 100
            print(f"内存使用率: {usage_percent:.1f}% ({used_memory/1024/1024:.2f}MB / {maxmemory/1024/1024:.2f}MB)")
        else:
            print(f"内存使用: {used_memory/1024/1024:.2f}MB (无限制)")
        # Full SCAN of the keyspace, checking MEMORY USAGE per key.
        print("\n查找大键 (扫描中...):")
        large_keys = []
        # Initial sentinel '0' differs from the int 0 the server returns,
        # so the loop runs until the cursor wraps back to 0.
        cursor = '0'
        while cursor != 0:
            cursor, keys = self.client.scan(cursor, count=1000)
            for key in keys:
                try:
                    key_type = self.client.type(key)
                    key_size = self.client.memory_usage(key)
                    if key_size and key_size > 1024 * 1024:  # larger than 1MB
                        large_keys.append({
                            'key': key,
                            'type': key_type,
                            'size': key_size,
                            'size_mb': key_size / 1024 / 1024
                        })
                except:
                    pass
        if large_keys:
            print(f"发现 {len(large_keys)} 个大键 (>1MB):")
            # Top 10, largest first.
            for key_info in sorted(large_keys, key=lambda x: x['size'], reverse=True)[:10]:
                print(f" {key_info['key']}: {key_info['type']} - {key_info['size_mb']:.2f}MB")
        else:
            print(" 没有发现大键")
        return large_keys
    def monitor_connections(self):
        """Summarize CLIENT LIST output: per-IP counts and idle connections."""
        print("\n=== 连接监控 ===")
        clients = self.client.client_list()
        print(f"当前连接数: {len(clients)}")
        # Group connections by client IP.
        addr_count = {}
        for client in clients:
            addr = client.get('addr', 'unknown')
            # addr is "ip:port"; keep only the IP.
            ip = addr.split(':')[0] if ':' in addr else addr
            addr_count[ip] = addr_count.get(ip, 0) + 1
        print("\n按IP统计连接:")
        for ip, count in addr_count.items():
            print(f" {ip}: {count}个连接")
        # Flag connections idle for more than five minutes.
        idle_clients = []
        for client in clients:
            idle = int(client.get('idle', 0))
            if idle > 300:  # idle longer than 5 minutes
                idle_clients.append({
                    'addr': client.get('addr', 'unknown'),
                    'idle': idle,
                    'cmd': client.get('cmd', 'unknown')
                })
        if idle_clients:
            print(f"\n空闲连接 (>5分钟):")
            for client in idle_clients:
                print(f" {client['addr']} - 空闲{client['idle']}秒, 命令:{client['cmd']}")
        return clients
    def generate_dashboard_data(self):
        """Build a dashboard dict of key metrics plus threshold-based alerts."""
        print("\n=== 生成仪表盘数据 ===")
        dashboard = {
            'timestamp': datetime.now().isoformat(),
            'metrics': {
                'connected_clients': 0,
                'used_memory': 0,
                'total_commands': 0,
                'ops_per_sec': 0,
                'hit_rate': 0,
                'evicted_keys': 0,
                'expired_keys': 0,
                'rejected_connections': 0
            },
            'alerts': []
        }
        # Pull the aggregated INFO output once.
        info = self.client.info()
        dashboard['metrics']['connected_clients'] = info.get('connected_clients', 0)
        dashboard['metrics']['used_memory'] = info.get('used_memory', 0)
        dashboard['metrics']['total_commands'] = info.get('total_commands_processed', 0)
        dashboard['metrics']['ops_per_sec'] = info.get('instantaneous_ops_per_sec', 0)
        dashboard['metrics']['evicted_keys'] = info.get('evicted_keys', 0)
        dashboard['metrics']['expired_keys'] = info.get('expired_keys', 0)
        dashboard['metrics']['rejected_connections'] = info.get('rejected_connections', 0)
        # Cache hit rate = hits / (hits + misses); guard divide-by-zero.
        keyspace_hits = info.get('keyspace_hits', 0)
        keyspace_misses = info.get('keyspace_misses', 0)
        total_requests = keyspace_hits + keyspace_misses
        if total_requests > 0:
            hit_rate = (keyspace_hits / total_requests) * 100
            dashboard['metrics']['hit_rate'] = hit_rate
        else:
            dashboard['metrics']['hit_rate'] = 0
        # Hard-coded alert thresholds for the demo.
        if dashboard['metrics']['connected_clients'] > 9000:
            dashboard['alerts'].append({
                'level': 'warning',
                'message': f"连接数过高: {dashboard['metrics']['connected_clients']}"
            })
        if dashboard['metrics']['hit_rate'] < 80:
            dashboard['alerts'].append({
                'level': 'warning',
                'message': f"命中率过低: {dashboard['metrics']['hit_rate']:.1f}%"
            })
        if dashboard['metrics']['evicted_keys'] > 1000:
            dashboard['alerts'].append({
                'level': 'critical',
                'message': f"大量淘汰键: {dashboard['metrics']['evicted_keys']}"
            })
        # Human-readable dump of the collected numbers.
        print("\n当前指标:")
        for key, value in dashboard['metrics'].items():
            if key == 'used_memory':
                print(f" {key}: {value/1024/1024:.2f}MB")
            elif key == 'hit_rate':
                print(f" {key}: {value:.1f}%")
            else:
                print(f" {key}: {value}")
        if dashboard['alerts']:
            print("\n告警:")
            for alert in dashboard['alerts']:
                print(f" [{alert['level']}] {alert['message']}")
        return dashboard
    def export_to_prometheus(self):
        """Print INFO-derived metrics in Prometheus text exposition format."""
        print("\n=== 导出Prometheus格式 ===")
        info = self.client.info()
        metrics = []
        # (metric name, value, HELP text) triples.
        prometheus_metrics = [
            ('redis_connected_clients', info.get('connected_clients', 0), '当前连接数'),
            ('redis_used_memory_bytes', info.get('used_memory', 0), '内存使用(bytes)'),
            ('redis_total_commands_processed', info.get('total_commands_processed', 0), '总命令数'),
            ('redis_instantaneous_ops_per_sec', info.get('instantaneous_ops_per_sec', 0), '每秒操作数'),
            ('redis_keyspace_hits_total', info.get('keyspace_hits', 0), '命中总数'),
            ('redis_keyspace_misses_total', info.get('keyspace_misses', 0), '未命中总数'),
            ('redis_evicted_keys_total', info.get('evicted_keys', 0), '淘汰键总数'),
            ('redis_expired_keys_total', info.get('expired_keys', 0), '过期键总数'),
            ('redis_rejected_connections_total', info.get('rejected_connections', 0), '拒绝连接数'),
        ]
        print("# HELP redis_xxx Redis metrics")
        print("# TYPE redis_xxx gauge/counter")
        for name, value, help_text in prometheus_metrics:
            print(f"# HELP {name} {help_text}")
            # Convention: *_total metrics are counters, the rest gauges.
            if 'total' in name:
                print(f"# TYPE {name} counter")
            else:
                print(f"# TYPE {name} gauge")
            print(f"{name} {value}")
            print()
        return prometheus_metrics
    def run_demo(self):
        """Run every monitoring collector in sequence."""
        print("=" * 50)
        print("RedisInsight监控演示")
        print("=" * 50)
        # Each collector prints its own report; return values are unused.
        metrics = self.collect_metrics()
        cmd_stats = self.generate_performance_report()
        slow_logs = self.monitor_slow_queries()
        large_keys = self.analyze_memory_usage()
        clients = self.monitor_connections()
        dashboard = self.generate_dashboard_data()
        prometheus = self.export_to_prometheus()
        print("\n" + "=" * 50)
        print("监控完成")
        print("=" * 50)
if __name__ == "__main__":
    # Demo entry point; replace 'yourpassword' with the server's requirepass.
    demo = RedisInsightDemo(password='yourpassword')
    demo.run_demo()
4.2 Redis Exporter + Prometheus
知识点概述:
使用Redis Exporter将Redis指标导出到Prometheus进行监控。
案例代码:
python
import redis
import time
import random
import threading
from prometheus_client import start_http_server, Gauge, Counter, Histogram
from prometheus_client.core import CollectorRegistry
import argparse
class RedisExporter:
"""Redis Exporter实现"""
def __init__(self, redis_host='localhost', redis_port=6379, redis_password=None, exporter_port=9121):
self.redis_host = redis_host
self.redis_port = redis_port
self.redis_password = redis_password
self.exporter_port = exporter_port
# 创建Redis连接
self.redis_client = redis.Redis(
host=redis_host,
port=redis_port,
password=redis_password,
decode_responses=True,
socket_keepalive=True,
socket_connect_timeout=5
)
# 创建Registry
self.registry = CollectorRegistry()
# 定义指标
self.metrics = {}
self._setup_metrics()
def _setup_metrics(self):
"""设置Prometheus指标"""
# 服务器信息
self.metrics['server_info'] = Gauge(
'redis_server_info',
'Redis server information',
['version', 'mode', 'role'],
registry=self.registry
)
# 连接指标
self.metrics['connected_clients'] = Gauge(
'redis_connected_clients',
'Number of connected clients',
registry=self.registry
)
self.metrics['blocked_clients'] = Gauge(
'redis_blocked_clients',
'Number of blocked clients',
registry=self.registry
)
self.metrics['maxclients'] = Gauge(
'redis_maxclients',
'Maximum number of clients',
registry=self.registry
)
# 内存指标
self.metrics['used_memory'] = Gauge(
'redis_used_memory_bytes',
'Used memory in bytes',
registry=self.registry
)
self.metrics['used_memory_rss'] = Gauge(
'redis_used_memory_rss_bytes',
'Used memory RSS in bytes',
registry=self.registry
)
self.metrics['used_memory_peak'] = Gauge(
'redis_used_memory_peak_bytes',
'Peak memory usage in bytes',
registry=self.registry
)
self.metrics['used_memory_lua'] = Gauge(
'redis_used_memory_lua_bytes',
'Lua memory usage in bytes',
registry=self.registry
)
self.metrics['mem_fragmentation_ratio'] = Gauge(
'redis_mem_fragmentation_ratio',
'Memory fragmentation ratio',
registry=self.registry
)
self.metrics['maxmemory'] = Gauge(
'redis_maxmemory_bytes',
'Maximum memory limit in bytes',
registry=self.registry
)
# 统计指标
self.metrics['total_commands_processed'] = Counter(
'redis_commands_processed_total',
'Total number of commands processed',
registry=self.registry
)
self.metrics['total_connections_received'] = Counter(
'redis_connections_received_total',
'Total number of connections received',
registry=self.registry
)
self.metrics['instantaneous_ops_per_sec'] = Gauge(
'redis_instantaneous_ops_per_sec',
'Instantaneous operations per second',
registry=self.registry
)
self.metrics['instantaneous_input_kbps'] = Gauge(
'redis_instantaneous_input_kbps',
'Instantaneous input KB/s',
registry=self.registry
)
self.metrics['instantaneous_output_kbps'] = Gauge(
'redis_instantaneous_output_kbps',
'Instantaneous output KB/s',
registry=self.registry
)
# 键空间指标
self.metrics['keyspace_hits'] = Counter(
'redis_keyspace_hits_total',
'Total number of keyspace hits',
registry=self.registry
)
self.metrics['keyspace_misses'] = Counter(
'redis_keyspace_misses_total',
'Total number of keyspace misses',
registry=self.registry
)
self.metrics['expired_keys'] = Counter(
'redis_expired_keys_total',
'Total number of expired keys',
registry=self.registry
)
self.metrics['evicted_keys'] = Counter(
'redis_evicted_keys_total',
'Total number of evicted keys',
registry=self.registry
)
self.metrics['db_keys'] = Gauge(
'redis_db_keys',
'Number of keys in database',
['db'],
registry=self.registry
)
self.metrics['db_keys_expiring'] = Gauge(
'redis_db_keys_expiring',
'Number of keys with expiration in database',
['db'],
registry=self.registry
)
# 复制指标
self.metrics['connected_slaves'] = Gauge(
'redis_connected_slaves',
'Number of connected slaves',
registry=self.registry
)
self.metrics['master_repl_offset'] = Gauge(
'redis_master_repl_offset',
'Master replication offset',
registry=self.registry
)
self.metrics['repl_backlog_active'] = Gauge(
'redis_repl_backlog_active',
'Replication backlog active',
registry=self.registry
)
self.metrics['repl_backlog_size'] = Gauge(
'redis_repl_backlog_bytes',
'Replication backlog size in bytes',
registry=self.registry
)
# CPU指标
self.metrics['used_cpu_sys'] = Counter(
'redis_cpu_sys_seconds_total',
'System CPU time used',
registry=self.registry
)
self.metrics['used_cpu_user'] = Counter(
'redis_cpu_user_seconds_total',
'User CPU time used',
registry=self.registry
)
self.metrics['used_cpu_sys_children'] = Counter(
'redis_cpu_sys_children_seconds_total',
'System CPU time used by child processes',
registry=self.registry
)
self.metrics['used_cpu_user_children'] = Counter(
'redis_cpu_user_children_seconds_total',
'User CPU time used by child processes',
registry=self.registry
)
# 持久化指标
self.metrics['rdb_last_save_time'] = Gauge(
'redis_rdb_last_save_timestamp_seconds',
'Last RDB save timestamp',
registry=self.registry
)
self.metrics['rdb_changes_since_last_save'] = Gauge(
'redis_rdb_changes_since_last_save',
'Changes since last RDB save',
registry=self.registry
)
self.metrics['aof_enabled'] = Gauge(
'redis_aof_enabled',
'AOF enabled',
registry=self.registry
)
self.metrics['aof_rewrite_in_progress'] = Gauge(
'redis_aof_rewrite_in_progress',
'AOF rewrite in progress',
registry=self.registry
)
# 命令统计
self.metrics['command_calls'] = Counter(
'redis_command_calls_total',
'Number of command calls',
['command'],
registry=self.registry
)
self.metrics['command_usec_total'] = Counter(
'redis_command_usec_total',
'Total microseconds spent on command',
['command'],
registry=self.registry
)
def collect_metrics(self):
"""收集Redis指标"""
try:
# 获取INFO信息
info = self.redis_client.info()
# 服务器信息
version = info.get('redis_version', 'unknown')
mode = info.get('redis_mode', 'unknown')
role = info.get('role', 'unknown')
self.metrics['server_info'].labels(version=version, mode=mode, role=role).set(1)
# 客户端指标
self.metrics['connected_clients'].set(info.get('connected_clients', 0))
self.metrics['blocked_clients'].set(info.get('blocked_clients', 0))
self.metrics['maxclients'].set(info.get('maxclients', 0))
# 内存指标
self.metrics['used_memory'].set(info.get('used_memory', 0))
self.metrics['used_memory_rss'].set(info.get('used_memory_rss', 0))
self.metrics['used_memory_peak'].set(info.get('used_memory_peak', 0))
self.metrics['used_memory_lua'].set(info.get('used_memory_lua', 0))
self.metrics['mem_fragmentation_ratio'].set(info.get('mem_fragmentation_ratio', 0))
self.metrics['maxmemory'].set(info.get('maxmemory', 0))
# 统计指标
self.metrics['total_commands_processed'].inc(info.get('total_commands_processed', 0) - self.metrics['total_commands_processed']._value.get())
self.metrics['total_connections_received'].inc(info.get('total_connections_received', 0) - self.metrics['total_connections_received']._value.get())
self.metrics['instantaneous_ops_per_sec'].set(info.get('instantaneous_ops_per_sec', 0))
self.metrics['instantaneous_input_kbps'].set(info.get('instantaneous_input_kbps', 0))
self.metrics['instantaneous_output_kbps'].set(info.get('instantaneous_output_kbps', 0))
# 键空间指标
self.metrics['keyspace_hits'].inc(info.get('keyspace_hits', 0) - self.metrics['keyspace_hits']._value.get())
self.metrics['keyspace_misses'].inc(info.get('keyspace_misses', 0) - self.metrics['keyspace_misses']._value.get())
self.metrics['expired_keys'].inc(info.get('expired_keys', 0) - self.metrics['expired_keys']._value.get())
self.metrics['evicted_keys'].inc(info.get('evicted_keys', 0) - self.metrics['evicted_keys']._value.get())
# DB指标
for key, value in info.items():
if key.startswith('db'):
db_name = key
# 解析字符串如: keys=100,expires=10,avg_ttl=1000
parts = value.split(',')
for part in parts:
if 'keys=' in part:
keys = int(part.split('=')[1])
self.metrics['db_keys'].labels(db=db_name).set(keys)
elif 'expires=' in part:
expires = int(part.split('=')[1])
self.metrics['db_keys_expiring'].labels(db=db_name).set(expires)
# 复制指标
self.metrics['connected_slaves'].set(info.get('connected_slaves', 0))
self.metrics['master_repl_offset'].set(info.get('master_repl_offset', 0))
self.metrics['repl_backlog_active'].set(info.get('repl_backlog_active', 0))
self.metrics['repl_backlog_size'].set(info.get('repl_backlog_size', 0))
# CPU指标
self.metrics['used_cpu_sys'].inc(info.get('used_cpu_sys', 0) - self.metrics['used_cpu_sys']._value.get())
self.metrics['used_cpu_user'].inc(info.get('used_cpu_user', 0) - self.metrics['used_cpu_user']._value.get())
self.metrics['used_cpu_sys_children'].inc(info.get('used_cpu_sys_children', 0) - self.metrics['used_cpu_sys_children']._value.get())
self.metrics['used_cpu_user_children'].inc(info.get('used_cpu_user_children', 0) - self.metrics['used_cpu_user_children']._value.get())
# 持久化指标
self.metrics['rdb_last_save_time'].set(info.get('rdb_last_save_time', 0))
self.metrics['rdb_changes_since_last_save'].set(info.get('rdb_changes_since_last_save', 0))
self.metrics['aof_enabled'].set(1 if info.get('aof_enabled') else 0)
self.metrics['aof_rewrite_in_progress'].set(1 if info.get('aof_rewrite_in_progress') else 0)
# 命令统计
cmd_stats = self.redis_client.info('commandstats')
for cmd, stats in cmd_stats.items():
if cmd.startswith('cmdstat_'):
cmd_name = cmd[8:]
calls = int(stats['calls'])
usec = int(stats['usec'])
# 获取当前值
current_calls = self.metrics['command_calls']._metrics.get((cmd_name,), {}).get('value', 0)
current_usec = self.metrics['command_usec_total']._metrics.get((cmd_name,), {}).get('value', 0)
self.metrics['command_calls'].labels(command=cmd_name).inc(calls - current_calls)
self.metrics['command_usec_total'].labels(command=cmd_name).inc(usec - current_usec)
return True
except Exception as e:
print(f"收集指标失败: {e}")
return False
def run_exporter(self):
    """Expose the metric registry over HTTP and refresh it in an endless poll loop."""
    # Serve the Prometheus registry on the configured port before polling starts.
    start_http_server(self.exporter_port, registry=self.registry)
    print(f"Redis Exporter启动在端口 {self.exporter_port}")
    scrape_interval = 15  # seconds between metric collections
    while True:
        self.collect_metrics()
        time.sleep(scrape_interval)
class PrometheusDemo:
    """Demo generator for a Redis monitoring stack's configuration files.

    Each ``generate_*`` method prints a sample configuration template to
    stdout and also returns it as a string, so a caller can write it to
    disk. No method talks to Redis or Prometheus; this class is purely a
    template producer.
    """

    def __init__(self):
        # Stateless helper; nothing to initialise.
        pass

    def generate_prometheus_config(self):
        """Print and return a sample ``prometheus.yml`` scrape configuration."""
        print("=== Prometheus配置 ===")
        # Template covers a single instance, the exporter itself and a
        # three-node cluster job with a relabel rule extracting the host.
        config = """
# prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  - job_name: 'redis'
    static_configs:
      - targets: ['localhost:9121']  # Redis Exporter
        labels:
          instance: 'redis-server'
          environment: 'production'

  - job_name: 'redis_exporter'
    static_configs:
      - targets: ['localhost:9121']

  - job_name: 'redis_cluster'
    static_configs:
      - targets:
          - 'redis-node1:9121'
          - 'redis-node2:9121'
          - 'redis-node3:9121'
    relabel_configs:
      - source_labels: [__address__]
        target_label: node
        regex: '([^:]+)'
        replacement: '${1}'

# 告警规则
rule_files:
  - 'redis_alerts.yml'

# 告警配置
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 'localhost:9093'
"""
        print(config)
        return config

    def generate_alert_rules(self):
        """Print and return a sample ``redis_alerts.yml`` rule file.

        Covers availability, memory, connections, hit rate, evictions,
        replication lag, expirations, slow queries, RDB staleness and
        memory fragmentation.
        """
        print("\n=== Redis告警规则 ===")
        alerts = """
groups:
  - name: redis_alerts
    interval: 30s
    rules:
      # Redis宕机告警
      - alert: RedisDown
        expr: redis_up == 0
        for: 1m
        annotations:
          summary: "Redis实例 {{ $labels.instance }} 宕机"
          description: "Redis实例已经不可达超过1分钟"

      # 内存使用告警
      - alert: RedisMemoryHigh
        expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Redis内存使用过高 ({{ $labels.instance }})"
          description: "内存使用率 {{ $value }}% 超过80%"

      - alert: RedisMemoryCritical
        expr: redis_memory_used_bytes / redis_memory_max_bytes * 100 > 90
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: "Redis内存使用严重过高 ({{ $labels.instance }})"
          description: "内存使用率 {{ $value }}% 超过90%"

      # 连接数告警
      - alert: RedisConnectionsHigh
        expr: redis_connected_clients > 8000
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Redis连接数过高 ({{ $labels.instance }})"
          description: "当前连接数 {{ $value }} 超过8000"

      # 命中率告警
      - alert: RedisHitRateLow
        expr: (sum(rate(redis_keyspace_hits_total[5m])) / (sum(rate(redis_keyspace_hits_total[5m])) + sum(rate(redis_keyspace_misses_total[5m])))) * 100 < 80
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Redis命中率过低 ({{ $labels.instance }})"
          description: "命中率 {{ $value | humanize }}% 低于80%"

      # 淘汰键告警
      - alert: RedisEvictionsHigh
        expr: rate(redis_evicted_keys_total[5m]) > 10
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Redis淘汰率过高 ({{ $labels.instance }})"
          description: "每秒淘汰 {{ $value | humanize }} 个键"

      # 复制延迟告警
      - alert: RedisReplicationLag
        expr: redis_master_repl_offset - redis_slave_repl_offset > 1000
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Redis复制延迟过高 ({{ $labels.instance }})"
          description: "复制延迟 {{ $value }} 字节"

      # 过期键告警
      - alert: RedisExpiredKeysHigh
        expr: rate(redis_expired_keys_total[5m]) > 100
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Redis过期键过多 ({{ $labels.instance }})"
          description: "每秒过期 {{ $value | humanize }} 个键"

      # 慢查询告警
      - alert: RedisSlowQueries
        expr: increase(redis_command_calls_total{command="slowlog"}[5m]) > 0
        for: 5m
        labels:
          severity: info
        annotations:
          summary: "Redis慢查询告警 ({{ $labels.instance }})"
          description: "检测到慢查询命令"

      # 持久化告警
      - alert: RedisLastSaveTooOld
        expr: time() - redis_rdb_last_save_timestamp_seconds > 3600
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Redis最后一次RDB保存时间过早 ({{ $labels.instance }})"
          description: "超过1小时没有RDB保存"

      # 碎片告警
      - alert: RedisFragmentationHigh
        expr: redis_mem_fragmentation_ratio > 1.5
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Redis内存碎片率过高 ({{ $labels.instance }})"
          description: "碎片率 {{ $value }} 超过1.5"
"""
        print(alerts)
        return alerts

    def generate_grafana_dashboard(self):
        """Print and return a sample Grafana dashboard JSON definition.

        Six graph panels in a two-column grid: connections, memory,
        ops/sec, hit rate, network IO, and expired/evicted key rates.
        """
        print("\n=== Grafana仪表盘配置 ===")
        dashboard = """
{
  "dashboard": {
    "title": "Redis监控仪表盘",
    "tags": ["redis", "monitoring"],
    "timezone": "browser",
    "panels": [
      {
        "title": "Redis连接数",
        "type": "graph",
        "targets": [
          {
            "expr": "redis_connected_clients",
            "legendFormat": "连接数"
          },
          {
            "expr": "redis_blocked_clients",
            "legendFormat": "阻塞连接"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0}
      },
      {
        "title": "Redis内存使用",
        "type": "graph",
        "targets": [
          {
            "expr": "redis_used_memory_bytes / 1024 / 1024",
            "legendFormat": "已用内存(MB)"
          },
          {
            "expr": "redis_used_memory_peak_bytes / 1024 / 1024",
            "legendFormat": "峰值内存(MB)"
          },
          {
            "expr": "redis_maxmemory_bytes / 1024 / 1024",
            "legendFormat": "最大内存(MB)"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0}
      },
      {
        "title": "Redis操作数",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(redis_commands_processed_total[1m])",
            "legendFormat": "ops"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8}
      },
      {
        "title": "Redis命中率",
        "type": "graph",
        "targets": [
          {
            "expr": "sum(rate(redis_keyspace_hits_total[1m])) / (sum(rate(redis_keyspace_hits_total[1m])) + sum(rate(redis_keyspace_misses_total[1m]))) * 100",
            "legendFormat": "命中率%"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 8}
      },
      {
        "title": "Redis网络IO",
        "type": "graph",
        "targets": [
          {
            "expr": "redis_instantaneous_input_kbps",
            "legendFormat": "输入(KB/s)"
          },
          {
            "expr": "redis_instantaneous_output_kbps",
            "legendFormat": "输出(KB/s)"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16}
      },
      {
        "title": "Redis过期/淘汰",
        "type": "graph",
        "targets": [
          {
            "expr": "rate(redis_expired_keys_total[1m])",
            "legendFormat": "过期/s"
          },
          {
            "expr": "rate(redis_evicted_keys_total[1m])",
            "legendFormat": "淘汰/s"
          }
        ],
        "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16}
      }
    ]
  }
}
"""
        print(dashboard)
        return dashboard

    def generate_docker_compose(self):
        """Print and return a sample Docker Compose stack.

        Services: Redis, redis_exporter, Prometheus, Grafana and
        Alertmanager, all on one bridge network named ``monitoring``.
        """
        print("\n=== Docker Compose配置 ===")
        docker_compose = """
version: '3'
services:
  # Redis服务
  redis:
    image: redis:7-alpine
    container_name: redis
    ports:
      - "6379:6379"
    volumes:
      - ./redis/data:/data
      - ./redis/redis.conf:/usr/local/etc/redis/redis.conf
    command: redis-server /usr/local/etc/redis/redis.conf
    networks:
      - monitoring

  # Redis Exporter
  redis-exporter:
    image: oliver006/redis_exporter:latest
    container_name: redis-exporter
    ports:
      - "9121:9121"
    environment:
      - REDIS_ADDR=redis:6379
      - REDIS_PASSWORD=yourpassword
    depends_on:
      - redis
    networks:
      - monitoring

  # Prometheus
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
      - ./prometheus/rules:/etc/prometheus/rules
    depends_on:
      - redis-exporter
    networks:
      - monitoring

  # Grafana
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=admin
    volumes:
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./grafana/datasources:/etc/grafana/provisioning/datasources
    depends_on:
      - prometheus
    networks:
      - monitoring

  # Alertmanager
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    networks:
      - monitoring

networks:
  monitoring:
    driver: bridge
"""
        print(docker_compose)
        return docker_compose

    def generate_installation_script(self):
        """Print and return a bash script installing exporter, Prometheus and Grafana.

        NOTE(review): the script backgrounds the exporter and Prometheus with
        ``&`` and hard-codes release versions; it is demo material, not a
        production installer.
        """
        print("\n=== 安装脚本 ===")
        script = """
#!/bin/bash
# 安装Redis Exporter
echo "安装Redis Exporter..."
wget https://github.com/oliver006/redis_exporter/releases/download/v1.45.0/redis_exporter-v1.45.0.linux-amd64.tar.gz
tar xzf redis_exporter-v1.45.0.linux-amd64.tar.gz
cd redis_exporter-v1.45.0.linux-amd64
./redis_exporter -redis.addr localhost:6379 -redis.password yourpassword &

# 安装Prometheus
echo "安装Prometheus..."
wget https://github.com/prometheus/prometheus/releases/download/v2.40.0/prometheus-2.40.0.linux-amd64.tar.gz
tar xzf prometheus-2.40.0.linux-amd64.tar.gz
cd prometheus-2.40.0.linux-amd64
./prometheus --config.file=prometheus.yml &

# 安装Grafana
echo "安装Grafana..."
sudo apt-get install -y software-properties-common
sudo add-apt-repository "deb https://packages.grafana.com/oss/deb stable main"
wget -q -O - https://packages.grafana.com/gpg.key | sudo apt-key add -
sudo apt-get update
sudo apt-get install grafana
sudo systemctl start grafana-server
sudo systemctl enable grafana-server

echo "监控系统安装完成"
echo "访问地址:"
echo " Prometheus: http://localhost:9090"
echo " Grafana: http://localhost:3000 (admin/admin)"
echo " Redis Exporter: http://localhost:9121/metrics"
"""
        print(script)
        return script
class PrometheusIntegration:
    """End-to-end demo: generate Redis load and query Prometheus for metrics.

    Bundles three demo steps: running sample PromQL queries against a
    Prometheus server, producing synthetic read/write/pipeline load against
    Redis so dashboards have data, and cleaning up the generated ``load:*``
    keys afterwards.
    """

    def __init__(self, redis_host='localhost', redis_port=6379, redis_password=None):
        """Create the Redis client shared by all demo methods.

        Args:
            redis_host: Redis server hostname.
            redis_port: Redis server port.
            redis_password: optional password for AUTH, or None.
        """
        # decode_responses=True makes the client return str instead of bytes.
        self.redis_client = redis.Redis(
            host=redis_host,
            port=redis_port,
            password=redis_password,
            decode_responses=True
        )

    def query_prometheus(self, prometheus_url='http://localhost:9090'):
        """Run a few sample PromQL queries and print their results.

        Args:
            prometheus_url: base URL of the Prometheus HTTP API.
        """
        print("\n=== Prometheus查询 ===")
        try:
            import requests
            # (PromQL expression, human-readable description) pairs.
            queries = [
                ('redis_connected_clients', '当前连接数'),
                ('rate(redis_commands_processed_total[1m])', '每秒命令数'),
                ('redis_used_memory_bytes / redis_maxmemory_bytes * 100', '内存使用率%'),
                ('redis_mem_fragmentation_ratio', '内存碎片率')
            ]
            for query, desc in queries:
                # FIX: always pass a timeout — requests.get() without one can
                # block forever if Prometheus is unreachable.
                response = requests.get(
                    f'{prometheus_url}/api/v1/query',
                    params={'query': query},
                    timeout=5
                )
                if response.status_code == 200:
                    data = response.json()
                    if data['status'] == 'success' and data['data']['result']:
                        print(f"\n{desc}:")
                        for result in data['data']['result']:
                            # Instant-vector results: value is [timestamp, value].
                            value = result['value'][1]
                            metric = result.get('metric', {})
                            print(f" {metric}: {value}")
                    else:
                        print(f"\n{desc}: 无数据")
                else:
                    print(f"查询失败: {response.status_code}")
        except ImportError:
            print("需要安装requests库: pip install requests")
        except Exception as e:
            print(f"查询失败: {e}")

    def simulate_load_for_monitoring(self):
        """Generate mixed read/write/pipeline load so the metrics have data.

        Runs three workers concurrently: a writer (1000 string keys with a
        5-minute TTL), a reader (5000 random GETs, some of which may miss),
        and a pipelined worker (500 batches of hash/set/zset writes).
        """
        print("\n=== 模拟负载 ===")
        # FIX: import both dependencies locally — the original imported only
        # `threading` here and used `random` without a visible import.
        import random
        import threading

        def write_operations():
            # 1000 keys of ~700 bytes each, expiring after 300 seconds.
            for i in range(1000):
                key = f"load:key:{i}"
                self.redis_client.set(key, f"value_{i}" * 100)
                self.redis_client.expire(key, 300)
                if i % 100 == 0:
                    print(f" 已写入 {i} 个键")

        def read_operations():
            # 5000 random reads over the writer's key space.
            for i in range(5000):
                key = f"load:key:{random.randint(0, 999)}"
                self.redis_client.get(key)
                if i % 1000 == 0:
                    print(f" 已读取 {i} 次")

        def complex_operations():
            # 500 pipelined batches touching hash, set and sorted-set types.
            for i in range(500):
                pipe = self.redis_client.pipeline()
                for j in range(10):
                    pipe.hset(f"load:hash:{i}", f"field{j}", f"value{j}")
                    pipe.sadd(f"load:set:{i}", f"member{j}")
                    pipe.zadd(f"load:zset:{i}", {f"member{j}": j})
                pipe.execute()
                if i % 100 == 0:
                    print(f" 已执行 {i} 批复杂操作")

        # Run all three workloads concurrently to mimic real mixed traffic.
        threads = [
            threading.Thread(target=write_operations),
            threading.Thread(target=read_operations),
            threading.Thread(target=complex_operations)
        ]
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        print("负载模拟完成")

    def cleanup(self):
        """Delete every ``load:*`` key created by the load simulation."""
        print("\n=== 清理测试数据 ===")
        # scan_iter avoids the blocking KEYS command on large keyspaces.
        for key in self.redis_client.scan_iter("load:*"):
            self.redis_client.delete(key)
        print("测试数据已清理")
def main():
    """CLI entry point: parse arguments and dispatch to the chosen demo mode.

    Modes:
        exporter   - run the metrics exporter loop (blocks forever).
        prometheus - only print the generated configuration templates.
        full       - print templates, generate Redis load, query Prometheus.
    """
    parser = argparse.ArgumentParser(description='Redis监控演示')
    parser.add_argument('--mode', choices=['exporter', 'prometheus', 'full'], default='full',
                        help='运行模式')
    parser.add_argument('--redis-host', default='localhost', help='Redis主机')
    parser.add_argument('--redis-port', type=int, default=6379, help='Redis端口')
    parser.add_argument('--redis-password', default='yourpassword', help='Redis密码')
    parser.add_argument('--exporter-port', type=int, default=9121, help='Exporter端口')
    args = parser.parse_args()

    if args.mode == 'exporter':
        # Exporter mode: serve metrics forever; never returns.
        exporter = RedisExporter(
            redis_host=args.redis_host,
            redis_port=args.redis_port,
            redis_password=args.redis_password,
            exporter_port=args.exporter_port
        )
        exporter.run_exporter()
        return

    if args.mode == 'full':
        banner = "=" * 50
        print(banner)
        print("Redis + Prometheus 监控演示")
        print(banner)

    # Both 'prometheus' and 'full' emit the sample configuration templates.
    demo = PrometheusDemo()
    demo.generate_prometheus_config()
    demo.generate_alert_rules()
    demo.generate_grafana_dashboard()
    demo.generate_docker_compose()

    if args.mode != 'full':
        return

    # Full mode additionally drives load and queries Prometheus.
    integration = PrometheusIntegration(
        redis_host=args.redis_host,
        redis_port=args.redis_port,
        redis_password=args.redis_password
    )
    try:
        integration.simulate_load_for_monitoring()
        integration.query_prometheus()
    finally:
        # Always remove the demo keys, even if load/query raised.
        integration.cleanup()


if __name__ == "__main__":
    main()
总结
本章详细介绍了Redis服务的核心配置和管理功能,包括:
1. 数据持久化
   - RDB快照持久化:适合数据备份和灾难恢复
   - AOF日志持久化:提供更高的数据安全性
   - 混合持久化:结合RDB和AOF的优点
2. 线程模型
   - 单线程事件驱动模型
   - IO多线程(Redis 6.0+)
   - 异步操作和阻塞命令
3. 内存管理
   - 过期数据淘汰策略
   - listpack紧凑数据结构
   - 内存碎片整理
4. 性能监控
   - SLOWLOG慢日志分析
   - 延迟监控和诊断
   - 实时性能指标
5. 安全管理
   - SSL/TLS加密通信
   - ACL访问控制
   - 用户权限管理
6. 监控系统
   - RedisInsight可视化工具
   - Prometheus + Grafana集成
   - 告警规则配置
通过本章的学习,读者可以全面掌握Redis的配置、优化和监控方法,确保Redis在生产环境中的稳定运行。