Docker 部署 Redis 集群完整指南
一、Redis 集群架构设计
1.1 集群架构
text
┌─────────────────────────────────────────────────────────────────────┐
│ Redis Cluster (6节点) │
├─────────────────────────────────────────────────────────────────────┤
│ │
│ Master1(7001) ──── Slave1(7004) Master2(7002) ──── Slave2(7005) │
│ │ │ │ │ │
│ ───────┴──────────────────┴──────────────┴──────────────────┴──────│
│ │
│ Master3(7003) ──── Slave3(7006) │
│ │
└─────────────────────────────────────────────────────────────────────┘
二、Docker Compose 部署方案
2.1 docker-compose.yml
yaml
version: '3.8'

# Settings shared by all six Redis nodes. Every node below merges this mapping
# with `<<: *redis-node` and declares only what differs (container name, host
# ports, volumes, static IP). Keeping one definition stops the copies from
# drifting apart.
x-redis-node: &redis-node
  image: redis:7.2-alpine
  # The whole configuration is passed as redis-server flags. REDIS_PASSWORD and
  # REDIS_MAXMEMORY are interpolated by Compose (from the shell or .env) and
  # fall back to the defaults after `:-`.
  command: >
    redis-server
    --bind 0.0.0.0
    --port 6379
    --cluster-enabled yes
    --cluster-config-file nodes.conf
    --cluster-node-timeout 5000
    --appendonly yes
    --appendfsync everysec
    --save 900 1
    --save 300 10
    --save 60 10000
    --requirepass ${REDIS_PASSWORD:-Redis123456}
    --masterauth ${REDIS_PASSWORD:-Redis123456}
    --maxmemory ${REDIS_MAXMEMORY:-2gb}
    --maxmemory-policy allkeys-lru
    --loglevel notice
    --logfile /data/redis.log
    --protected-mode no
  environment:
    REDIS_PORT: "6379"
    REDIS_PASSWORD: "${REDIS_PASSWORD:-Redis123456}"
    REDIS_CLUSTER_REPLICAS: "1"
    # redis-cli reads the password from REDISCLI_AUTH, which keeps it out of
    # the healthcheck command line.
    REDISCLI_AUTH: "${REDIS_PASSWORD:-Redis123456}"
  healthcheck:
    # Healthy only when the server actually answers PONG.
    test: ["CMD-SHELL", "redis-cli -p 6379 ping | grep -q PONG"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  deploy:
    resources:
      limits:
        # Kept above --maxmemory: Redis needs headroom for replication
        # buffers and copy-on-write during RDB/AOF rewrites. Raise this
        # together with REDIS_MAXMEMORY.
        memory: 3G
      reservations:
        memory: 1G
  restart: unless-stopped

services:
  # Redis node 1 (intended master; roles are assigned by `--cluster create`)
  redis-node-1:
    <<: *redis-node
    container_name: redis-cluster-node-1
    ports:
      - "7001:6379"
      - "17001:16379"  # cluster bus port
    volumes:
      - redis-data-1:/data
      # NOTE: the two bind mounts below are kept from the original layout, but
      # the command above neither loads this redis.conf nor logs to
      # /var/log/redis (the log file is /data/redis.log).
      - ./redis.conf:/usr/local/etc/redis/redis.conf
      - ./logs/redis-node-1:/var/log/redis
    networks:
      redis-cluster-network:
        ipv4_address: 172.20.0.11

  # Redis node 2 (intended master)
  redis-node-2:
    <<: *redis-node
    container_name: redis-cluster-node-2
    ports:
      - "7002:6379"
      - "17002:16379"
    volumes:
      - redis-data-2:/data
      - ./redis.conf:/usr/local/etc/redis/redis.conf
      - ./logs/redis-node-2:/var/log/redis
    networks:
      redis-cluster-network:
        ipv4_address: 172.20.0.12

  # Redis node 3 (intended master)
  redis-node-3:
    <<: *redis-node
    container_name: redis-cluster-node-3
    ports:
      - "7003:6379"
      - "17003:16379"
    volumes:
      - redis-data-3:/data
      - ./redis.conf:/usr/local/etc/redis/redis.conf
      - ./logs/redis-node-3:/var/log/redis
    networks:
      redis-cluster-network:
        ipv4_address: 172.20.0.13

  # Redis node 4 (intended replica)
  redis-node-4:
    <<: *redis-node
    container_name: redis-cluster-node-4
    ports:
      - "7004:6379"
      - "17004:16379"
    volumes:
      - redis-data-4:/data
      - ./redis.conf:/usr/local/etc/redis/redis.conf
      - ./logs/redis-node-4:/var/log/redis
    networks:
      redis-cluster-network:
        ipv4_address: 172.20.0.14

  # Redis node 5 (intended replica)
  redis-node-5:
    <<: *redis-node
    container_name: redis-cluster-node-5
    ports:
      - "7005:6379"
      - "17005:16379"
    volumes:
      - redis-data-5:/data
      - ./redis.conf:/usr/local/etc/redis/redis.conf
      - ./logs/redis-node-5:/var/log/redis
    networks:
      redis-cluster-network:
        ipv4_address: 172.20.0.15

  # Redis node 6 (intended replica)
  redis-node-6:
    <<: *redis-node
    container_name: redis-cluster-node-6
    ports:
      - "7006:6379"
      - "17006:16379"
    volumes:
      - redis-data-6:/data
      - ./redis.conf:/usr/local/etc/redis/redis.conf
      - ./logs/redis-node-6:/var/log/redis
    networks:
      redis-cluster-network:
        ipv4_address: 172.20.0.16
# Redis Cluster bootstrap: waits for all six nodes, then creates the cluster
# once. Exits non-zero on failure so that `restart: on-failure` retries it,
# and skips creation when the cluster already reports cluster_state:ok.
  redis-cluster-init:
    image: redis:7.2-alpine
    container_name: redis-cluster-init
    depends_on:
      - redis-node-1
      - redis-node-2
      - redis-node-3
      - redis-node-4
      - redis-node-5
      - redis-node-6
    networks:
      - redis-cluster-network
    environment:
      # redis-cli picks the password up from REDISCLI_AUTH, so it never
      # appears on a command line.
      REDISCLI_AUTH: "${REDIS_PASSWORD:-Redis123456}"
    # `$$` is Compose's escape for a literal `$` handed to the shell.
    command:
      - sh
      - -c
      - |
        set -e
        echo '等待Redis节点启动...'
        echo '检查节点健康状态...'
        for i in 1 2 3 4 5 6; do
          tries=0
          # Poll each node instead of sleeping a fixed time (max ~60s per node).
          until redis-cli -h "172.20.0.1$$i" -p 6379 ping 2>/dev/null | grep -q PONG; do
            tries=$$((tries + 1))
            if [ "$$tries" -ge 30 ]; then
              echo "节点 $$i 未就绪"
              exit 1
            fi
            sleep 2
          done
        done
        if redis-cli -h 172.20.0.11 -p 6379 cluster info | grep -q 'cluster_state:ok'; then
          echo '集群已存在, 跳过创建'
        else
          echo '创建Redis集群...'
          redis-cli --cluster create \
            172.20.0.11:6379 \
            172.20.0.12:6379 \
            172.20.0.13:6379 \
            172.20.0.14:6379 \
            172.20.0.15:6379 \
            172.20.0.16:6379 \
            --cluster-replicas 1 \
            --cluster-yes
          # Give the nodes a moment to agree on the new configuration.
          sleep 5
        fi
        echo '验证集群状态...'
        redis-cli --cluster check 172.20.0.11:6379
        echo '集群信息:'
        redis-cli -h 172.20.0.11 -p 6379 cluster info
        echo '节点信息:'
        redis-cli -h 172.20.0.11 -p 6379 cluster nodes
        echo 'Redis集群初始化完成!'
    restart: on-failure
# Redis web UI (redis-commander), published on host port 8081.
  redis-commander:
    image: rediscommander/redis-commander:latest
    container_name: redis-cluster-web
    ports:
      - "8081:8081"
    environment:
      # Entry format: label:host:port:dbIndex:password. The password follows
      # REDIS_PASSWORD like every other service (it was hard-coded before, so
      # a password set in .env locked the UI out), and each connection gets
      # its own label. A password containing ':' or ',' cannot be expressed
      # in this format.
      - REDIS_HOSTS=node-1:redis-node-1:6379:0:${REDIS_PASSWORD:-Redis123456},node-2:redis-node-2:6379:0:${REDIS_PASSWORD:-Redis123456},node-3:redis-node-3:6379:0:${REDIS_PASSWORD:-Redis123456}
      - HTTP_USER=admin
      # The web login reuses the Redis password.
      - HTTP_PASSWORD=${REDIS_PASSWORD:-Redis123456}
      - REDIS_PORT=6379
      - REDIS_PASSWORD=${REDIS_PASSWORD:-Redis123456}
    depends_on:
      - redis-cluster-init
    networks:
      - redis-cluster-network
    restart: unless-stopped
# Prometheus exporter for Redis metrics, published on host port 9121.
  redis-exporter:
    image: oliver006/redis_exporter:latest
    container_name: redis-cluster-exporter
    ports:
      - "9121:9121"
    command:
      # The exporter takes a single default --redis.addr; repeating the flag
      # does not add targets. Other nodes are scraped through the exporter's
      # multi-target endpoint: /scrape?target=redis://redis-node-N:6379
      - '--redis.addr=redis://redis-node-1:6379'
      - '--log-format=json'
      # No --namespace override: the dashboard in this guide queries the
      # default redis_* metric names (redis_up, redis_memory_used_bytes, ...).
    environment:
      # Read by the exporter; avoids passing --redis.password in argv.
      - REDIS_PASSWORD=${REDIS_PASSWORD:-Redis123456}
    depends_on:
      - redis-cluster-init
    networks:
      - redis-cluster-network
    restart: unless-stopped
networks:
  # Bridge network with a fixed subnet so the nodes can keep the static
  # 172.20.0.11-16 addresses used by the services.
  redis-cluster-network:
    driver: bridge
    ipam:
      config:
        - subnet: "172.20.0.0/16"
          gateway: "172.20.0.1"

# One named volume per node, each mounted at /data.
volumes:
  redis-data-1:
    driver: local
  redis-data-2:
    driver: local
  redis-data-3:
    driver: local
  redis-data-4:
    driver: local
  redis-data-5:
    driver: local
  redis-data-6:
    driver: local
2.2 环境变量配置 (.env)
ini
# Environment variables for the Redis cluster.
# A variable only takes effect where docker-compose.yml or a script references
# it as ${NAME}; defining it here does not change anything by itself.
# Sample password: replace it before any real deployment.
REDIS_PASSWORD=Redis123456@Cluster
REDIS_CLUSTER_NAME=production-cluster
REDIS_MAXMEMORY=2gb
REDIS_TIMEOUT=5000
REDIS_APPENDONLY=yes
REDIS_APPENDFSYNC=everysec
# Network settings
REDIS_CLUSTER_SUBNET=172.20.0.0/16
REDIS_CLUSTER_GATEWAY=172.20.0.1
# Monitoring settings
PROMETHEUS_ENABLED=true
GRAFANA_ENABLED=true
# Port mappings
REDIS_PORT_START=7001
REDIS_CLUSTER_BUS_PORT_START=17001
REDIS_COMMANDER_PORT=8081
REDIS_EXPORTER_PORT=9121
三、自定义 Redis 配置文件
3.1 redis.conf
conf
# Redis cluster configuration file
# Based on Redis 7.2
# Network: listen on every interface, port 6379, protected mode off.
bind 0.0.0.0
protected-mode no
port 6379
tcp-backlog 511
timeout 0
tcp-keepalive 300
# General: stay in the foreground (required inside a container) and log to a
# file under /data.
daemonize no
pidfile /var/run/redis_6379.pid
loglevel notice
logfile /data/redis.log
databases 16
always-show-logo yes
set-proc-title yes
proc-title-template "{title} {listen-addr} {server-mode}"
# RDB snapshots: each "save <seconds> <changes>" pair is a snapshot trigger.
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename dump.rdb
rdb-del-sync-files no
# Working directory for dump.rdb and the AOF files.
dir /data
# Replication
replica-serve-stale-data yes
replica-read-only yes
repl-diskless-sync no
repl-diskless-sync-delay 5
repl-diskless-load disabled
repl-disable-tcp-nodelay no
replica-priority 100
acllog-max-len 128
# Security
# redis.conf is read literally: Redis does NOT expand ${VAR} placeholders, so
# "requirepass ${REDIS_PASSWORD}" would make the text "${REDIS_PASSWORD}" the
# password. Put the real secret here, or pass --requirepass / --masterauth on
# the redis-server command line after the config file path (later options
# override the file).
requirepass CHANGE_ME_BEFORE_USE
masterauth CHANGE_ME_BEFORE_USE
# Clients and memory: cap at 2gb and evict any key by approximate LRU.
maxclients 10000
maxmemory 2gb
maxmemory-policy allkeys-lru
maxmemory-samples 5
lazyfree-lazy-eviction no
lazyfree-lazy-expire no
lazyfree-lazy-server-del no
replica-lazy-flush no
lazyfree-lazy-user-del no
oom-score-adj no
oom-score-adj-values 0 200 800
# AOF persistence: fsync once per second.
appendonly yes
appendfilename "appendonly.aof"
appendfsync everysec
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
aof-load-truncated yes
aof-use-rdb-preamble yes
# Lua scripting (limit in milliseconds)
lua-time-limit 5000
# Slow log (threshold in microseconds)
slowlog-log-slower-than 10000
slowlog-max-len 128
latency-monitor-threshold 0
# Keyspace notifications (empty string = disabled)
notify-keyspace-events ""
# Advanced: data-structure encoding thresholds and buffer limits
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-size -2
list-compress-depth 0
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
hll-sparse-max-bytes 3000
stream-node-max-bytes 4096
stream-node-max-entries 100
activerehashing yes
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit replica 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
hz 10
dynamic-hz yes
aof-rewrite-incremental-fsync yes
rdb-save-incremental-fsync yes
jemalloc-bg-thread yes
# Cluster
cluster-enabled yes
cluster-config-file /data/nodes.conf
cluster-node-timeout 5000
cluster-replica-validity-factor 10
cluster-migration-barrier 1
cluster-require-full-coverage yes
cluster-replica-no-failover no
cluster-allow-reads-when-down no
cluster-allow-pubsubshard-when-down no
cluster-link-sendbuf-limit 0
# cluster-announce-ip must be a literal address: Redis does not expand
# ${HOST_IP}. It differs per node, so set it per node (for example with
# "--cluster-announce-ip <ip>" on that node's redis-server command line) and
# only when the node sits behind NAT or port mapping.
# cluster-announce-ip <node-ip>
cluster-announce-port 6379
cluster-announce-bus-port 16379
# Modules
# Redis refuses to start when a loadmodule path cannot be loaded. The official
# redis image used in this guide (redis:7.2-alpine) does not ship these .so
# files, so the lines stay disabled. Enable one only after installing the
# module at that exact path (or switch to an image that bundles it).
# loadmodule /usr/lib/redis/modules/redisearch.so
# loadmodule /usr/lib/redis/modules/rejson.so
# loadmodule /usr/lib/redis/modules/redisgraph.so
# loadmodule /usr/lib/redis/modules/redistimeseries.so
# loadmodule /usr/lib/redis/modules/redisbloom.so
# TLS/SSL (disabled; uncomment and adjust to enable)
# tls-port 0
# tls-cert-file /etc/redis/certs/redis.crt
# tls-key-file /etc/redis/certs/redis.key
# tls-ca-cert-file /etc/redis/certs/ca.crt
# tls-auth-clients no
# tls-auth-clients optional
# tls-protocols "TLSv1.2 TLSv1.3"
# tls-ciphers DEFAULT:!MEDIUM
# tls-ciphersuites TLS_CHACHA20_POLY1305_SHA256
# tls-prefer-server-ciphers yes
# tls-session-caching no
# tls-session-cache-size 20480
# tls-session-cache-timeout 300
四、启动和部署脚本
4.1 启动脚本 (start-cluster.sh)
bash
#!/bin/bash
# Redis Cluster launcher.
# Drives a Docker / Docker Compose based deployment through an interactive menu.
set -e

# ANSI colour escapes used by the log helpers (NC resets the colour).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# _emit_log COLOUR TAG MESSAGE - print "[TAG] MESSAGE" with the tag coloured.
_emit_log() {
    echo -e "${1}[${2}]${NC} ${3}"
}

# Public log helpers; each takes the message as its first argument.
log_info()    { _emit_log "$BLUE"   "INFO"    "$1"; }
log_success() { _emit_log "$GREEN"  "SUCCESS" "$1"; }
log_warn()    { _emit_log "$YELLOW" "WARN"    "$1"; }
log_error()   { _emit_log "$RED"    "ERROR"   "$1"; }
# Verify that the docker and docker-compose executables are on PATH.
# Exits the script with status 1 on the first missing tool.
check_dependencies() {
    log_info "检查系统依赖..."
    local tool label
    for tool in docker docker-compose; do
        if command -v "$tool" &> /dev/null; then
            continue
        fi
        # Map the executable name to the product name used in the message.
        case "$tool" in
            docker) label="Docker" ;;
            *)      label="Docker Compose" ;;
        esac
        log_error "$label 未安装"
        exit 1
    done
    log_success "依赖检查通过"
}
# Create the working directories next to the script: per-node data and log
# directories (node1..node6) plus ./config and ./ssl.
create_directories() {
    log_info "创建目录结构..."
    local n
    for n in 1 2 3 4 5 6; do
        mkdir -p "./data/node$n" "./logs/node$n"
    done
    mkdir -p ./config ./ssl
    log_success "目录创建完成"
}
# Write ./config/redis.conf and ./config/redis-node-{1..6}.conf.
#
# The password is resolved here, at generation time, from REDIS_PASSWORD (with
# the default Redis123456). Redis does not expand ${...} inside its config
# file, so the generated files must contain the literal value. The old quoted
# heredoc wrote the text "${REDIS_PASSWORD}" into redis.conf.
# NOTE: this function only sees REDIS_PASSWORD if it is already exported when
# the function runs.
generate_configs() {
    log_info "生成 Redis 配置文件..."
    local password="${REDIS_PASSWORD:-Redis123456}"
    local i

    # Shared base configuration.
    cat > ./config/redis.conf << EOF
# Redis 集群配置文件
bind 0.0.0.0
port 6379
cluster-enabled yes
cluster-config-file nodes.conf
cluster-node-timeout 5000
appendonly yes
requirepass ${password}
masterauth ${password}
EOF

    # Per-node configuration; file names carry the node index.
    for i in {1..6}; do
        cat > "./config/redis-node-$i.conf" << EOF
# Redis 节点 $i 配置
bind 0.0.0.0
port 6379
cluster-enabled yes
cluster-config-file /data/nodes.conf
cluster-node-timeout 5000
appendonly yes
appendfilename "appendonly-node-$i.aof"
dbfilename "dump-node-$i.rdb"
dir /data
requirepass ${password}
masterauth ${password}
maxmemory 2gb
maxmemory-policy allkeys-lru
loglevel notice
logfile /var/log/redis/redis.log
protected-mode no
EOF
    done
    log_success "配置文件生成完成"
}
# Bring the stack up with docker-compose and wait until the cluster is healthy.
# Returns non-zero when the health check never succeeds.
start_cluster() {
    log_info "启动 Redis 集群..."

    # Export KEY=VALUE pairs from .env one line at a time. Values are taken
    # literally: no word splitting and no quote mangling, unlike
    # `export $(grep ... | xargs)`.
    if [ -f .env ]; then
        log_info "加载环境变量..."
        local line key
        while IFS= read -r line || [ -n "$line" ]; do
            case "$line" in
                ''|'#'*) continue ;;
            esac
            key="${line%%=*}"
            # Skip lines that are not valid NAME=value assignments.
            if [[ "$line" == *=* && "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
                export "$key=${line#*=}"
            fi
        done < .env
    fi

    docker-compose up -d

    log_info "等待集群启动..."
    sleep 10

    # Report the failure explicitly instead of letting `set -e` end the
    # script without a message.
    if ! check_cluster_health; then
        log_error "Redis 集群启动失败"
        return 1
    fi
    log_success "Redis 集群启动完成"
}
# Poll node 1 until it answers PING and reports cluster_state:ok.
# Makes up to 10 attempts, 5 seconds apart. Prints cluster info and the node
# list and returns 0 on success; returns 1 when every attempt fails.
check_cluster_health() {
    log_info "检查集群健康状态..."
    local retries=10
    local wait_time=5
    local attempt=1
    local state
    # Common prefix for every redis-cli call against node 1.
    local -a node1_cli=(docker exec redis-cluster-node-1 redis-cli -a "${REDIS_PASSWORD:-Redis123456}")

    while [ "$attempt" -le "$retries" ]; do
        log_info "尝试 $attempt/$retries 检查集群状态..."
        if "${node1_cli[@]}" ping 2>/dev/null | grep -q "PONG"; then
            # `|| true`: a failing call must not trip `set -e`; an empty
            # result simply fails the grep below.
            state=$("${node1_cli[@]}" cluster info 2>/dev/null || true)
            if echo "$state" | grep -q "cluster_state:ok"; then
                log_success "集群状态正常"
                echo "========================================"
                echo "Redis 集群信息:"
                echo "========================================"
                "${node1_cli[@]}" cluster info
                echo "========================================"
                echo "集群节点:"
                "${node1_cli[@]}" cluster nodes
                echo "========================================"
                return 0
            fi
        fi
        # No pause after the final attempt.
        if [ "$attempt" -lt "$retries" ]; then
            log_info "等待 ${wait_time} 秒后重试..."
            sleep "$wait_time"
        fi
        attempt=$((attempt + 1))
    done

    log_error "集群健康检查失败"
    return 1
}
# Stop and remove the stack's containers with `docker-compose down`.
stop_cluster() {
    log_info "停止 Redis 集群..."
    docker-compose down
    log_success "Redis 集群已停止"
}
# Full restart: stop the stack, pause five seconds, start it again.
restart_cluster() {
    log_info "重启 Redis 集群..."
    stop_cluster
    sleep 5
    start_cluster
}
# Destroy the stack including its volumes and wipe ./data and ./logs,
# after a single-key y/n confirmation.
clean_cluster() {
    log_warn "清理 Redis 集群数据..."
    read -p "确认要删除所有数据吗?(y/n): " -n 1 -r
    echo
    # Anything other than a single y/Y cancels.
    case "$REPLY" in
        [Yy]) ;;
        *)
            log_info "取消清理操作"
            return
            ;;
    esac
    docker-compose down -v
    rm -rf ./data/* ./logs/*
    log_success "集群数据已清理"
}
# Print the manual steps for attaching a new node as a replica.
# Menu option 2 of scale_cluster called this function, but the script never
# defined it, so choosing that option aborted with "command not found".
add_slave_node() {
    log_info "添加从节点..."
    log_warn "请手动更新 docker-compose.yml 文件添加新节点"
    log_info "然后将新节点作为从节点加入集群: docker exec -e REDISCLI_AUTH=<password> redis-cluster-node-1 redis-cli --cluster add-node new_host:new_port existing_host:existing_port --cluster-slave --cluster-master-id <master_node_id>"
}

# Interactive sub-menu for growing the cluster: add a master, add a replica,
# or leave.
scale_cluster() {
    log_info "集群扩容..."
    echo "选择扩容方式:"
    echo "1) 增加主节点"
    echo "2) 增加从节点"
    echo "3) 退出"
    read -r -p "请选择 (1-3): " choice
    case $choice in
        1)
            add_master_node
            ;;
        2)
            add_slave_node
            ;;
        3)
            log_info "退出扩容"
            ;;
        *)
            log_error "无效选择"
            ;;
    esac
}
# Prepare directories and a config file for the next node, then print the
# manual steps needed to bring it into the cluster as a master.
# The node number is the count of existing ./data/node* directories plus one.
add_master_node() {
    log_info "添加主节点..."
    local next_node=$(($(ls -d ./data/node* 2>/dev/null | wc -l) + 1))

    mkdir -p "./data/node$next_node"
    mkdir -p "./logs/node$next_node"

    # The password is expanded now because Redis does not expand variables
    # in its config file.
    cat > "./config/redis-node-$next_node.conf" << EOF
bind 0.0.0.0
port 6379
cluster-enabled yes
cluster-config-file /data/nodes.conf
cluster-node-timeout 5000
appendonly yes
dir /data
requirepass ${REDIS_PASSWORD:-Redis123456}
masterauth ${REDIS_PASSWORD:-Redis123456}
EOF

    # docker-compose.yml is not edited automatically.
    log_warn "请手动更新 docker-compose.yml 文件添加新节点"
    log_info "然后运行: docker-compose up -d redis-node-$next_node"
    # The cluster requires authentication, and a freshly added master owns no
    # hash slots until the cluster is rebalanced.
    log_info "最后将新节点加入集群: docker exec -e REDISCLI_AUTH=<password> redis-cluster-node-1 redis-cli --cluster add-node new_host:new_port existing_host:existing_port"
    log_info "并为新主节点分配槽位: docker exec -e REDISCLI_AUTH=<password> redis-cluster-node-1 redis-cli --cluster rebalance existing_host:existing_port --cluster-use-empty-masters"
}
# Snapshot every running node and archive the result under ./backups.
#
# Each node's data lives in a named Docker volume mounted at /data (see
# docker-compose.yml), not in ./data/nodeN, so the files are copied out of the
# container with `docker cp`. Copying ./data/nodeN archived empty directories.
backup_cluster() {
    log_info "备份集群数据..."
    local backup_dir="./backups/$(date +%Y%m%d_%H%M%S)"
    mkdir -p "$backup_dir"

    local i container
    for i in {1..6}; do
        container="redis-cluster-node-$i"
        # Exact name match, so "node-1" cannot match e.g. "node-10".
        if docker ps --format '{{.Names}}' | grep -qx "$container"; then
            log_info "备份节点 $i 数据..."
            # Force a synchronous RDB dump before copying.
            docker exec "$container" redis-cli -a "${REDIS_PASSWORD:-Redis123456}" save
            docker cp "$container:/data" "$backup_dir/node$i"
        else
            log_warn "节点 $i 未运行, 跳过备份"
        fi
    done

    # Configuration that belongs with the data.
    cp -r ./config "$backup_dir/"
    cp docker-compose.yml "$backup_dir/"
    cp .env "$backup_dir/" 2>/dev/null || true

    # Backup metadata.
    cat > "$backup_dir/backup.info" << EOF
备份时间: $(date)
集群节点: 6
Redis 版本: 7.2
备份目录: $backup_dir
EOF

    # The archive contains a single top-level directory named after the timestamp.
    tar -czf "$backup_dir.tar.gz" -C ./backups "$(basename "$backup_dir")"
    log_success "集群备份完成: $backup_dir.tar.gz"
}
# Interactive sub-menu that runs one redis-cli inspection command on node 1.
monitor_cluster() {
    log_info "启动集群监控..."
    printf '%s\n' \
        "选择监控方式:" \
        "1) 实时监控" \
        "2) 查看节点信息" \
        "3) 查看内存使用" \
        "4) 查看慢查询" \
        "5) 返回"
    read -p "请选择 (1-5): " choice

    # Container name plus the authenticated redis-cli prefix shared by all options.
    local -a target=(redis-cluster-node-1 redis-cli -a "${REDIS_PASSWORD:-Redis123456}")
    case $choice in
        1) docker exec -it "${target[@]}" --stat ;;   # needs a TTY
        2) docker exec "${target[@]}" cluster nodes ;;
        3) docker exec "${target[@]}" info memory ;;
        4) docker exec "${target[@]}" slowlog get 10 ;;
        5) return ;;
        *) log_error "无效选择" ;;
    esac
}
# Clear the screen and print the top-level menu.
show_menu() {
    clear
    cat << 'MENU'
========================================
 Redis Cluster 管理工具
========================================
1) 启动集群
2) 停止集群
3) 重启集群
4) 检查状态
5) 集群扩容
6) 备份集群
7) 监控集群
8) 清理集群
9) 退出
========================================
MENU
}
# Entry point: run the one-time preparation steps, then loop over the menu
# until the user picks 9.
#
# Menu actions run inside an `if`, so an action that fails (for example a
# health check while the cluster is still down) is reported and the menu keeps
# running. Before, `set -e` terminated the whole tool on the first failure.
main() {
    check_dependencies
    create_directories
    generate_configs

    local choice action
    while true; do
        show_menu
        read -r -p "请选择操作 (1-9): " choice
        action=""
        case $choice in
            1) action=start_cluster ;;
            2) action=stop_cluster ;;
            3) action=restart_cluster ;;
            4) action=check_cluster_health ;;
            5) action=scale_cluster ;;
            6) action=backup_cluster ;;
            7) action=monitor_cluster ;;
            8) action=clean_cluster ;;
            9)
                log_info "退出管理工具"
                exit 0
                ;;
            *)
                log_error "无效选择,请重新输入"
                ;;
        esac
        if [ -n "$action" ] && ! "$action"; then
            log_warn "操作未成功完成, 已返回主菜单"
        fi
        read -r -p "按 Enter 键继续..."
    done
}

# Run the menu.
main "$@"
4.2 集群测试脚本 (test-cluster.sh)
bash
#!/bin/bash
# Redis Cluster test script.
set -e

# ANSI colour escapes used by the log helpers (NC resets the colour).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Log helpers; each takes the message as its first argument.
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[SUCCESS]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }

# Export KEY=VALUE pairs from the given file one line at a time. Values are
# taken literally: no word splitting and no quote mangling, unlike
# `export $(grep ... | xargs)`. Blank lines, comments and lines that are not
# valid assignments are skipped.
_load_env_file() {
    local line key
    while IFS= read -r line || [ -n "$line" ]; do
        case "$line" in
            ''|'#'*) continue ;;
        esac
        key="${line%%=*}"
        if [[ "$line" == *=* && "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
            export "$key=${line#*=}"
        fi
    done < "$1"
}

if [ -f .env ]; then
    _load_env_file .env
fi

# Connection settings; the host ports match docker-compose.yml.
REDIS_PASSWORD=${REDIS_PASSWORD:-Redis123456}
REDIS_HOST=${REDIS_HOST:-localhost}
PORTS=(7001 7002 7003 7004 7005 7006)
# PING every port in PORTS on REDIS_HOST.
# Returns 1 at the first node that does not answer PONG, 0 when all answer.
test_connection() {
    log_info "测试 Redis 集群连接..."
    local p
    for p in "${PORTS[@]}"; do
        if ! redis-cli -h $REDIS_HOST -p $p -a "$REDIS_PASSWORD" ping 2>/dev/null | grep -q "PONG"; then
            log_error "节点 $REDIS_HOST:$p 连接失败"
            return 1
        fi
        log_success "节点 $REDIS_HOST:$p 连接成功"
    done
    return 0
}
# Ask the node on port 7001 for CLUSTER INFO and require cluster_state:ok.
# The raw output is printed in both cases; returns 0 when ok, 1 otherwise.
test_cluster_status() {
    log_info "测试集群状态..."
    local info status=1
    # `|| true`: an unreachable node yields empty output instead of tripping `set -e`.
    info=$(redis-cli -h $REDIS_HOST -p 7001 -a "$REDIS_PASSWORD" cluster info 2>/dev/null) || true
    if echo "$info" | grep -q "cluster_state:ok"; then
        log_success "集群状态正常"
        status=0
    else
        log_error "集群状态异常"
    fi
    echo "$info"
    return $status
}
# Round-trip one key through the cluster: SET, GET, DEL.
#
# redis-cli runs in cluster mode (-c) so that it follows MOVED redirections.
# Without -c the test failed whenever the key hashed to a slot not served by
# the node on port 7001.
# Returns 0 when all three steps succeed, 1 at the first failure.
test_data_operations() {
    log_info "测试数据读写操作..."
    local test_key="test:cluster:$(date +%s)"
    local test_value="test_value_$(date +%s)"
    local -a cli=(redis-cli -c -h "$REDIS_HOST" -p 7001 -a "$REDIS_PASSWORD")

    # Write
    if "${cli[@]}" set "$test_key" "$test_value" 2>/dev/null | grep -q "OK"; then
        log_success "数据写入成功: $test_key = $test_value"
    else
        log_error "数据写入失败"
        return 1
    fi

    # Read back; `|| true` keeps a failed call from tripping `set -e`.
    local read_value
    read_value=$("${cli[@]}" get "$test_key" 2>/dev/null) || true
    if [ "$read_value" = "$test_value" ]; then
        log_success "数据读取成功: $test_key = $read_value"
    else
        log_error "数据读取失败"
        return 1
    fi

    # Delete; DEL answers 1 when exactly one key was removed.
    if "${cli[@]}" del "$test_key" 2>/dev/null | grep -q "^1$"; then
        log_success "数据删除成功"
    else
        log_error "数据删除失败"
        return 1
    fi
    return 0
}
# Send 100 SETs to randomly chosen nodes without cluster mode and require
# every reply to be either OK (the node owns the slot) or MOVED (a redirect).
#
# Counters use $((...)) assignments. `((n++))` returns status 1 when n is 0,
# which aborted the script under `set -e` on the very first increment.
# Returns 0 when no request failed, 1 otherwise.
test_cluster_redirect() {
    log_info "测试集群重定向..."
    local success_count=0
    local fail_count=0
    local i key random_port result

    for i in {1..100}; do
        key="cluster:key:$i"
        # Pick a random node for this key.
        random_port=${PORTS[$RANDOM % ${#PORTS[@]}]}
        result=$(redis-cli -h "$REDIS_HOST" -p "$random_port" -a "$REDIS_PASSWORD" set "$key" "value_$key" 2>&1) || true
        if echo "$result" | grep -q "MOVED\|OK"; then
            success_count=$((success_count + 1))
        else
            fail_count=$((fail_count + 1))
            log_warn "键 $key 在端口 $random_port 设置失败: $result"
        fi
    done

    log_info "重定向测试结果: 成功 $success_count, 失败 $fail_count"
    if [ "$fail_count" -eq 0 ]; then
        log_success "集群重定向测试通过"
        return 0
    fi
    log_error "集群重定向测试失败"
    return 1
}
# Stop one master's container, wait, and verify that one of its replicas was
# promoted; then start the container again.
#
# Fixes over the previous version:
#   * the CLUSTER NODES address field is "ip:port@busport"; the bus suffix is
#     stripped before the address is used;
#   * the container is located by its network IP (container names do not
#     contain the port);
#   * success means "a former replica of the stopped master is now a master".
#     The stopped node cannot show up as a slave while it is down;
#   * the verification query goes to a node other than the stopped one.
# Returns 0 when a replica was promoted, 1 otherwise.
test_failover() {
    log_info "测试故障转移(模拟主节点故障)..."

    local nodes master_line master_id master_addr master_host master_port replica_ids
    nodes=$(redis-cli -h "$REDIS_HOST" -p 7001 -a "$REDIS_PASSWORD" cluster nodes 2>/dev/null) || true
    # First master that is not flagged as failing.
    master_line=$(echo "$nodes" | awk '$3 ~ /master/ && $3 !~ /fail/ { print; exit }')
    if [ -z "$master_line" ]; then
        log_error "无法获取主节点信息"
        return 1
    fi
    master_id=$(echo "$master_line" | awk '{ print $1 }')
    master_addr=$(echo "$master_line" | awk '{ print $2 }')
    master_addr=${master_addr%%@*}
    master_host=${master_addr%:*}
    master_port=${master_addr##*:}

    # Node IDs whose "master" column points at the chosen master.
    replica_ids=$(echo "$nodes" | awk -v m="$master_id" '$4 == m { print $1 }' | tr '\n' ' ')
    if [ -z "${replica_ids// /}" ]; then
        log_error "主节点 $master_host:$master_port 没有从节点, 无法测试故障转移"
        return 1
    fi

    log_info "模拟主节点 $master_host:$master_port 故障..."

    # Find the running container that owns the master's IP address.
    local container_name="" name ips
    for name in $(docker ps --format '{{.Names}}'); do
        ips=$(docker inspect --format '{{range .NetworkSettings.Networks}}{{.IPAddress}} {{end}}' "$name" 2>/dev/null) || true
        case " $ips" in
            *" $master_host "*)
                container_name=$name
                break
                ;;
        esac
    done
    if [ -z "$container_name" ]; then
        log_error "未找到对应的容器"
        return 1
    fi

    # Choose a host port that does not belong to the node being stopped.
    local stopped_port query_port="" p
    stopped_port=$(docker port "$container_name" 6379/tcp 2>/dev/null | awk -F: 'NR == 1 { print $NF }') || true
    for p in "${PORTS[@]}"; do
        if [ "$p" != "$stopped_port" ]; then
            query_port=$p
            break
        fi
    done

    log_info "停止容器: $container_name"
    docker stop "$container_name"

    log_info "等待故障转移 (30秒)..."
    sleep 30

    # A replica of the stopped master must now carry the "master" flag.
    local after result=1
    after=$(redis-cli -h "$REDIS_HOST" -p "$query_port" -a "$REDIS_PASSWORD" cluster nodes 2>/dev/null) || true
    if echo "$after" | awk -v ids="$replica_ids" '
            BEGIN { n = split(ids, a, " "); for (i = 1; i <= n; i++) want[a[i]] = 1 }
            ($1 in want) && $3 ~ /master/ { found = 1 }
            END { exit !found }'; then
        log_success "故障转移成功: 原主节点 $master_host:$master_port 的从节点已提升为主节点"
        result=0
    else
        log_error "故障转移失败"
    fi

    # Always bring the stopped node back, whatever the outcome.
    log_info "恢复节点..."
    docker start "$container_name"
    sleep 10
    return $result
}
# Run three redis-benchmark passes (SET, GET, pipelined PING/SET/GET) using the
# node on port 7001 as the entry point.
#
# --cluster makes redis-benchmark discover the cluster topology and send each
# key to the node that owns its slot. Against a cluster node without this flag
# most requests are answered with MOVED errors instead of being executed.
# The benchmark host must be able to reach the addresses the nodes announce.
test_performance() {
    log_info "运行性能测试..."
    local -a bench=(redis-benchmark --cluster -h "$REDIS_HOST" -p 7001 -a "$REDIS_PASSWORD")

    log_info "1. 测试 SET 操作..."
    "${bench[@]}" -t set -n 10000 -c 50

    log_info "2. 测试 GET 操作..."
    "${bench[@]}" -t get -n 10000 -c 50

    log_info "3. 测试流水线操作..."
    "${bench[@]}" -t ping,set,get -n 10000 -c 50 -P 16

    log_success "性能测试完成"
}
# Run the four functional tests, then the benchmark, and print a summary.
#
# Counters use $((...)) assignments. `((n++))` returns status 1 when n is 0,
# so under `set -e` the script died right after the first passing test.
# Returns 0 when no functional test failed, 1 otherwise.
run_all_tests() {
    log_info "开始 Redis 集群完整测试..."
    local tests_passed=0
    local tests_failed=0
    local test_name

    # Tests 1-4: connection, cluster status, data operations, redirection.
    for test_name in test_connection test_cluster_status test_data_operations test_cluster_redirect; do
        if "$test_name"; then
            tests_passed=$((tests_passed + 1))
        else
            tests_failed=$((tests_failed + 1))
        fi
    done

    # Test 5: the benchmark is informational; a failure must not hide the summary.
    test_performance || log_warn "性能测试未完成"

    echo "========================================"
    echo "测试结果汇总:"
    echo "通过: $tests_passed"
    echo "失败: $tests_failed"
    echo "========================================"

    if [ "$tests_failed" -eq 0 ]; then
        log_success "所有测试通过!Redis 集群运行正常。"
        return 0
    fi
    log_error "有 $tests_failed 个测试失败"
    return 1
}
# Entry point: show the test menu once, run the selected test, and finish.
main() {
    cat << 'MENU'
========================================
 Redis Cluster 测试工具
========================================
1) 运行完整测试
2) 测试连接
3) 测试集群状态
4) 测试数据操作
5) 测试集群重定向
6) 测试故障转移
7) 性能测试
8) 退出
========================================
MENU
    read -p "请选择测试项目 (1-8): " choice

    # Menu number -> function to run (options 1-7).
    local -a handlers=(
        [1]=run_all_tests
        [2]=test_connection
        [3]=test_cluster_status
        [4]=test_data_operations
        [5]=test_cluster_redirect
        [6]=test_failover
        [7]=test_performance
    )

    if [[ "$choice" =~ ^[1-7]$ ]]; then
        "${handlers[$choice]}"
        return
    fi
    if [ "$choice" = "8" ]; then
        log_info "退出测试工具"
        exit 0
    fi
    log_error "无效选择"
    exit 1
}

# Run the menu.
main "$@"
五、Kubernetes 部署方案
5.1 Redis Cluster Kubernetes 部署文件
5.1.1 namespace.yaml
yaml
# Namespace that holds every Redis cluster resource in this guide.
apiVersion: v1
kind: Namespace
metadata:
  name: redis-cluster
  labels:
    component: database
    environment: production
    name: redis-cluster
5.1.2 configmap.yaml
yaml
# redis.conf for the StatefulSet pods, which mount it under /etc/redis.
# The password is not stored here: the StatefulSet passes --requirepass and
# --masterauth on the command line from the Secret.
apiVersion: v1
kind: ConfigMap
metadata:
  name: redis-cluster-config
  namespace: redis-cluster
data:
  redis.conf: |
    # Redis 集群配置
    bind 0.0.0.0
    port 6379
    cluster-enabled yes
    cluster-config-file /data/nodes.conf
    cluster-node-timeout 5000
    cluster-replica-validity-factor 10
    cluster-migration-barrier 1
    cluster-require-full-coverage yes
    appendonly yes
    appendfilename "appendonly.aof"
    appendfsync everysec
    auto-aof-rewrite-percentage 100
    auto-aof-rewrite-min-size 64mb
    dir /data
    save 900 1
    save 300 10
    save 60 10000
    stop-writes-on-bgsave-error yes
    rdbcompression yes
    rdbchecksum yes
    dbfilename dump.rdb
    maxmemory 2gb
    maxmemory-policy allkeys-lru
    maxmemory-samples 5
    lazyfree-lazy-eviction no
    lazyfree-lazy-expire no
    lazyfree-lazy-server-del no
    replica-lazy-flush no
    lua-time-limit 5000
    slowlog-log-slower-than 10000
    slowlog-max-len 128
    latency-monitor-threshold 0
    notify-keyspace-events ""
    hash-max-ziplist-entries 512
    hash-max-ziplist-value 64
    list-max-ziplist-size -2
    list-compress-depth 0
    set-max-intset-entries 512
    zset-max-ziplist-entries 128
    zset-max-ziplist-value 64
    hll-sparse-max-bytes 3000
    stream-node-max-bytes 4096
    stream-node-max-entries 100
    activerehashing yes
    client-output-buffer-limit normal 0 0 0
    client-output-buffer-limit replica 256mb 64mb 60
    client-output-buffer-limit pubsub 32mb 8mb 60
    hz 10
    dynamic-hz yes
    aof-rewrite-incremental-fsync yes
    rdb-save-incremental-fsync yes
5.1.3 secret.yaml
yaml
# Credentials for the Redis cluster.
# WARNING: sample value in plain text. Replace it and keep the real manifest
# out of version control.
# The StatefulSet and the init Job in this guide read only `redis-password`.
apiVersion: v1
kind: Secret
type: Opaque
metadata:
  name: redis-cluster-secret
  namespace: redis-cluster
stringData:
  redis-password: 'Redis123456@K8s'
  requirepass: 'Redis123456@K8s'
  masterauth: 'Redis123456@K8s'
5.1.4 service.yaml
yaml
# Service "redis-cluster": client port 6379 and cluster bus port 16379.
# NOTE(review): this Service is also headless (clusterIP: None) and is
# otherwise identical to "redis-cluster-headless" below. Confirm whether it
# was meant to be a regular ClusterIP entry point for clients.
apiVersion: v1
kind: Service
metadata:
  name: redis-cluster
  namespace: redis-cluster
  labels:
    app: redis-cluster
    component: database
spec:
  clusterIP: None
  selector:
    app: redis-cluster
  ports:
    - name: client
      port: 6379
      targetPort: 6379
    - name: cluster
      port: 16379
      targetPort: 16379
---
# Headless governing Service named by the StatefulSet's serviceName; it gives
# each pod a stable DNS name (redis-cluster-N.redis-cluster-headless...).
apiVersion: v1
kind: Service
metadata:
  name: redis-cluster-headless
  namespace: redis-cluster
  labels:
    app: redis-cluster
    component: database
spec:
  clusterIP: None
  selector:
    app: redis-cluster
  ports:
    - name: client
      port: 6379
      targetPort: 6379
    - name: cluster
      port: 16379
      targetPort: 16379
5.1.5 statefulset.yaml
yaml
# Six Redis pods (redis-cluster-0 .. redis-cluster-5), each with its own
# 20Gi PersistentVolumeClaim mounted at /data.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: redis-cluster
  namespace: redis-cluster
  labels:
    app: redis-cluster
    component: database
    environment: production
spec:
  serviceName: redis-cluster-headless
  replicas: 6
  selector:
    matchLabels:
      app: redis-cluster
  updateStrategy:
    type: RollingUpdate
  podManagementPolicy: OrderedReady
  template:
    metadata:
      labels:
        app: redis-cluster
        component: database
        environment: production
      annotations:
        # NOTE(review): no container in this pod listens on 9121. Confirm
        # that an exporter sidecar is added elsewhere, or drop these.
        prometheus.io/scrape: "true"
        prometheus.io/port: "9121"
        prometheus.io/path: "/metrics"
    spec:
      affinity:
        # Prefer spreading the pods across nodes (soft rule).
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              podAffinityTerm:
                labelSelector:
                  matchExpressions:
                    - key: app
                      operator: In
                      values:
                        - redis-cluster
                topologyKey: kubernetes.io/hostname
      containers:
        - name: redis
          image: redis:7.2-alpine
          imagePullPolicy: IfNotPresent
          # $(VAR) is expanded by Kubernetes from the env entries below
          # (expansion applies to command/args only).
          command:
            - redis-server
            - /etc/redis/redis.conf
            - --requirepass
            - $(REDIS_PASSWORD)
            - --masterauth
            - $(REDIS_PASSWORD)
            # Announce the pod's current IP to the other cluster nodes;
            # this is what the POD_IP env entry is for.
            - --cluster-announce-ip
            - $(POD_IP)
          ports:
            - name: client
              containerPort: 6379
            - name: cluster
              containerPort: 16379
          env:
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: redis-cluster-secret
                  key: redis-password
            - name: POD_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.podIP
          resources:
            requests:
              memory: "2Gi"
              cpu: "1000m"
            limits:
              memory: "4Gi"
              cpu: "2000m"
          volumeMounts:
            - name: redis-config
              mountPath: /etc/redis
            - name: redis-data
              mountPath: /data
            - name: redis-tmp
              mountPath: /tmp
          # Probe commands are NOT subject to $(VAR) expansion, so
          # "redis-cli -a $(REDIS_PASSWORD)" sent the literal text as the
          # password. The probes run through a shell that reads the
          # container's environment and hands the password to redis-cli via
          # REDISCLI_AUTH.
          livenessProbe:
            exec:
              command:
                - sh
                - -c
                - 'REDISCLI_AUTH="$REDIS_PASSWORD" redis-cli ping'
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            exec:
              command:
                - sh
                - -c
                - 'REDISCLI_AUTH="$REDIS_PASSWORD" redis-cli ping | grep -q PONG'
            initialDelaySeconds: 5
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          startupProbe:
            exec:
              command:
                - sh
                - -c
                - 'REDISCLI_AUTH="$REDIS_PASSWORD" redis-cli ping'
            initialDelaySeconds: 0
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 30
          securityContext:
            privileged: false
            # Writable paths are limited to the /data and /tmp mounts.
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
      volumes:
        - name: redis-config
          configMap:
            name: redis-cluster-config
        - name: redis-tmp
          emptyDir: {}
  volumeClaimTemplates:
    - metadata:
        name: redis-data
      spec:
        accessModes:
          - ReadWriteOnce
        storageClassName: standard
        resources:
          requests:
            storage: 20Gi
5.1.6 job-init.yaml
yaml
# One-shot Job that forms the cluster from the six StatefulSet pods.
apiVersion: batch/v1
kind: Job
metadata:
  name: redis-cluster-init
  namespace: redis-cluster
spec:
  backoffLimit: 3
  activeDeadlineSeconds: 300
  template:
    spec:
      restartPolicy: OnFailure
      containers:
        - name: redis-cluster-init
          image: redis:7.2-alpine
          env:
            - name: REDIS_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: redis-cluster-secret
                  key: redis-password
          # The script is POSIX sh: /bin/sh in the alpine image is busybox
          # ash, which supports neither bash arrays nor {0..5} brace
          # expansion. The password reaches redis-cli through REDISCLI_AUTH
          # instead of the command line.
          command:
            - /bin/sh
            - -c
            - |
              set -e
              export REDISCLI_AUTH="$REDIS_PASSWORD"

              echo "等待 Redis 节点启动..."
              echo "获取 Pod IP 地址..."
              NODES=""
              i=0
              while [ "$i" -le 5 ]; do
                host="redis-cluster-$i.redis-cluster-headless.redis-cluster.svc.cluster.local"
                tries=0
                # Poll until the pod answers instead of sleeping a fixed time;
                # activeDeadlineSeconds bounds the total wait.
                until redis-cli -h "$host" -p 6379 ping 2>/dev/null | grep -q PONG; do
                  tries=$((tries + 1))
                  if [ "$tries" -ge 60 ]; then
                    echo "节点 $i 未就绪"
                    exit 1
                  fi
                  sleep 5
                done
                ip=$(getent hosts "$host" | awk '{ print $1; exit }')
                if [ -z "$ip" ]; then
                  echo "无法解析节点 $i 的 IP"
                  exit 1
                fi
                echo "节点 $i IP: $ip"
                NODES="$NODES $ip:6379"
                i=$((i + 1))
              done

              # First node address, used for every follow-up query.
              set -- $NODES
              FIRST="$1"

              if redis-cli -h "${FIRST%:*}" -p 6379 cluster info | grep -q 'cluster_state:ok'; then
                echo "集群已存在, 跳过创建"
              else
                echo "创建 Redis 集群..."
                redis-cli --cluster create $NODES --cluster-replicas 1 --cluster-yes
                # Give the nodes a moment to agree on the new configuration.
                sleep 5
              fi

              echo "验证集群状态..."
              redis-cli --cluster check "$FIRST"
              echo "集群信息:"
              redis-cli -h "${FIRST%:*}" -p 6379 cluster info
              echo "节点信息:"
              redis-cli -h "${FIRST%:*}" -p 6379 cluster nodes
              echo "Redis 集群初始化完成!"
六、监控和告警配置
6.1 Prometheus 监控配置
yaml
# prometheus-config.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

scrape_configs:
  # The exporter's own /metrics endpoint (its default Redis target).
  - job_name: 'redis-cluster'
    static_configs:
      - targets:
          - 'redis-cluster-exporter:9121'
    metrics_path: /metrics
    relabel_configs:
      # Drop the port from the instance label.
      - source_labels: [__address__]
        target_label: instance
        regex: '(.*):.*'
        replacement: '${1}'

  # Per-pod metrics through the exporter's multi-target endpoint. Redis does
  # not serve Prometheus metrics on 6379, so the pods are never scraped
  # directly: each pod address becomes the `target` query parameter and the
  # request goes to the exporter.
  - job_name: 'redis-cluster-nodes'
    metrics_path: /scrape
    kubernetes_sd_configs:
      - role: pod
        namespaces:
          names:
            - redis-cluster
    relabel_configs:
      - source_labels: [__meta_kubernetes_pod_label_app]
        regex: redis-cluster
        action: keep
      - source_labels: [__meta_kubernetes_pod_container_port_number]
        regex: '6379'
        action: keep
      - source_labels: [__meta_kubernetes_pod_ip]
        target_label: __param_target
        replacement: 'redis://${1}:6379'
      - source_labels: [__meta_kubernetes_pod_ip]
        target_label: instance
        replacement: '${1}:6379'
      - source_labels: [__meta_kubernetes_pod_name]
        target_label: pod
      - source_labels: [__meta_kubernetes_namespace]
        target_label: namespace
      # Address actually scraped: the exporter.
      # NOTE(review): assumes the exporter is reachable from Prometheus under
      # the same name as in the job above; adjust to your exporter Service.
      - target_label: __address__
        replacement: 'redis-cluster-exporter:9121'
6.2 Grafana 仪表板
json
{
"dashboard": {
"id": null,
"title": "Redis Cluster Monitoring",
"tags": ["redis", "cluster", "database"],
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "Cluster Health",
"type": "stat",
"targets": [
{
"expr": "redis_up",
"legendFormat": "{{instance}}"
}
],
"fieldConfig": {
"defaults": {
"thresholds": {
"steps": [
{"color": "red", "value": null},
{"color": "green", "value": 1}
]
}
}
}
},
{
"id": 2,
"title": "Memory Usage",
"type": "graph",
"targets": [
{
"expr": "redis_memory_used_bytes / 1024 / 1024",
"legendFormat": "Used Memory (MB) - {{instance}}"
},
{
"expr": "redis_memory_max_bytes / 1024 / 1024",
"legendFormat": "Max Memory (MB) - {{instance}}"
}
]
},
{
"id": 3,
"title": "Commands per Second",
"type": "graph",
"targets": [
{
"expr": "rate(redis_commands_processed_total[5m])",
"legendFormat": "Commands/s - {{instance}}"
}
]
},
{
"id": 4,
"title": "Connections",
"type": "graph",
"targets": [
{
"expr": "redis_connected_clients",
"legendFormat": "Connected Clients - {{instance}}"
}
]
},
{
"id": 5,
"title": "Keyspace",
"type": "graph",
"targets": [
{
"expr": "redis_db_keys",
"legendFormat": "Keys - db{{db}}"
}
]
},
{
"id": 6,
"title": "CPU Usage",
"type": "graph",
"targets": [
{
"expr": "rate(redis_cpu_sys_seconds_total[5m]) * 100",
"legendFormat": "System CPU % - {{instance}}"
},
{
"expr": "rate(redis_cpu_user_seconds_total[5m]) * 100",
"legendFormat": "User CPU % - {{instance}}"
}
]
}
],
"time": {
"from": "now-6h",
"to": "now"
}
}
}
七、使用说明
7.1 快速开始
bash
# 1. Clone the repository (placeholder URL; replace with your own)
git clone https://github.com/your-repo/redis-cluster-docker.git
cd redis-cluster-docker
# 2. Configure environment variables
cp .env.example .env
# Edit .env to set the password and other settings
# 3. Start the cluster (opens the interactive management menu)
chmod +x start-cluster.sh
./start-cluster.sh
# 4. Test the cluster
chmod +x test-cluster.sh
./test-cluster.sh
# 5. Open the management UI
# Redis Commander: http://localhost:8081
# User name: admin
# Password: your REDIS_PASSWORD
7.2 常用命令
bash
# Show cluster state
docker exec redis-cluster-node-1 redis-cli -a "你的密码" cluster info
# List cluster nodes
docker exec redis-cluster-node-1 redis-cli -a "你的密码" cluster nodes
# Show slot assignment
docker exec redis-cluster-node-1 redis-cli -a "你的密码" cluster slots
# Restart the cluster
docker-compose restart
# Follow the logs of one node
docker-compose logs -f redis-node-1
# Open a shell in a container
docker exec -it redis-cluster-node-1 sh
# Trigger an RDB snapshot on node 1.
# `docker-compose exec` takes the SERVICE name (redis-node-1), not the
# container name (redis-cluster-node-1).
docker-compose exec redis-node-1 redis-cli -a "你的密码" save
7.3 故障处理
bash
# 1. Recover a failed node
# Restarting the affected node is usually enough
docker-compose restart redis-node-1
# 2. Re-initialise the cluster
# If the cluster configuration is corrupted, rebuild from scratch
# (this DELETES all data volumes)
docker-compose down -v
./start-cluster.sh
# 3. Restore data from a backup
# The archive holds one top-level directory named after the timestamp, with a
# nodeN sub-directory per node. Redis reads its data from the container's
# /data volume, so copy the files there while the node is stopped.
tar -xzf backups/20240101_120000.tar.gz -C backups
docker-compose stop redis-node-1
docker cp backups/20240101_120000/node1/. redis-cluster-node-1:/data/
docker-compose start redis-node-1
# Repeat for the other nodes
# 4. Performance tuning
# Memory limit: REDIS_MAXMEMORY=4gb in .env only takes effect if
# docker-compose.yml passes it on (--maxmemory ${REDIS_MAXMEMORY:-2gb});
# otherwise edit the --maxmemory flag of each node directly.
# Re-create the containers so the new setting is applied
# (`docker-compose restart` does not pick up changed settings)
docker-compose up -d
八、安全和优化建议
8.1 安全配置
yaml
# Security best practices, written as a YAML view of redis.conf directives.
# Values that YAML would re-type (yes/no, dotted addresses) are quoted so
# they reach Redis as the literal text it expects.
security:
  # 1. Strong password policy
  requirepass: "复杂密码@包含大小写数字特殊字符"
  # 2. Disable dangerous commands (renaming to "" removes the command)
  rename-command:
    FLUSHDB: ""
    FLUSHALL: ""
    CONFIG: ""
    KEYS: ""
    SHUTDOWN: ""
  # 3. Network restrictions
  bind: "0.0.0.0"  # bind to specific IPs in production
  protected-mode: "yes"
  # 4. SSL/TLS encryption
  tls-port: 6380
  tls-cert-file: /path/to/redis.crt
  tls-key-file: /path/to/redis.key
  tls-ca-cert-file: /path/to/ca.crt
  # 5. Client limits
  maxclients: 10000
  timeout: 300
  # 6. Slow query log
  slowlog-log-slower-than: 10000
  slowlog-max-len: 128
8.2 性能优化
yaml
# Performance tuning, written as a YAML view of redis.conf directives.
# yes/no values are quoted so YAML keeps them as the literal text Redis expects.
performance:
  # 1. Memory
  maxmemory: "4gb"
  maxmemory-policy: "allkeys-lru"
  maxmemory-samples: 5
  # 2. Persistence
  appendonly: "yes"
  appendfsync: everysec
  auto-aof-rewrite-percentage: 100
  auto-aof-rewrite-min-size: 64mb
  # 3. Network
  tcp-keepalive: 300
  tcp-backlog: 511
  # 4. Cluster
  cluster-node-timeout: 5000
  cluster-replica-validity-factor: 10
  # 5. Kernel parameters
  # Set these on the host
  # echo never > /sys/kernel/mm/transparent_hugepage/enabled
  # sysctl -w net.core.somaxconn=65535
  # sysctl -w vm.overcommit_memory=1
这个 Redis 集群部署方案提供了较完整的生产级参考实现,包含 Docker Compose 部署、Kubernetes 部署、监控告警、备份恢复等功能。用于生产环境之前,请务必替换示例中的默认密码,并根据实际环境调整网络暴露范围、资源限制和持久化配置。