背景:
业务系统使用 Redis 集群做缓存,但 Redis 集群不够稳定,偶尔会宕机。宕机后 Redis 会自动重启,但业务系统无法自动重连 Redis。
改进方案:
当 Redis 集群发生故障、服务不可用时,让业务系统随 Redis 一起重启:在业务系统所在的 Pod 中添加 Redis 服务探测程序。更好的方式是让业务系统自身支持 Redis 服务的自动重连。
# Deployment that restarts the business application when the Redis cluster
# it depends on becomes unhealthy.
#
# How the pieces fit together:
#   * initContainer `redis-preflight-check` blocks pod start-up until every
#     Redis node accepts TCP connections.
#   * sidecar `redis-monitor` polls `CLUSTER INFO` and creates or removes the
#     marker file /redis-status/unhealthy on a shared emptyDir volume.
#   * the `business-app` liveness probe fails when its own health endpoint
#     fails or when the marker file exists, so the kubelet restarts the
#     business container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: business-app-with-monitor
spec:
  replicas: 3
  # apps/v1 requires an explicit selector; it must match the template labels.
  selector:
    matchLabels:
      app: business-app
  template:
    metadata:
      labels:
        app: business-app
    spec:
      volumes:
        # Shared scratch space used by the sidecar to report Redis health to
        # the business container's liveness probe.
        - name: redis-status
          emptyDir: {}
      initContainers:
        - name: redis-preflight-check
          image: busybox:1.35
          command:
            - /bin/sh
            - -c
            - |
              # Simplified pre-start check: every Redis node must accept a
              # TCP connection on 6379, otherwise the init container fails.
              echo "执行 Redis 集群启动前检查..."
              for i in 0 1 2; do
                if ! nc -z -w 5 redis-$i.redis-headless 6379; then
                  echo "❌ redis-$i 不可用"
                  exit 1
                fi
                echo "✅ redis-$i 连接成功"
              done
              echo "所有 Redis 节点就绪"
      containers:
        # Main business container.
        - name: business-app
          # NOTE(review): placeholder image; pin a specific tag instead of
          # `latest` before deploying.
          image: your-business-app:latest
          ports:
            - containerPort: 8080
          env:
            - name: REDIS_CLUSTER_NODES
              value: "redis-0.redis-headless:6379,redis-1.redis-headless:6379,redis-2.redis-headless:6379"
          volumeMounts:
            - name: redis-status
              mountPath: /redis-status
              readOnly: true
          livenessProbe:
            # The exec probe runs inside this container, so the image must
            # provide /bin/sh and curl.
            exec:
              command:
                - /bin/sh
                - -c
                - |
                  # Fail when the application's own health endpoint fails.
                  if ! curl -f http://localhost:8080/health; then
                    exit 1
                  fi
                  # Fail when the redis-monitor sidecar has flagged the Redis
                  # cluster as unhealthy via the shared marker file.
                  if [ -e /redis-status/unhealthy ]; then
                    echo "Redis 集群异常,触发重启"
                    exit 1
                  fi
            initialDelaySeconds: 30
            periodSeconds: 15
            timeoutSeconds: 10
            failureThreshold: 2
        # Redis monitoring sidecar.
        - name: redis-monitor
          image: busybox:1.35
          command:
            - /bin/sh
            - -c
            - |
              # Containers in a pod do not share a PID namespace by default,
              # so this script cannot signal the business process directly.
              # It publishes the cluster state through a marker file that the
              # business container's liveness probe checks.
              echo "启动 Redis 集群监控..."
              while true; do
                echo "$(date): 检查 Redis 集群状态..."
                # The cluster counts as healthy as soon as any node reports
                # cluster_state:ok.
                cluster_healthy=false
                for node in redis-0.redis-headless:6379 redis-1.redis-headless:6379 redis-2.redis-headless:6379; do
                  host=${node%:*}
                  port=${node#*:}
                  if echo "CLUSTER INFO" | nc -w 5 "$host" "$port" 2>/dev/null | grep -q "cluster_state:ok"; then
                    cluster_healthy=true
                    break
                  fi
                done
                if [ "$cluster_healthy" = "false" ]; then
                  echo "❌ $(date): Redis 集群状态异常,触发业务容器重启"
                  # Raise the marker; the liveness probe turns it into a
                  # restart of the business container.
                  touch /redis-status/unhealthy
                else
                  echo "✅ $(date): Redis 集群状态正常"
                  # Clear the marker so the liveness probe passes again.
                  rm -f /redis-status/unhealthy
                fi
                sleep 30
              done
          volumeMounts:
            - name: redis-status
              mountPath: /redis-status
          resources:
            requests:
              memory: "32Mi"
              cpu: "10m"