# Redis Cluster in Depth (Part 2): Failover, Scaling, and Production Practice

This is the second part of the Redis Cluster series. It walks through request redirection, failure detection and failover, slot migration, the complete workflow of online scale-out and scale-in, and best practices for production environments.
## 📖 Table of Contents

- Request Redirection
- Failure Detection and Failover
- Scaling Out and In
- Cluster Limitations and Workarounds
- Production Deployment
- FAQ
## Request Redirection

### MOVED Redirection

The complete flow:
```text
┌────────────────────────────────────────────────┐
│          MOVED Redirection: Full Flow          │
└────────────────────────────────────────────────┘
Client                         Node1 (slots 0-5460)
  │                               │
  │─── GET user:1001 ───────────→│
  │                               │
  │                               │ // handle the request
  │                               │ slot = keyHashSlot("user:1001");
  │                               │ // slot = 14909
  │                               │
  │                               │ n = server.cluster->slots[14909];
  │                               │ // n = Node3
  │                               │
  │                               │ if (n != myself) {
  │                               │     // slot not served here
  │←── -MOVED 14909 ip3:6381 ────│     clusterRedirectClient(MOVED);
  │                               │ }
  │                               │
  │ // client updates its cache   │
  │ slotCache[14909] = Node3      │
  │                               │
  │─── GET user:1001 ───────────→│ Node3 (slots 10923-16383)
  │                               │
  │                               │ slot = 14909
  │                               │ n = slots[14909] = myself ✅
  │                               │
  │                               │ // execute the command
  │                               │ value = db->dict->get("user:1001");
  │                               │
  │←── "Alice" ──────────────────│
```
Source implementation (simplified):
```c
// cluster.c (simplified)
clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv,
                            int argc, int *hashslot, int *error_code) {
    // ======== 1. Compute the hash slot ========
    int slot = 0;
    int firstkey = 1;   // slot 0 is a valid slot, so track "no key yet" separately
    // Collect every key the command touches
    if (cmd->getkeys_proc) {
        int numkeys, *keys = getKeysFromCommand(cmd, argv, argc, &numkeys);
        for (int i = 0; i < numkeys; i++) {
            int thisslot = keyHashSlot(argv[keys[i]]->ptr,
                                       sdslen(argv[keys[i]]->ptr));
            if (firstkey) {
                firstkey = 0;
                slot = thisslot;
            } else if (slot != thisslot) {
                // Keys map to different slots: cross-slot error
                *error_code = CLUSTER_REDIR_CROSS_SLOT;
                return NULL;
            }
        }
    }
    // ======== 2. Look up the node owning the slot ========
    clusterNode *n = server.cluster->slots[slot];
    if (n == NULL) {
        *error_code = CLUSTER_REDIR_DOWN_UNBOUND;
        return NULL;
    }
    // ======== 3. Is the slot served by this node? ========
    if (n != myself) {
        // Not ours: reply with MOVED
        *error_code = CLUSTER_REDIR_MOVED;
        *hashslot = slot;
        return n;
    }
    // ======== 4. Is the slot being migrated away? ========
    // (the real code replies ASK only when none of the keys are present locally)
    if (server.cluster->migrating_slots_to[slot] != NULL) {
        *error_code = CLUSTER_REDIR_ASK;
        *hashslot = slot;
        return server.cluster->migrating_slots_to[slot];
    }
    return myself;
}
```
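Both sides of this exchange are easy to watch from the command line. A quick sanity check, assuming the three-node example cluster from the diagram above (ports 6379/6380/6381): without `-c`, redis-cli surfaces the raw MOVED error; with `-c` it follows the redirect itself.

```bash
# Without cluster mode, the raw redirect is visible:
$ redis-cli -p 6379 GET user:1001
(error) MOVED 14909 127.0.0.1:6381

# With -c, redis-cli chases the redirect and retries on the right node:
$ redis-cli -c -p 6379 GET user:1001
-> Redirected to slot [14909] located at 127.0.0.1:6381
"Alice"
```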
### ASK Redirection

Handling requests while a slot is being migrated:
```text
┌────────────────────────────────────────────────┐
│        ASK Redirection (slot migration)        │
└────────────────────────────────────────────────┘
Scenario: slot 100 is being migrated from Node1 to Node2

Node1 state:
    migrating_slots_to[100] = Node2
Node2 state:
    importing_slots_from[100] = Node1
Key state:
    key_a: already moved to Node2
    key_b: still on Node1

┌─────────────────────────────────────────────┐
│ Request 1: GET key_a                        │
├─────────────────────────────────────────────┤
Client → Node1: GET key_a
    ↓
Node1:
    slot = 100
    if (migrating_slots_to[100] == Node2) {
        // slot is being migrated away
        // look locally first
        if (lookupKeyRead(db, "key_a") != NULL) {
            return value;  // still here
        } else {
            // not here, probably already migrated
            return "-ASK 100 127.0.0.1:6381";
        }
    }
    ↓
Client receives ASK:
    1. Do NOT update the slot cache (temporary redirect)
    2. Connect to Node2
    3. Send ASKING first
    4. Then send GET key_a
    ↓
Client → Node2: ASKING
Client → Node2: GET key_a
    ↓
Node2:
    if (c->flags & CLIENT_ASKING) {
        // temporarily allow access to an importing slot
        if (importing_slots_from[100] == Node1) {
            c->flags &= ~CLIENT_ASKING;        // clear the flag
            return lookupKeyRead(db, "key_a"); // return the value ✅
        }
    }
    ↓
Node2 → Client: "value_a"

┌─────────────────────────────────────────────┐
│ Request 2: GET key_b                        │
├─────────────────────────────────────────────┤
Client → Node1: GET key_b
    ↓
Node1:
    slot = 100
    if (migrating_slots_to[100] == Node2) {
        if (lookupKeyRead(db, "key_b") != NULL) {
            // key_b is still local
            return "value_b";  // ✅ served directly
        }
    }
    ↓
Node1 → Client: "value_b"
```
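The migrating/importing markers in the diagram are exactly what `redis-cli --cluster reshard` sets up under the hood. Here is a minimal manual sketch of the same protocol, assuming slot 100 moves from 127.0.0.1:6379 to 127.0.0.1:6381; `<node1-id>` and `<node2-id>` are placeholders for the real 40-character node IDs:

```bash
# 1. Mark the slot as importing on the target, migrating on the source
redis-cli -p 6381 CLUSTER SETSLOT 100 IMPORTING <node1-id>
redis-cli -p 6379 CLUSTER SETSLOT 100 MIGRATING <node2-id>

# 2. List up to 100 keys in the slot, then move them in one MIGRATE call
redis-cli -p 6379 CLUSTER GETKEYSINSLOT 100 100
redis-cli -p 6379 MIGRATE 127.0.0.1 6381 "" 0 5000 KEYS key_a key_b

# 3. Finalize: assign the slot to the target (repeat on every master)
redis-cli -p 6379 CLUSTER SETSLOT 100 NODE <node2-id>
redis-cli -p 6381 CLUSTER SETSLOT 100 NODE <node2-id>
```

Between steps 1 and 3, the source answers ASK for keys it no longer has, exactly as shown above.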
## Failure Detection and Failover

### From PFAIL to FAIL

The failure-report collection mechanism:
```c
// cluster.c
typedef struct clusterNodeFailReport {
    struct clusterNode *node;   // the reporting node
    mstime_t time;              // time of the report
} clusterNodeFailReport;

// Add a failure report
void clusterNodeAddFailureReport(clusterNode *failing, clusterNode *sender) {
    list *l = failing->fail_reports;
    listIter li;
    listNode *ln;
    clusterNodeFailReport *fr;
    // ======== 1. Is there already a report from this sender? ========
    listRewind(l, &li);
    while ((ln = listNext(&li)) != NULL) {
        fr = ln->value;
        if (fr->node == sender) {
            // Already reported: just refresh the timestamp
            fr->time = mstime();
            return;
        }
    }
    // ======== 2. Append a new report ========
    fr = zmalloc(sizeof(*fr));
    fr->node = sender;
    fr->time = mstime();
    listAddNodeTail(l, fr);
}

// Purge stale reports
void clusterNodeCleanupFailureReports(clusterNode *node) {
    list *l = node->fail_reports;
    listNode *ln;
    listIter li;
    clusterNodeFailReport *fr;
    mstime_t maxtime = server.cluster_node_timeout * 2; // e.g. 30 s with a 15 s timeout
    mstime_t now = mstime();
    listRewind(l,&li);
    while ((ln = listNext(&li)) != NULL) {
        fr = ln->value;
        // Drop reports older than twice the node timeout
        if (now - fr->time > maxtime) {
            listDelNode(l,ln);
        }
    }
}

// Decide whether the node is objectively down
void markNodeAsFailingIfNeeded(clusterNode *node) {
    int failures;
    int needed_quorum = (server.cluster->size / 2) + 1;
    // ======== 1. Count failure reports ========
    failures = clusterNodeFailureReportsCount(node);
    // ======== 2. Add our own judgement ========
    if (node->flags & CLUSTER_NODE_PFAIL) failures++;
    // ======== 3. Quorum reached? ========
    if (failures < needed_quorum) return;
    // ======== 4. Mark the node as FAIL ========
    node->flags &= ~CLUSTER_NODE_PFAIL;
    node->flags |= CLUSTER_NODE_FAIL;
    node->fail_time = mstime();
    // ======== 5. Broadcast the FAIL message ========
    clusterSendFail(node->name);
}
```
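The PFAIL/FAIL flags are visible in `CLUSTER NODES` output, which makes the progression easy to observe during an outage drill (ports follow the example cluster; the output lines are illustrative):

```bash
# Flags live in the third field: "master,fail?" = PFAIL, "master,fail" = FAIL
redis-cli -p 6379 CLUSTER NODES | awk '{print $2, $3}'
# 127.0.0.1:6380@16380  master,fail?   <- subjectively down (this node's view only)
# 127.0.0.1:6381@16381  master         <- healthy

# Once a majority agrees and FAIL is broadcast, every node shows plain "fail"
redis-cli -p 6381 CLUSTER NODES | grep fail
```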
### Replica Election

The election delay in detail:
```c
// cluster.c
void clusterHandleSlaveFailover(void) {
    mstime_t data_age;
    mstime_t auth_age = mstime() - server.cluster->failover_auth_time;
    int needed_quorum = (server.cluster->size / 2) + 1;
    int manual_failover = server.cluster->mf_end != 0 &&
                          server.cluster->mf_can_start;
    // ======== 1. How old is our copy of the data? ========
    data_age = server.master_repl_offset ?
               (server.unixtime - server.repl_down_since) : 0;
    // ======== 2. Compute our rank ========
    // Replicas are ordered by repl_offset; a larger offset means a better rank
    int rank = clusterGetSlaveRank();
    // ======== 3. Compute the election delay ========
    mstime_t auth_delay =
        500 +                   // fixed delay
        random() % 500 +        // random jitter (0-500 ms)
        rank * 1000;            // rank penalty (rank × 1 s)
    // Data too stale: add an extra penalty
    if (data_age > server.cluster_node_timeout)
        auth_delay += server.cluster_node_timeout;
    // ======== 4. Wait until the delay has elapsed ========
    if (auth_age < auth_delay) return;
    // ======== 5. Ask the masters for votes ========
    server.cluster->currentEpoch++;
    server.cluster->failover_auth_epoch = server.cluster->currentEpoch;
    server.cluster->failover_auth_time = mstime();
    server.cluster->failover_auth_sent = 0;
    clusterRequestFailoverAuth();
    serverLog(LL_WARNING,
        "Start of election (rank #%d, offset %lld)", rank,
        server.master_repl_offset);
}
```
A worked example:
```text
Scenario: Master1 goes FAIL and its 3 replicas compete

Slave1-1:
    repl_offset: 1000000  (most up to date)
    rank: 0
    data_age: 5 s
    delay = 500 + 250 (random) + 0×1000 = 750 ms
Slave1-2:
    repl_offset: 999500
    rank: 1
    data_age: 5 s
    delay = 500 + 300 (random) + 1×1000 = 1800 ms
Slave1-3:
    repl_offset: 998000
    rank: 2
    data_age: 5 s
    delay = 500 + 400 (random) + 2×1000 = 2900 ms

Timeline:
T0:        Master1 is marked FAIL

T0+750ms:  Slave1-1 starts an election
           currentEpoch = 101
           Slave1-1 → Master2: FAILOVER_AUTH_REQUEST (epoch=101)
           Slave1-1 → Master3: FAILOVER_AUTH_REQUEST (epoch=101)

           Master2's voting logic:
           if (lastVoteEpoch < 101) {                 // has not voted yet
               if (Slave1-1's master is indeed FAIL) { ✅
                   lastVoteEpoch = 101;
                   return FAILOVER_AUTH_ACK;          // grant the vote
               }
           }
           Master3 votes for Slave1-1 the same way

           Slave1-1 tallies:
           auth_count = 2
           needed = 3/2 + 1 = 2
           2 >= 2 ✅ elected!

T0+800ms:  Slave1-1 promotes itself to master
           flags &= ~CLUSTER_NODE_SLAVE;
           flags |= CLUSTER_NODE_MASTER;
           configEpoch = 101;  (newest epoch)
           takes over slots 0-5460

T0+1800ms: Slave1-2 starts an election (too late)
           Master2: lastVoteEpoch = 101 (already voted)
           Master3: lastVoteEpoch = 101 (already voted)
           no votes available ❌

T0+2900ms: Slave1-3 starts an election (even later)
           no votes either ❌

Result: Slave1-1 is the new master
```
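The same election machinery can be exercised deliberately with `CLUSTER FAILOVER` on a replica, which is the standard way to rotate a master out for maintenance (the replica port here is illustrative):

```bash
# Coordinated failover: the master pauses writes, the replica catches up,
# then wins a regular election
redis-cli -p 6380 CLUSTER FAILOVER

# FORCE skips coordination with an unreachable master
# (TAKEOVER additionally skips the vote entirely -- use with care)
redis-cli -p 6380 CLUSTER FAILOVER FORCE

# After promotion, the config epoch should have advanced:
redis-cli -p 6380 CLUSTER INFO | grep -E "cluster_current_epoch|cluster_my_epoch"
```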
## Scaling Out and In

### Atomicity of Slot Migration

How the MIGRATE command is implemented:
```c
// cluster.c (simplified)
void migrateCommand(client *c) {
    robj *ov;
    rio cmd, payload;
    int may_retry = 1, replace = 0;
    long timeout = 1000;   /* parsed from the command arguments in the real code */
    ssize_t nread;
    int fd;

try_again:
    // ======== 1. Connect to the target node ========
    fd = migrateGetSocket(c, c->argv[1], c->argv[2], timeout);
    if (fd == -1) {
        addReplyError(c, "Can't connect to target node");
        return;
    }
    // ======== 2. Look up the key ========
    if ((ov = lookupKeyRead(c->db, c->argv[3])) == NULL) {
        // Key not found: it may already have been migrated
        addReply(c, shared.nokey);
        return;
    }
    // ======== 3. Serialize the value (DUMP format) ========
    createDumpPayload(&payload, ov, c->argv[3]);
    // ======== 4. Build the RESTORE command ========
    serverAssertWithInfo(c,NULL,
        rioWriteBulkCount(&cmd,'*',replace ? 5 : 4));
    serverAssertWithInfo(c,NULL,
        rioWriteBulkString(&cmd,"RESTORE",7));
    serverAssertWithInfo(c,NULL,
        rioWriteBulkString(&cmd,c->argv[3]->ptr,sdslen(c->argv[3]->ptr)));
    serverAssertWithInfo(c,NULL,
        rioWriteBulkString(&cmd,"0",1)); // TTL
    serverAssertWithInfo(c,NULL,
        rioWriteBulkString(&cmd,payload.io.buffer.ptr,
                           sdslen(payload.io.buffer.ptr)));
    if (replace) {
        serverAssertWithInfo(c,NULL,
            rioWriteBulkString(&cmd,"REPLACE",7));
    }
    // ======== 5. Send the command ========
    errno = 0;
    {
        sds buf = cmd.io.buffer.ptr;
        size_t pos = 0, towrite;
        int nwritten = 0;
        while ((towrite = sdslen(buf)-pos) > 0) {
            towrite = (towrite > (64*1024) ? (64*1024) : towrite);
            nwritten = connSyncWrite(fd,buf+pos,towrite,timeout);
            if (nwritten != (signed)towrite) {
                // Write failed
                goto socket_err;
            }
            pos += nwritten;
        }
    }
    // ======== 6. Wait for the reply ========
    char buf0[1024];
    if ((nread = connSyncReadLine(fd, buf0, sizeof(buf0), timeout)) <= 0)
        goto socket_err;
    if (buf0[0] == '-') {
        // The target node returned an error
        addReplyErrorFormat(c,"Target instance error: %s", buf0+1);
        return;
    }
    // ======== 7. Delete the key from the source ========
    dbDelete(c->db, c->argv[3]);
    signalModifiedKey(c, c->db, c->argv[3]);
    // ======== 8. Reply OK ========
    addReply(c, shared.ok);
    server.dirty++;
    return;

socket_err:
    // Error path: close the cached socket and retry once
    migrateCloseSocket(c->argv[1], c->argv[2]);
    if (errno != ETIMEDOUT && may_retry) {
        may_retry = 0;
        goto try_again;
    }
    addReplyError(c, "IO error");
}
```
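The delete-after-acknowledgement ordering in step 7 is what makes the move safe: either both copies briefly exist, or only the source does. A quick check of the semantics on a scratch setup (two throwaway instances on 6379/6381, purely illustrative):

```bash
redis-cli -p 6379 SET tempkey hello
redis-cli -p 6379 MIGRATE 127.0.0.1 6381 tempkey 0 5000
redis-cli -p 6379 EXISTS tempkey   # (integer) 0  -- removed from the source
redis-cli -p 6381 GET tempkey      # "hello"      -- landed on the target
```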
### Online Scale-Out, End to End

Scale-out script (3 nodes → 4 nodes):
```bash
#!/bin/bash
# redis-cluster-scale-out.sh
# Online scale-out: 3 masters + 3 replicas → 4 masters + 4 replicas
set -e
EXIST_NODE="127.0.0.1:6379"
NEW_MASTER_PORT=7000
NEW_SLAVE_PORT=7001
echo "========================================="
echo "Redis Cluster online scale-out"
echo "========================================="
# ======== Step 1: start the new nodes ========
echo ""
echo "Step 1: starting the new nodes..."
redis-server --port $NEW_MASTER_PORT \
    --cluster-enabled yes \
    --cluster-config-file nodes-$NEW_MASTER_PORT.conf \
    --cluster-node-timeout 15000 \
    --appendonly yes \
    --daemonize yes
redis-server --port $NEW_SLAVE_PORT \
    --cluster-enabled yes \
    --cluster-config-file nodes-$NEW_SLAVE_PORT.conf \
    --cluster-node-timeout 15000 \
    --appendonly yes \
    --daemonize yes
sleep 2
echo "✅ New nodes are up"
# ======== Step 2: add the new master ========
echo ""
echo "Step 2: adding the new master to the cluster..."
redis-cli --cluster add-node 127.0.0.1:$NEW_MASTER_PORT $EXIST_NODE
# Grab the new node's ID
NEW_MASTER_ID=$(redis-cli -p $NEW_MASTER_PORT CLUSTER MYID)
echo "New master ID: $NEW_MASTER_ID"
# ======== Step 3: reshard slots ========
echo ""
echo "Step 3: migrating slots (4096 of them)..."
echo "This takes a while; please be patient..."
# Non-interactive reshard
redis-cli --cluster reshard $EXIST_NODE \
    --cluster-from all \
    --cluster-to $NEW_MASTER_ID \
    --cluster-slots 4096 \
    --cluster-yes \
    --cluster-timeout 60000 \
    --cluster-pipeline 10
echo "✅ Slot migration finished"
# ======== Step 4: add the replica ========
echo ""
echo "Step 4: adding the replica..."
redis-cli --cluster add-node 127.0.0.1:$NEW_SLAVE_PORT $EXIST_NODE \
    --cluster-slave \
    --cluster-master-id $NEW_MASTER_ID
echo "✅ Replica added"
# ======== Step 5: verify ========
echo ""
echo "Step 5: verifying cluster state..."
echo "Cluster info:"
redis-cli -p $NEW_MASTER_PORT CLUSTER INFO | grep -E "cluster_state|cluster_size"
echo ""
echo "Node list:"
redis-cli -p $NEW_MASTER_PORT CLUSTER NODES | awk '{print $2,$3,$9}'
echo ""
echo "========================================="
echo "✅ Scale-out complete!"
echo "========================================="
```
### Monitoring Slot Migration
```bash
#!/bin/bash
# slot-migration-monitor.sh
SOURCE_NODE="127.0.0.1:6379"
TARGET_NODE="127.0.0.1:7000"
START_SLOT=0
END_SLOT=1364
echo "Monitoring slot migration: slots $START_SLOT-$END_SLOT"
echo "$SOURCE_NODE → $TARGET_NODE"
echo ""
TOTAL_SLOTS=$((END_SLOT - START_SLOT + 1))
START_TIME=$(date +%s)
while true; do
    # Count how many slots in the window the target already owns.
    # CLUSTER NODES lists the node's slots from field 9 onward, either as
    # single slots ("5") or ranges ("0-5460"); skip "[...]" migration markers.
    MIGRATED=0
    ranges=$(redis-cli -h "${TARGET_NODE%:*}" -p "${TARGET_NODE#*:}" CLUSTER NODES | \
             grep myself | awk '{for(i=9;i<=NF;i++) if ($i !~ /^\[/) print $i}')
    for range in $ranges; do
        lo=${range%-*}
        hi=${range#*-}
        # Clamp the range to the monitored window
        [ "$lo" -lt "$START_SLOT" ] && lo=$START_SLOT
        [ "$hi" -gt "$END_SLOT" ] && hi=$END_SLOT
        [ "$lo" -le "$hi" ] && MIGRATED=$((MIGRATED + hi - lo + 1))
    done
    # Progress
    PERCENT=$((MIGRATED * 100 / TOTAL_SLOTS))
    ELAPSED=$(($(date +%s) - START_TIME))
    # Estimate the remaining time
    if [ $MIGRATED -gt 0 ]; then
        AVG_TIME=$((ELAPSED / MIGRATED))
        REMAINING=$(( (TOTAL_SLOTS - MIGRATED) * AVG_TIME ))
        REMAINING_MIN=$((REMAINING / 60))
    else
        REMAINING_MIN="?"
    fi
    # Progress bar
    printf "\r[%-50s] %d%% (%d/%d) elapsed: %ds, remaining: ~%smin" \
        "$(printf '#%.0s' $(seq 1 $((PERCENT/2))))" \
        $PERCENT $MIGRATED $TOTAL_SLOTS $ELAPSED $REMAINING_MIN
    # Done?
    if [ $MIGRATED -eq $TOTAL_SLOTS ]; then
        echo ""
        echo "✅ Migration finished! Total time: ${ELAPSED}s"
        break
    fi
    sleep 2
done
```
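If slot-level resolution is more than you need, redis-cli already aggregates the same information:

```bash
# One line per master: key count, slot count, replica count
redis-cli --cluster info 127.0.0.1:6379
# Full consistency check, including open (half-migrated) slots
redis-cli --cluster check 127.0.0.1:6379
```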
## Cluster Limitations and Workarounds

### Multi-Key Operations
```java
/**
 * Best practices for multi-key operations in cluster mode
 */
public class ClusterMultiKeyService {

    @Autowired
    private JedisCluster jedisCluster;

    /**
     * Option 1: hash tags (recommended)
     */
    public void batchSetWithHashTag(String userId, Map<String, String> data) {
        // Every key shares the same {tag}, so they all map to one slot.
        // (pipelined() on JedisCluster requires a Jedis version with
        // cluster-pipeline support)
        Pipeline pipeline = jedisCluster.pipelined();
        for (Map.Entry<String, String> entry : data.entrySet()) {
            String key = "user:{" + userId + "}:" + entry.getKey();
            pipeline.set(key, entry.getValue());
        }
        pipeline.sync(); // flush the batch
    }

    /**
     * Option 2: route on the client side
     */
    public List<String> multiGet(List<String> keys) {
        // Group keys by slot
        Map<Integer, List<String>> slotGroups = new HashMap<>();
        for (String key : keys) {
            int slot = JedisClusterCRC16.getSlot(key);
            slotGroups.computeIfAbsent(slot, k -> new ArrayList<>()).add(key);
        }
        // Query each slot group separately
        // (note: results come back grouped by slot, not in input order)
        List<String> results = new ArrayList<>();
        for (Map.Entry<Integer, List<String>> entry : slotGroups.entrySet()) {
            List<String> slotKeys = entry.getValue();
            // Keys in the same slot can be fetched with a single MGET
            List<String> values = jedisCluster.mget(slotKeys.toArray(new String[0]));
            results.addAll(values);
        }
        return results;
    }

    /**
     * Option 3: Lua script (same slot only)
     */
    public void transactionWithLua(String userId) {
        String script =
            "redis.call('HINCRBY', KEYS[1], 'orders', 1) " +
            "redis.call('LPUSH', KEYS[2], ARGV[1]) " +
            "redis.call('SADD', KEYS[3], ARGV[2]) " +
            "return 1";
        jedisCluster.eval(script,
            Arrays.asList(
                "user:{" + userId + "}:profile",
                "user:{" + userId + "}:orders",
                "user:{" + userId + "}:tags"
            ),
            Arrays.asList("order123", "vip"));
        // The script runs atomically on the node owning the shared slot ✅
    }
}
```
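Option 1 works because only the text inside `{}` is hashed. This is easy to confirm, since `CLUSTER KEYSLOT` exposes the server's own slot computation:

```bash
# All three keys hash to the same slot: only "1001" inside {} is fed to CRC16
redis-cli -p 6379 CLUSTER KEYSLOT "user:{1001}:profile"
redis-cli -p 6379 CLUSTER KEYSLOT "user:{1001}:orders"
redis-cli -p 6379 CLUSTER KEYSLOT "user:{1001}:tags"
# Without the braces, each key would be hashed in full and
# would generally land on different slots:
redis-cli -p 6379 CLUSTER KEYSLOT "user:1001:profile"
```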
## Production Deployment

### Recommended Configuration
```conf
# redis-cluster.conf
# ======== Basics ========
bind 0.0.0.0
port 6379
protected-mode yes
requirepass <strong_password>
masterauth <strong_password>       # replicas need this to authenticate to the master
# ======== Cluster ========
cluster-enabled yes
cluster-config-file nodes-6379.conf
cluster-node-timeout 15000         # node timeout: 15 s
cluster-slave-validity-factor 10   # staleness bound for replica failover
cluster-migration-barrier 1        # keep at least 1 replica per master
cluster-require-full-coverage yes  # require full slot coverage
cluster-replica-no-failover no     # allow automatic failover
cluster-allow-reads-when-down no   # no reads while the cluster is down
# ======== Persistence ========
appendonly yes
appendfsync everysec
aof-use-rdb-preamble yes
# ======== Memory ========
maxmemory 8gb
maxmemory-policy allkeys-lru
# ======== Slow log ========
slowlog-log-slower-than 10000
slowlog-max-len 128
# ======== Clients ========
timeout 300
tcp-keepalive 300
maxclients 10000
```
### One-Command Deployment Script
```bash
#!/bin/bash
# deploy-redis-cluster.sh
NODES=6
REPLICAS=1
BASE_PORT=6379
BASE_DIR="/data/redis"
echo "Deploying a $NODES-node Redis Cluster"
# Create directories and start the nodes
for i in $(seq 0 $((NODES - 1))); do
    port=$((BASE_PORT + i))
    dir="$BASE_DIR/$port"
    mkdir -p $dir
    # Generate the config file
    cat > $dir/redis.conf <<EOF
port $port
cluster-enabled yes
cluster-config-file nodes-$port.conf
cluster-node-timeout 15000
dir $dir
appendonly yes
appendfsync everysec
daemonize yes
pidfile $dir/redis-$port.pid
logfile $dir/redis-$port.log
dbfilename dump-$port.rdb
appendfilename "appendonly-$port.aof"
requirepass Redis@2024
masterauth Redis@2024
protected-mode no
EOF
    # Start the node
    redis-server $dir/redis.conf
    echo "✅ Node $port started"
done
sleep 3
# Create the cluster
CLUSTER_NODES=""
for i in $(seq 0 $((NODES - 1))); do
    port=$((BASE_PORT + i))
    CLUSTER_NODES="$CLUSTER_NODES 127.0.0.1:$port"
done
echo ""
echo "Creating the cluster..."
redis-cli -a Redis@2024 --cluster create $CLUSTER_NODES \
    --cluster-replicas $REPLICAS \
    --cluster-yes
echo ""
echo "========================================="
echo "Cluster deployed!"
echo "========================================="
# Verify
redis-cli -a Redis@2024 -p $BASE_PORT CLUSTER INFO
redis-cli -a Redis@2024 -p $BASE_PORT CLUSTER NODES
```
## FAQ

### Q1: Can Cluster be combined with Sentinel?

A: No need — Cluster has failover built in:

- failure detection (gossip)
- automatic promotion (replica election)
- configuration propagation (config epochs)

Sentinel adds nothing on top of that.

### Q2: Can slot migration lose data?

A: No. MIGRATE moves each key atomically.
The MIGRATE flow:

1. DUMP the key (serialize it)
2. Send it to the target node
3. The target runs RESTORE
4. Wait for the acknowledgement
5. Delete the key on the source

If any step fails:

- the source key is kept
- the target key is never created
- no data is lost
### Q3: How do I check whether data is evenly distributed?

A:
```bash
# Key count, memory usage, and slots per node
for port in 6379 6380 6381; do
    keys=$(redis-cli -p $port DBSIZE)
    info=$(redis-cli -p $port INFO memory | grep used_memory_human)
    slots=$(redis-cli -p $port CLUSTER NODES | grep myself | awk '{print $9}')
    echo "Node $port: $keys keys, $info, slots: $slots"
done
# Sample output:
# Node 6379: 35000 keys, used_memory_human:1.2G, slots: 0-5460
# Node 6380: 34500 keys, used_memory_human:1.1G, slots: 5461-10922
# Node 6381: 30500 keys, used_memory_human:900M, slots: 10923-16383
#                  ↑ on the light side — a rebalance may be worthwhile
```
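If the skew is confirmed, rebalancing can also be done online; redis-cli keeps moving slots until every master is within the threshold of its share:

```bash
# Even out slot counts across masters (threshold is a percentage deviation)
redis-cli --cluster rebalance 127.0.0.1:6379 --cluster-threshold 2
# Also assign slots to masters that currently own none (e.g. freshly added ones)
redis-cli --cluster rebalance 127.0.0.1:6379 --cluster-use-empty-masters
```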
## Summary

This second part walked through the hands-on side of Redis Cluster:

Request redirection

- ➡️ MOVED: permanent redirect; update the slot cache
- 🔀 ASK: temporary redirect; requires the ASKING command
- 🧠 Smart clients: cache the slot map and route directly
- ⚡ With a warm slot cache, nearly all redirects disappear

Failover

- 📋 Failure reports: PFAIL information gathered via gossip
- 👁️ PFAIL→FAIL: confirmed by a majority of masters
- ⏰ Delayed election: larger replication offsets go first
- 🔄 Automatic promotion: typically completes in 10-30 s

Scaling

- ➕ Online scale-out with no service interruption
- 🔄 Slot migration: per-key atomic MIGRATE
- ⚖️ Rebalance: even out the load automatically
- 📊 Real-time progress monitoring

Production practice

- 📝 A complete, field-tested configuration
- 🚀 One-command deployment scripts
- 📊 Monitoring: health checks and alerting
- 🔧 Ops tooling: migration, scaling, backup

Series links:

- Part 1: architecture, data structures, the gossip protocol
- Part 2: failover, scaling, production practice

💡 Up next: "Redis Performance Tuning in Practice: Slow-Query Analysis, Pipeline Optimization, and Monitoring"