前言:本教程在前面Redis主从复制集群搭建的基础上进行哨兵模式搭建,如果没有搭建好主从复制集群,请参考这个教程完成主从复制集群的搭建:Redis主从复制集群搭建详解
搭建哨兵
在每个节点上都编写哨兵的配置文件
powershell
[root@master ~]# cat > /etc/redis/conf/sentinel.conf <<EOF
# 哨兵端口
port 26379
# 后台运行
daemonize yes
# PID文件
pidfile /usr/local/redis/redis-sentinel-26379.pid
# 日志文件
logfile "/usr/local/redis/sentinel_26379.log"
# 数据目录
dir /usr/local/redis/sentinel
# 核心配置:监控的主节点信息
# sentinel monitor <master-name> <ip> <port> <quorum>
sentinel monitor mymaster 192.168.194.11 6379 2
# 节点不可达的时间阈值(毫秒)
# 超过此时间认为节点主观下线(SDOWN)
sentinel down-after-milliseconds mymaster 5000
# 故障转移超时时间(毫秒)
sentinel failover-timeout mymaster 60000
# 主节点密码(若主节点启用认证,取消下一行注释并填入密码)
# sentinel auth-pass mymaster <password>
# 故障转移时,同时向新主节点发起同步的从节点数量
sentinel parallel-syncs mymaster 1
EOF
[root@master ~]# redis-sentinel /etc/redis/conf/sentinel.conf
*** FATAL CONFIG FILE ERROR (Redis 8.6.2) ***
Reading the configuration file, at line 10
>>> 'dir /usr/local/redis/sentinel'
No such file or directory
[root@master ~]# mkdir /usr/local/redis/sentinel
mkdir: cannot create directory '/usr/local/redis/sentinel': No such file or directory
[root@master ~]# mkdir /usr/local/redis/sentinel -p
# 启动sentinel,每个节点都执行
[root@master ~]# redis-sentinel /etc/redis/conf/sentinel.conf
查看集群信息
powershell
[root@slave1 ~]# redis-cli -p 26379 -a 1234 sentinel master mymaster
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
AUTH failed: ERR AUTH <password> called without any password configured for the default user. Are you sure your configuration is correct?
1) "name"
2) "mymaster"
3) "ip"
4) "192.168.194.11"
5) "port"
6) "6379"
7) "runid"
8) "c12912c3097e8c74f62d8d34713d4acf4e0e3625"
9) "flags"
10) "master"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "181"
19) "last-ping-reply"
20) "181"
21) "down-after-milliseconds"
22) "5000"
23) "info-refresh"
24) "814"
25) "role-reported"
26) "master"
27) "role-reported-time"
28) "41024"
29) "config-epoch"
30) "0"
31) "num-slaves"
32) "2"
33) "num-other-sentinels"
34) "2"
35) "quorum"
36) "2"
37) "failover-timeout"
38) "60000"
39) "parallel-syncs"
40) "1"
查看从节点信息
powershell
[root@slave1 ~]# redis-cli -p 26379 -a 1234 --no-auth-warning sentinel slaves mymaster
AUTH failed: ERR AUTH <password> called without any password configured for the default user. Are you sure your configuration is correct?
1) 1) "name"
2) "192.168.194.12:6379"
3) "ip"
4) "192.168.194.12"
5) "port"
6) "6379"
7) "runid"
8) "abb9f0b3cb361b0e9bec2608effdd582f5995e61"
9) "flags"
10) "slave"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "4"
19) "last-ping-reply"
20) "4"
21) "down-after-milliseconds"
22) "5000"
23) "info-refresh"
24) "177"
25) "role-reported"
26) "slave"
27) "role-reported-time"
28) "271413"
29) "master-link-down-time"
30) "0"
31) "master-link-status"
32) "ok"
33) "master-host"
34) "192.168.194.11"
35) "master-port"
36) "6379"
37) "slave-priority"
38) "100"
39) "slave-repl-offset"
40) "59560"
41) "replica-announced"
42) "1"
2) 1) "name"
2) "192.168.194.13:6379"
3) "ip"
4) "192.168.194.13"
5) "port"
6) "6379"
7) "runid"
8) "edd91cad1bdac825d13c3b3cb4b9f74c70519f4d"
9) "flags"
10) "slave"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "272"
19) "last-ping-reply"
20) "272"
21) "down-after-milliseconds"
22) "5000"
23) "info-refresh"
24) "176"
25) "role-reported"
26) "slave"
27) "role-reported-time"
28) "271412"
29) "master-link-down-time"
30) "0"
31) "master-link-status"
32) "ok"
33) "master-host"
34) "192.168.194.11"
35) "master-port"
36) "6379"
37) "slave-priority"
38) "100"
39) "slave-repl-offset"
40) "59560"
41) "replica-announced"
42) "1"
注意:这里的 AUTH failed 提示是因为哨兵(26379 端口)没有配置密码,而命令中却带了 -a 1234 参数。该提示可以忽略(也可以直接去掉 -a 参数),因为命令已经成功返回了从节点的信息。
查看哨兵节点信息
powershell
[root@slave1 ~]# redis-cli -p 26379 -a 1234 --no-auth-warning sentinel sentinels mymaster
AUTH failed: ERR AUTH <password> called without any password configured for the default user. Are you sure your configuration is correct?
1) 1) "name"
2) "b0814e14357b69f231a96b01ea856ab54096860c"
3) "ip"
4) "192.168.194.13"
5) "port"
6) "26379"
7) "runid"
8) "b0814e14357b69f231a96b01ea856ab54096860c"
9) "flags"
10) "sentinel"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "8"
19) "last-ping-reply"
20) "8"
21) "down-after-milliseconds"
22) "5000"
23) "last-hello-message"
24) "672"
25) "voted-leader"
26) "?"
27) "voted-leader-epoch"
28) "0"
2) 1) "name"
2) "3cb2d4c5709a0d332d645a2231d1752ce27a4df1"
3) "ip"
4) "192.168.194.11"
5) "port"
6) "26379"
7) "runid"
8) "3cb2d4c5709a0d332d645a2231d1752ce27a4df1"
9) "flags"
10) "sentinel"
11) "link-pending-commands"
12) "0"
13) "link-refcount"
14) "1"
15) "last-ping-sent"
16) "0"
17) "last-ok-ping-reply"
18) "8"
19) "last-ping-reply"
20) "8"
21) "down-after-milliseconds"
22) "5000"
23) "last-hello-message"
24) "1135"
25) "voted-leader"
26) "?"
27) "voted-leader-epoch"
28) "0"
查看当前主节点信息
powershell
[root@slave1 ~]# redis-cli -p 26379 -a 1234 --no-auth-warning sentinel get-master-addr-by-name mymaster
AUTH failed: ERR AUTH <password> called without any password configured for the default user. Are you sure your configuration is correct?
1) "192.168.194.11"
2) "6379"
查看哨兵集群信息
powershell
[root@slave1 ~]# redis-cli -p 26379 -a 1234 --no-auth-warning info sentinel
AUTH failed: ERR AUTH <password> called without any password configured for the default user. Are you sure your configuration is correct?
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_tilt_since_seconds:-1
sentinel_total_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=192.168.194.11:6379,slaves=2,sentinels=3
[root@slave1 ~]#
测试故障转移
关闭master进程
powershell
[root@master ~]# redis-cli -a 1234 shutdown
Warning: Using a password with '-a' or '-u' option on the command line interface may not be safe.
AUTH failed: ERR AUTH <password> called without any password configured for the default user. Are you sure your configuration is correct?
[root@master ~]# ps -ef | grep redis
root 2103 1 0 20:30 ? 00:00:02 redis-sentinel *:26379 [sentinel]
root 2127 1698 0 20:42 pts/0 00:00:00 grep --color=auto redis
[root@master ~]#
查看sentinel日志
powershell
[root@master ~]# cat /usr/local/redis/sentinel_26379.log
# ==================== 启动阶段 ====================
# 警告:内存过量使用未启用(可忽略,不影响哨兵功能)
2102:X 13 May 2026 20:30:55.806 # WARNING Memory overcommit must be enabled!...
# Redis 哨兵启动信息
2103:X 13 May 2026 20:30:55.806 * Redis version=8.6.2, bits=64
2103:X 13 May 2026 20:30:55.807 * Running mode=sentinel, port=26379.
# 生成哨兵唯一ID
2103:X 13 May 2026 20:30:55.809 * Sentinel ID is 3cb2d4c5709a0d332d645a2231d1752ce27a4df1
# +monitor: 开始监控主节点 mymaster (192.168.194.11:6379),需要2个哨兵同意才判定故障
2103:X 13 May 2026 20:30:55.809 # +monitor master mymaster 192.168.194.11 6379 quorum 2
# +slave: 发现两个从节点
2103:X 13 May 2026 20:30:55.809 * +slave slave 192.168.194.12:6379 @ mymaster 192.168.194.11 6379
2103:X 13 May 2026 20:30:55.810 * +slave slave 192.168.194.13:6379 @ mymaster 192.168.194.11 6379
# +sentinel: 发现其他哨兵节点(哨兵之间自动发现)
2103:X 13 May 2026 20:30:57.810 * +sentinel sentinel b0814e... 192.168.194.13 26379 @ mymaster
2103:X 13 May 2026 20:30:57.875 * +sentinel sentinel 609c96... 192.168.194.12 26379 @ mymaster
# ==================== 故障检测阶段 ====================
# +sdown: 主观下线(当前哨兵认为主节点不可达,超过5秒未响应)
2103:X 13 May 2026 20:41:59.151 # +sdown master mymaster 192.168.194.11 6379
# +odown: 客观下线(quorum=2,实际有3个哨兵都认为主节点下线,触发故障转移)
# quorum 3/2 表示:3个哨兵中有3个同意,超过设定的2票阈值
2103:X 13 May 2026 20:41:59.216 # +odown master mymaster 192.168.194.11 6379 #quorum 3/2
# ==================== 领导者选举阶段 ====================
# 开始新的纪元(每次故障转移递增)
2103:X 13 May 2026 20:41:59.216 # +new-epoch 1
# 尝试进行故障转移
2103:X 13 May 2026 20:41:59.216 # +try-failover master mymaster 192.168.194.11 6379
# 当前哨兵给自己投票(ID: 3cb2d4...)
2103:X 13 May 2026 20:41:59.217 # +vote-for-leader 3cb2d4c5709a0d332d645a2231d1752ce27a4df1 1
# 哨兵 192.168.194.13 投票给了自己
2103:X 13 May 2026 20:41:59.218 * b0814e... voted for b0814e... 1
# 哨兵 192.168.194.12 投票给了当前哨兵(3cb2d4...)
# 2票 > 3个哨兵的一半,当前哨兵成为领导者
2103:X 13 May 2026 20:41:59.220 * 609c96... voted for 3cb2d4... 1
# +elected-leader: 当前哨兵被选举为领导者,负责执行故障转移
2103:X 13 May 2026 20:41:59.328 # +elected-leader master mymaster 192.168.194.11 6379
# ==================== 从节点选择阶段 ====================
# 进入选择新主节点的阶段
2103:X 13 May 2026 20:41:59.328 # +failover-state-select-slave master mymaster 192.168.194.11 6379
# 选中从节点 192.168.194.12:6379 作为新主节点
# 选择依据(按顺序比较):replica-priority 值最小(优先级最高)、复制偏移量最大、run ID 字典序最小
2103:X 13 May 2026 20:41:59.439 # +selected-slave slave 192.168.194.12:6379 @ mymaster
# ==================== 故障转移执行阶段 ====================
# 发送 SLAVEOF NO ONE 命令,让选中的从节点升级为主节点
2103:X 13 May 2026 20:41:59.439 * +failover-state-send-slaveof-noone slave 192.168.194.12:6379
# 等待被选中的从节点完成提升(哨兵通过其 INFO 输出确认角色已变为 master)
2103:X 13 May 2026 20:41:59.503 * +failover-state-wait-promotion slave 192.168.194.12:6379
# +promoted-slave: 从节点已成功提升为主节点
2103:X 13 May 2026 20:41:59.969 # +promoted-slave slave 192.168.194.12:6379 @ mymaster
# 重新配置其他从节点,让它们复制新的主节点
2103:X 13 May 2026 20:41:59.969 # +failover-state-reconf-slaves master mymaster
# +slave-reconf-sent: 向从节点 192.168.194.13 发送重新配置命令
2103:X 13 May 2026 20:42:00.030 * +slave-reconf-sent slave 192.168.194.13:6379 @ mymaster
# -odown: 主节点客观下线状态解除(因为已经切换了新主)
2103:X 13 May 2026 20:42:00.318 # -odown master mymaster 192.168.194.11 6379
# 从节点正在重新配置中(进行复制)
2103:X 13 May 2026 20:42:01.053 * +slave-reconf-inprog slave 192.168.194.13:6379
# 从节点重新配置完成(已成功复制新主节点)
2103:X 13 May 2026 20:42:01.053 * +slave-reconf-done slave 192.168.194.13:6379
# ==================== 切换完成阶段 ====================
# +failover-end: 故障转移成功结束
2103:X 13 May 2026 20:42:01.115 # +failover-end master mymaster 192.168.194.11 6379
# +switch-master: 核心切换日志!
# 主节点从 192.168.194.11:6379 切换到 192.168.194.12:6379
2103:X 13 May 2026 20:42:01.115 # +switch-master mymaster 192.168.194.11 6379 192.168.194.12 6379
# 更新从节点列表(原来的13从节点仍然是从节点)
2103:X 13 May 2026 20:42:01.115 * +slave slave 192.168.194.13:6379 @ mymaster 192.168.194.12 6379
# 原来的主节点 11 现在变成了从节点
2103:X 13 May 2026 20:42:01.115 * +slave slave 192.168.194.11:6379 @ mymaster 192.168.194.12 6379
# ==================== 故障节点监控 ====================
# +sdown: 原来故障的主节点 11 现在作为从节点,仍然处于下线状态
2103:X 13 May 2026 20:42:06.220 # +sdown slave 192.168.194.11:6379 @ mymaster 192.168.194.12 6379
查看主节点
powershell
[root@master ~]# redis-cli -p 26379 -a 1234 --no-auth-warning sentinel get-master-addr-by-name mymaster
AUTH failed: ERR AUTH <password> called without any password configured for the default user. Are you sure your configuration is correct?
1) "192.168.194.12"
2) "6379"
查看哨兵集群信息
powershell
[root@master ~]# redis-cli -p 26379 -a 1234 --no-auth-warning info sentinel
AUTH failed: ERR AUTH <password> called without any password configured for the default user. Are you sure your configuration is correct?
# Sentinel
sentinel_masters:1
sentinel_tilt:0
sentinel_tilt_since_seconds:-1
sentinel_total_tilt:0
sentinel_running_scripts:0
sentinel_scripts_queue_length:0
sentinel_simulate_failure_flags:0
master0:name=mymaster,status=ok,address=192.168.194.12:6379,slaves=2,sentinels=3
重新启动redis服务
powershell
[root@master ~]# redis-server /etc/redis/redis.conf
再次查看主节点
powershell
[root@master ~]# redis-cli -p 26379 -a 1234 --no-auth-warning sentinel get-master-addr-by-name mymaster
AUTH failed: ERR AUTH <password> called without any password configured for the default user. Are you sure your configuration is correct?
1) "192.168.194.12"
2) "6379"
这里会发现,主节点仍然是 slave1(192.168.194.12):master 节点宕机时,哨兵已经选举出领导者并完成故障转移,将 slave1 提升为新的主节点;原 master 重新启动后,会被哨兵自动配置为新主节点的从节点,而不会抢回主节点角色。