高可用
MGR单主模式是MySQL官方推出的高可用解决方案,它通过Paxos协议实现了真正的多副本强一致性,在保证数据安全的同时提供自动故障转移能力。
注:按本文的恢复步骤,故障节点重新加入集群时不做增量恢复,而是先删除本地原有数据,再从零开始全量同步;数据量小的场景影响不大。
服务器信息
| 节点 | IP地址 | 主机名 | 角色 | Server ID |
|---|---|---|---|---|
| Node1 | 192.168.200.100 | mysql01 | 主节点 | 1 |
| Node2 | 192.168.200.101 | mysql02 | 从节点 | 2 |
| Node3 | 192.168.200.102 | mysql03 | 从节点 | 3 |
软件版本
- CentOS 7.9
- MySQL 8.0.30
- MGR 单主模式
基础环境准备(所有节点执行)
环境初始化
bash
vi init.sh
bash
#!/bin/bash
# Cluster bootstrap script; run it on every node.
#
# Node inventory: one "IP hostname" pair per entry.
NODES=("192.168.200.100 mysql01" "192.168.200.101 mysql02" "192.168.200.102 mysql03")
# Root password shared by all nodes (fed to ssh-copy-id further down).
HOST_PASS="000000"
# Hostname of the node that serves time to the rest of the cluster.
# There must be no whitespace after '=': "VAR= value" makes bash run
# "value" as a command and leaves VAR unset.
TIME_SERVER="mysql01"
# Subnet allowed to query the time server; it has to be the network the
# nodes listed in NODES live in.
TIME_SERVER_IP="192.168.200.0/24"
# Login banner: overwrite /etc/motd with a short welcome message.
cat > /etc/motd <<EOF
################################
# Welcome to mysqlcluster #
################################
EOF
# Package repository: use the local offline source.
# The device /dev/sr0 is mounted on /mnt, every existing repo file is moved
# out of the way into /media, and a single file:// repo pointing at /mnt is
# written in their place (GPG checking disabled).
mount /dev/sr0 /mnt/
mv /etc/yum.repos.d/* /media/
cat > /etc/yum.repos.d/centos.repo << eof
[c]
name=c
baseurl=file:///mnt
gpgcheck=0
enabled=1
eof
# SSH tuning: switch the commented-out "UseDNS yes" to "UseDNS no" and
# "GSSAPIAuthentication yes" to "no" in sshd_config, then reload sshd so the
# change takes effect without dropping the service.
sed -i -e 's/#UseDNS yes/UseDNS no/g' -e 's/GSSAPIAuthentication yes/GSSAPIAuthentication no/g' /etc/ssh/sshd_config
systemctl reload sshd
# Rename this machine to the hostname that NODES assigns to its IP address.
# The primary address and the current hostname cannot change while the list
# is being scanned, so both are looked up once, ahead of the loop.
current_ip=$(hostname -I | awk '{print $1}')
current_hostname=$(hostname)
for node in "${NODES[@]}"; do
  # Each entry is "IP hostname"; anything after the second field is ignored.
  read -r ip hostname _ <<< "$node"
  # Skip entries that belong to other machines, and entries whose hostname
  # is already in effect.
  [[ "$current_ip" == "$ip" && "$current_hostname" != "$hostname" ]] || continue
  echo "Updating hostname to $hostname on $current_ip..."
  if hostnamectl set-hostname "$hostname"; then
    echo "Hostname updated successfully."
  else
    echo "Failed to update hostname."
  fi
  # Only one entry can describe this machine; stop after handling it.
  break
done
# Make every cluster node resolvable by name: append each "IP hostname" pair
# from NODES to /etc/hosts unless that mapping is already present.
for node in "${NODES[@]}"; do
  read -r ip hostname _ <<< "$node"
  # Compare whole fields instead of grepping for a substring. A regex grep
  # treats the dots in the IP as wildcards and also accepts a longer name
  # that merely starts with this one (mysql01 vs mysql010); it would miss an
  # existing entry that is separated by a tab or by several spaces.
  if awk -v ip="$ip" -v host="$hostname" '
      $1 == ip { for (i = 2; i <= NF; i++) if ($i == host) found = 1 }
      END { exit !found }
    ' /etc/hosts; then
    echo "Host entry for $hostname already exists in /etc/hosts."
  else
    # tee -a performs the privileged append without building a shell command
    # string out of the variables.
    printf '%s %s\n' "$ip" "$hostname" | sudo tee -a /etc/hosts > /dev/null
    echo "Added host entry for $hostname in /etc/hosts."
  fi
done
# Passwordless SSH: create this node's RSA key pair once, then install the
# public key on every node listed in NODES.
# One variable for the key path keeps key generation and ssh-copy-id pointed
# at the same file for whichever user runs the script.
key_file="$HOME/.ssh/id_rsa"
if [[ ! -s "${key_file}.pub" ]]; then
  ssh-keygen -t rsa -N '' -f "$key_file" -q -b 2048
fi
# expect answers the interactive ssh-copy-id prompts; install it if missing.
# 'command -v' is a shell builtin, so the check itself needs no extra tool.
if ! command -v expect &> /dev/null; then
  echo "expect 工具未安装,正在安装 expect..."
  sudo yum install -y expect
fi
for node in "${NODES[@]}"; do
  read -r ip hostname _ <<< "$node"
  # The expect program is a double-quoted shell string: ${key_file},
  # $hostname and $HOST_PASS are filled in by bash, while \" and \r are
  # passed through for Tcl to interpret.
  #  - "(yes/no" also matches the newer "(yes/no/[fingerprint])" prompt.
  #  - On eof the exit status of ssh-copy-id is propagated, so a failed
  #    copy is reported instead of being indistinguishable from success.
  expect -c "
    set timeout -1
    spawn ssh-copy-id -i ${key_file}.pub $hostname
    expect {
      \"*password:*\" { send -- \"$HOST_PASS\r\"; exp_continue }
      \"*(yes/no*\" { send -- \"yes\r\"; exp_continue }
      eof { exit [lindex [wait] 3] }
    }
  " || echo "ssh-copy-id to $hostname failed." >&2
done
# Time synchronisation (chrony): every node follows $TIME_SERVER; the time
# server itself additionally answers requests from $TIME_SERVER_IP and keeps
# serving its local clock when it has no upstream source.
chrony_conf=/etc/chrony.conf
server_line="server $TIME_SERVER iburst"
# Only touch the upstream list when our server line is not there yet, so the
# script can be re-run without stacking '#' prefixes or duplicate entries.
if ! grep -qxF -- "$server_line" "$chrony_conf"; then
  # Disable the pre-configured sources by directive name rather than by
  # fixed line numbers, which only fit one particular stock chrony.conf.
  sed -i -E '/^(server|pool)[[:space:]]/s/^/#/' "$chrony_conf"
  echo "$server_line" >> "$chrony_conf"
fi
# Decide the role from this machine's actual hostname.
if [[ "$(hostname)" == "$TIME_SERVER" ]]; then
  grep -qxF -- "allow $TIME_SERVER_IP" "$chrony_conf" \
    || echo "allow $TIME_SERVER_IP" >> "$chrony_conf"
  grep -qxF -- "local stratum 10" "$chrony_conf" \
    || echo "local stratum 10" >> "$chrony_conf"
fi
# Apply the new configuration and start chronyd at boot.
systemctl restart chronyd
systemctl enable chronyd
echo "###############################################################"
echo "################# 集群初始化成功 ######################"
echo "###############################################################"
- 关闭防火墙和 SELinux(生产环境应保持防火墙开启,并放通 MySQL 端口 3306 和组通信端口 33061)
bash
# Stop firewalld now and keep it off after a reboot; otherwise the MySQL port
# (3306) and the group communication port (33061) are blocked again as soon
# as a node restarts.
systemctl disable --now firewalld
# Put SELinux into permissive mode for the running system ...
setenforce 0
# ... and persist that choice, since setenforce alone is lost on reboot.
sed -i 's/^SELINUX=enforcing/SELINUX=permissive/' /etc/selinux/config
安装MySQL(所有节点执行)
- 解压软件包
bash
tar -xf mysql-8.0.30-1.el7.x86_64.rpm-bundle.tar -C /opt/
- 复制给其他节点
bash
scp -r /opt/* mysql02:/opt/
scp -r /opt/* mysql03:/opt/
- 安装并初始化
bash
# Remove the distribution's mariadb-libs, which conflicts with the MySQL
# packages. rpm -e takes package names, not wildcards, so the installed
# names are queried first; xargs -r skips the removal when nothing matches.
rpm -qa 'mariadb-libs*' | xargs -r rpm -e --nodeps
# Install only the RPM files from the extracted bundle, not whatever else
# happens to live under /opt.
yum install -y /opt/*.rpm
# Initialise the data directory as the mysql account so that every file is
# created with the right owner from the start.
mysqld --initialize --user=mysql --console
# Safety net for files left behind by an earlier run as root.
chown -R mysql:mysql /var/lib/mysql/
# Start MySQL now and on every boot.
systemctl enable --now mysqld
- 修改登录密码
bash
# Read the temporary root password that initialisation wrote to the error
# log. Only the last match is kept: the log still contains older passwords
# when the data directory has been initialised more than once, and passing
# several lines to mysqladmin would make the login fail.
INIT_PASS=$(awk '/temporary password/ { pass = $NF } END { print pass }' /var/log/mysqld.log)
echo "初始密码: $INIT_PASS"
# Replace the temporary password with the cluster-wide root password.
mysqladmin -uroot -p"$INIT_PASS" password '000000'
- 设置远程访问
bash
mysql -uroot -p000000
-- Allow root to connect from any host.
-- RENAME USER moves the account together with its rows in the other grant
-- tables and takes effect immediately, so neither a manual UPDATE of
-- mysql.user nor FLUSH PRIVILEGES is needed.
RENAME USER 'root'@'localhost' TO 'root'@'%';
修改配置文件
- 节点1配置 (mysql01 - 主节点)
bash
# Write the MySQL configuration for node 1 (mysql01).
# The quoted 'EOF' delimiter stops the shell from expanding anything in the
# file body, so the backslashes of the prompt setting are written literally.
# Binary log retention uses binlog_expire_logs_seconds (604800 s = 7 days)
# because expire_logs_days is deprecated in MySQL 8.0.
cat > /etc/my.cnf << 'EOF'
[mysqld]
# 基础配置
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
# 网络配置
bind-address=0.0.0.0
port=3306
# 字符集
character-set-server=utf8mb4
collation-server=utf8mb4_unicode_ci
# ========== 复制必需配置 ==========
server_id=1
gtid_mode=ON
enforce_gtid_consistency=ON
log_bin=mysql-bin
binlog_format=ROW
binlog_checksum=NONE
log_slave_updates=ON
# ========== MGR配置 ==========
disabled_storage_engines="MyISAM,BLACKHOLE,FEDERATED,ARCHIVE,MEMORY"
plugin_load_add='group_replication.so'
transaction_write_set_extraction=XXHASH64
group_replication_group_name="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa"
group_replication_start_on_boot=OFF
group_replication_local_address="192.168.200.100:33061"
group_replication_group_seeds="192.168.200.100:33061,192.168.200.101:33061,192.168.200.102:33061"
group_replication_bootstrap_group=OFF
group_replication_single_primary_mode=ON
group_replication_enforce_update_everywhere_checks=OFF
group_replication_consistency=EVENTUAL
# 性能优化
master_info_repository=TABLE
relay_log_info_repository=TABLE
relay_log_recovery=ON
slave_preserve_commit_order=ON
binlog_transaction_dependency_tracking=WRITESET
# 二进制日志
binlog_expire_logs_seconds=604800
sync_binlog=1
innodb_flush_log_at_trx_commit=1
# InnoDB优化
innodb_buffer_pool_size=1G
innodb_log_file_size=256M
innodb_flush_method=O_DIRECT
[mysql]
default-character-set=utf8mb4
prompt=\\u@\\h [\\d]>\\_
[client]
default-character-set=utf8mb4
EOF
- 节点2配置 (mysql02 - 从节点)
bash
scp /etc/my.cnf mysql02:/etc/my.cnf
bash
# 仅修改以下参数,其他与节点1相同
server_id=2
group_replication_local_address="192.168.200.101:33061"
- 节点3配置 (mysql03 - 从节点)
bash
scp /etc/my.cnf mysql03:/etc/my.cnf
bash
# 仅修改以下参数,其他与节点1相同
server_id=3
group_replication_local_address="192.168.200.102:33061"
- 重启所有节点的 MySQL 服务(三个节点都要执行)
bash
systemctl restart mysqld
配置MGR集群
- 所有节点基础配置
bash
mysql -uroot -p000000
sql
-- Create the replication account on every node (mysql_native_password is
-- used to avoid authentication problems).
-- Binary logging is switched off for the session so that none of these
-- statements is logged on the node where it runs.
SET SQL_LOG_BIN=0;
CREATE USER 'repl'@'%' IDENTIFIED WITH mysql_native_password BY 'ReplPassword123!';
GRANT REPLICATION SLAVE ON *.* TO 'repl'@'%';
GRANT BACKUP_ADMIN ON *.* TO 'repl'@'%';
GRANT GROUP_REPLICATION_STREAM ON *.* TO 'repl'@'%';
-- FLUSH PRIVILEGES is itself written to the binary log, so it has to run
-- while logging is still off; otherwise each node records a local GTID that
-- the group does not have and cannot join without a reset.
FLUSH PRIVILEGES;
SET SQL_LOG_BIN=1;
- 验证插件
bash
SELECT PLUGIN_NAME, PLUGIN_STATUS FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME = 'group_replication';
bash
root@localhost [(none)]> SELECT PLUGIN_NAME, PLUGIN_STATUS
-> FROM INFORMATION_SCHEMA.PLUGINS
-> WHERE PLUGIN_NAME = 'group_replication';
+-------------------+---------------+
| PLUGIN_NAME | PLUGIN_STATUS |
+-------------------+---------------+
| group_replication | ACTIVE |
+-------------------+---------------+
1 row in set (0.00 sec)
节点1初始化集群(主节点)
sql
-- Run on mysql01 only.
-- The bootstrap flag is switched on just for this one START and switched
-- off again immediately afterwards.
SET GLOBAL group_replication_bootstrap_group=ON;
START GROUP_REPLICATION;
SET GLOBAL group_replication_bootstrap_group=OFF;
-- Verify: list each member with its state and role; the last column shows
-- a success mark when the member is ONLINE and a failure mark otherwise.
SELECT
MEMBER_HOST,
MEMBER_STATE,
MEMBER_ROLE,
IF(MEMBER_STATE='ONLINE', '✅ 成功', '❌ 失败') as 状态
FROM performance_schema.replication_group_members;
bash
root@localhost [(none)]> SELECT
-> MEMBER_HOST,
-> MEMBER_STATE,
-> MEMBER_ROLE,
-> IF(MEMBER_STATE='ONLINE', '\U+2705 成功', '\U+274C 失败') as 状态
-> FROM performance_schema.replication_group_members;
+-------------+--------------+-------------+------------+
| MEMBER_HOST | MEMBER_STATE | MEMBER_ROLE | 状态 |
+-------------+--------------+-------------+------------+
| mysql01 | ONLINE | PRIMARY | ✅ 成功 |
+-------------+--------------+-------------+------------+
1 row in set (0.00 sec)
节点2加入集群
sql
-- Run on mysql02: log in, reset the local binary log state, then join the
-- group using the 'repl' account.
-- NOTE(review): RESET MASTER presumably discards this node's local GTID history so it can join cleanly; confirm it is still needed.
mysql -uroot -p000000
RESET MASTER;
START GROUP_REPLICATION USER='repl', PASSWORD='ReplPassword123!';
节点3加入集群
sql
-- Run on mysql03: log in, reset the local binary log state, then join the
-- group using the 'repl' account.
-- NOTE(review): RESET MASTER presumably discards this node's local GTID history so it can join cleanly; confirm it is still needed.
mysql -uroot -p000000
RESET MASTER;
START GROUP_REPLICATION USER='repl', PASSWORD='ReplPassword123!';
验证集群状态
查看集群成员
sql
-- 在任一节点执行
SELECT * FROM performance_schema.replication_group_members;
bash
root@localhost [(none)]> SELECT * FROM performance_schema.replication_group_members;
+---------------------------+--------------------------------------+-------------+-------------+--------------+-------------+----------------+----------------------------+
| CHANNEL_NAME | MEMBER_ID | MEMBER_HOST | MEMBER_PORT | MEMBER_STATE | MEMBER_ROLE | MEMBER_VERSION | MEMBER_COMMUNICATION_STACK |
+---------------------------+--------------------------------------+-------------+-------------+--------------+-------------+----------------+----------------------------+
| group_replication_applier | 3afb0ef2-fbf5-11f0-bb0f-000c29eb9544 | mysql01 | 3306 | ONLINE | PRIMARY | 8.0.30 | XCom |
| group_replication_applier | 44cf7653-fbf5-11f0-8efe-000c29e0f099 | mysql02 | 3306 | ONLINE | SECONDARY | 8.0.30 | XCom |
| group_replication_applier | 45969291-fbf5-11f0-8e9d-000c295639c3 | mysql03 | 3306 | ONLINE | SECONDARY | 8.0.30 | XCom |
+---------------------------+--------------------------------------+-------------+-------------+--------------+-------------+----------------+----------------------------+
3 rows in set (0.00 sec)
root@localhost [(none)]>
- 检查读写状态
sql
SELECT @@hostname, @@read_only, @@super_read_only;
bash
-- 在主节点
root@localhost [(none)]> SELECT @@hostname, @@read_only, @@super_read_only;
+------------+-------------+-------------------+
| @@hostname | @@read_only | @@super_read_only |
+------------+-------------+-------------------+
| mysql01 | 0 | 0 |
+------------+-------------+-------------------+
bash
-- 在从节点
root@localhost [(none)]> SELECT @@hostname, @@read_only, @@super_read_only;
+------------+-------------+-------------------+
| @@hostname | @@read_only | @@super_read_only |
+------------+-------------+-------------------+
| mysql02 | 1 | 1 |
+------------+-------------+-------------------+
1 row in set (0.01 sec)
- 测试数据同步
sql
-- Run on the primary node: create a test database with one table and insert
-- a single row.
CREATE DATABASE mgr_test;
USE mgr_test;
CREATE TABLE cluster_test (
id INT AUTO_INCREMENT PRIMARY KEY,
node_name VARCHAR(50),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO cluster_test (node_name) VALUES ('主节点写入测试');
bash
-- 在从节点验证
SELECT * FROM mgr_test.cluster_test;
bash
root@localhost [(none)]> SELECT * FROM mgr_test.cluster_test;
+----+-----------------------+---------------------+
| id | node_name | created_at |
+----+-----------------------+---------------------+
| 1 | 主节点写入测试 | 2026-01-28 00:41:32 |
+----+-----------------------+---------------------+
1 row in set (0.00 sec)
模拟主节点故障
- 停止主节点MySQL
bash
[root@mysql01 ~]# systemctl stop mysqld
bash
# On one of the remaining nodes, list the group members and label the one
# whose role is PRIMARY as the newly elected primary.
mysql -uroot -p000000 -e "
SELECT
MEMBER_HOST,
MEMBER_STATE,
MEMBER_ROLE,
CASE
WHEN MEMBER_ROLE = 'PRIMARY' THEN '🟢 新主节点'
ELSE '🔵 从节点'
END as 状态
FROM performance_schema.replication_group_members;"
bash
root@localhost [(none)]> SELECT
-> MEMBER_HOST,
-> MEMBER_STATE,
-> MEMBER_ROLE,
-> CASE
-> WHEN MEMBER_ROLE = 'PRIMARY' THEN '\U+1F7E2 新主节点'
-> ELSE '\U+1F535 从节点'
-> END as 状态
-> FROM performance_schema.replication_group_members;
+-------------+--------------+-------------+-------------------+
| MEMBER_HOST | MEMBER_STATE | MEMBER_ROLE | 状态 |
+-------------+--------------+-------------+-------------------+
| mysql02 | ONLINE | PRIMARY | 🟢 新主节点 |
| mysql03 | ONLINE | SECONDARY | 🔵 从节点 |
+-------------+--------------+-------------+-------------------+
2 rows in set (0.00 sec)
- 测试新主节点mysql02写入
bash
mysql -uroot -p000000
-- Insert a second row into the test table, then read the whole table back.
USE mgr_test;
INSERT INTO cluster_test (node_name) VALUES ('故障转移后写入测试');
SELECT * FROM cluster_test;
- mysql03节点查看
bash
root@localhost [(none)]> SELECT * FROM cluster_test;
ERROR 1046 (3D000): No database selected
root@localhost [(none)]> use mgr_test;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
root@localhost [mgr_test]> SELECT * FROM cluster_test;
+----+-----------------------------+---------------------+
| id | node_name | created_at |
+----+-----------------------------+---------------------+
| 1 | 主节点写入测试 | 2026-01-28 00:41:32 |
| 2 | 故障转移后写入测试 | 2026-01-28 00:50:23 |
+----+-----------------------------+---------------------+
2 rows in set (0.00 sec)
root@localhost [mgr_test]>
- 恢复原主节点(此时则需要先删除本地原有的数据再同步,不然会冲突)
这是 MGR 增量恢复(Incremental Recovery)时的典型问题:节点保留了本地数据,但其 GTID 位置与集群不一致,导致恢复时出现冲突。
bash
# Start MySQL on the former primary again
systemctl start mysqld
mysql -uroot -p000000
# Delete the data that is still present locally
# NOTE(review): this DROP runs while the node is outside the group, so it
# presumably creates a local transaction of its own, which the RESET MASTER
# below then has to wipe. Confirm whether a plain START GROUP_REPLICATION
# would let the node catch up incrementally without these two destructive steps.
drop database mgr_test;
# Rejoin the group
RESET MASTER;
START GROUP_REPLICATION USER='repl', PASSWORD='ReplPassword123!';
- 查询恢复数据
bash
root@localhost [mgr_test]> select * from cluster_test;
+----+-----------------------------+---------------------+
| id | node_name | created_at |
+----+-----------------------------+---------------------+
| 1 | 主节点写入测试 | 2026-01-28 00:41:32 |
| 2 | 故障转移后写入测试 | 2026-01-28 00:50:23 |
+----+-----------------------------+---------------------+