1、概述
今天我们来学习一下Zookeeper集群相关的内容,本文主要的内容有集群环境的搭建,集群常见的问题和对应的解决方案。
2、集群环境搭建
2.1、准备工作
首先我们准备好安装包,创建好集群部署的路径。将解压后的安装文件复制三份。这里我在 /usr/local 目录下创建了一个 zkCluster 目录。相关命令如下:
bash
[root@localhost ~]# cd /usr/local/
[root@localhost local]# mkdir zkCluster
[root@localhost local]# cp zookeeper-3.9.2/ ./zkCluster/ -R
[root@localhost local]# cd zkCluster/
[root@localhost zkCluster]# ll
总用量 0
drwxr-xr-x. 7 root root 145 10月 25 12:58 zookeeper-3.9.2
[root@localhost zkCluster]# mv zookeeper-3.9.2/ zookeeper-9000
[root@localhost zkCluster]# ll
总用量 0
drwxr-xr-x. 7 root root 145 10月 25 12:58 zookeeper-9000
[root@localhost zkCluster]# cp zookeeper-9000/ zookeeper-9001/ -R
[root@localhost zkCluster]# cp zookeeper-9000/ zookeeper-9002/ -R
[root@localhost zkCluster]# ll
总用量 0
drwxr-xr-x. 7 root root 145 10月 25 12:58 zookeeper-9000
drwxr-xr-x. 7 root root 145 10月 25 12:59 zookeeper-9001
drwxr-xr-x. 7 root root 145 10月 25 12:59 zookeeper-9002
[root@localhost zkCluster]#
2.2、修改配置
准备好安装文件后我们需要修改配置文件,首先我们配置9000的节点。进入到conf目录下,修改zoo.cfg文件。文件内容如下图所示:
这里解释一下最后三行配置,每一行的格式都是:server.服务器ID=服务器IP地址:服务器之间通信端口:服务器之间投票选举端口。
这里我是在一台虚拟机上搭建的集群,所以ip都是一样的,但是通讯端口和投票选举端口需要单独指定。接着我们来看9001和9002节点的配置,内容如下:
配置完成后我们还需要新建一个数据目录,也就是上述配置中的dataDir指定的目录:
bash
[root@localhost conf]# mkdir -p /opt/zkCluster/zookeeper-9002/data
[root@localhost conf]# mkdir -p /opt/zkCluster/zookeeper-9001/data
[root@localhost conf]#
[root@localhost conf]# mkdir -p /opt/zkCluster/zookeeper-9000/data
[root@localhost conf]#
[root@localhost conf]# cd /opt/zkCluster/
[root@localhost zkCluster]# ll
总用量 0
drwxr-xr-x. 3 root root 18 10月 25 13:15 zookeeper-9000
drwxr-xr-x. 3 root root 18 10月 25 13:15 zookeeper-9001
drwxr-xr-x. 3 root root 18 10月 25 13:15 zookeeper-9002
[root@localhost zkCluster]# pwd
/opt/zkCluster
[root@localhost zkCluster]#
最后我们需要为上述三个节点分别创建一个 myid 文件,内容是1,2,3。用来标识每个服务器的ID
bash
[root@localhost zkCluster]# echo 1 > /opt/zkCluster/zookeeper-9000/data/myid
[root@localhost zkCluster]# echo 2 > /opt/zkCluster/zookeeper-9001/data/myid
[root@localhost zkCluster]# echo 3 > /opt/zkCluster/zookeeper-9002/data/myid
[root@localhost zkCluster]#
2.3、启动集群
完成配置后,我们就可以启动集群了,启动集群就是分别启动这3个服务
bash
[root@localhost zkCluster]# ./zookeeper-9000/bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9000/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@localhost zkCluster]# ./zookeeper-9001/bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9001/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@localhost zkCluster]# ./zookeeper-9002/bin/zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9002/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@localhost zkCluster]#
启动成功后我们可以查看状态
bash
[root@localhost zkCluster]# ./zookeeper-9002/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9002/bin/../conf/zoo.cfg
Client port found: 9002. Client address: localhost. Client SSL: false.
Mode: follower
[root@localhost zkCluster]# ./zookeeper-9001/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9001/bin/../conf/zoo.cfg
Client port found: 9001. Client address: localhost. Client SSL: false.
Mode: leader
[root@localhost zkCluster]# ./zookeeper-9000/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9000/bin/../conf/zoo.cfg
Client port found: 9000. Client address: localhost. Client SSL: false.
Mode: follower
[root@localhost zkCluster]#
上面的状态说明9000和9002是从节点,9001被选举成了主节点。集群环境启动成功。
2.4、验证集群
我们也可以通过端口或者进程来判断集群是否正常
我们登录这三个节点中任意一个节点的命令行窗口,这里我登录到9000:
bash
JLine support is enabled
2024-10-25 13:26:06,594 [myid:127.0.0.1:9000] - INFO [main-SendThread(127.0.0.1:9000):o.a.z.ClientCnxn$SendThread@1432] - Session establishment complete on server localhost/127.0.0.1:9000, session id = 0x1000016003f0000, negotiated timeout = 30000
WATCHER::
WatchedEvent state:SyncConnected type:None path:null zxid: -1
[zk: 127.0.0.1:9000(CONNECTED) 0] ls /
[zookeeper]
[zk: 127.0.0.1:9000(CONNECTED) 1] create /tom tom
Created /tom
[zk: 127.0.0.1:9000(CONNECTED) 2]
这里我创建了一个tom节点,我们再去9001和9002上查看
bash
WatchedEvent state:SyncConnected type:None path:null zxid: -1
[zk: 127.0.0.1:9001(CONNECTED) 0] ls /
[tom, zookeeper]
[zk: 127.0.0.1:9001(CONNECTED) 1] get /tom
tom
[zk: 127.0.0.1:9001(CONNECTED) 2] set /tom jerry
[zk: 127.0.0.1:9001(CONNECTED) 3] get /tom
jerry
[zk: 127.0.0.1:9001(CONNECTED) 4]
我们在9001上修改 /tom 的值,然后到9000上查看,发现也是正常的:
bash
WatchedEvent state:SyncConnected type:None path:null zxid: -1
[zk: 127.0.0.1:9000(CONNECTED) 0] ls /
[zookeeper]
[zk: 127.0.0.1:9000(CONNECTED) 1] create /tom tom
Created /tom
[zk: 127.0.0.1:9000(CONNECTED) 2] get /tom
jerry
[zk: 127.0.0.1:9000(CONNECTED) 3]
这里说明集群环境搭建成功了。
3、集群环境的异常情况
3.1、从节点挂掉
现在集群的情况是9000和9002是从节点,9001被选举成了主节点。当集群中有一个从节点挂掉的时候会怎么样呢?我们来停掉9000模拟一下:
bash
[root@localhost zkCluster]# ./zookeeper-9000/bin/zkServer.sh stop
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9000/bin/../conf/zoo.cfg
Stopping zookeeper ... STOPPED
[root@localhost zkCluster]#
[root@localhost zkCluster]#
[root@localhost zkCluster]#
[root@localhost zkCluster]# ./zookeeper-9000/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9000/bin/../conf/zoo.cfg
Client port found: 9000. Client address: localhost. Client SSL: false.
Error contacting service. It is probably not running.
[root@localhost zkCluster]# ./zookeeper-9001/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9001/bin/../conf/zoo.cfg
Client port found: 9001. Client address: localhost. Client SSL: false.
Mode: leader
[root@localhost zkCluster]# ./zookeeper-9002/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9002/bin/../conf/zoo.cfg
Client port found: 9002. Client address: localhost. Client SSL: false.
Mode: follower
[root@localhost zkCluster]#
我们发现挂了一个从节点,集群依然是正常的。
3.2、主节点挂掉
我们重新启动9000,然后停掉9001节点:
bash
[root@localhost zkCluster]# ./zookeeper-9001/bin/zkServer.sh stop
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9001/bin/../conf/zoo.cfg
Stopping zookeeper ... STOPPED
[root@localhost zkCluster]# ./zookeeper-9001/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9001/bin/../conf/zoo.cfg
Client port found: 9001. Client address: localhost. Client SSL: false.
Error contacting service. It is probably not running.
[root@localhost zkCluster]# ./zookeeper-9002/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9002/bin/../conf/zoo.cfg
Client port found: 9002. Client address: localhost. Client SSL: false.
Mode: leader
[root@localhost zkCluster]# ./zookeeper-9000/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9000/bin/../conf/zoo.cfg
Client port found: 9000. Client address: localhost. Client SSL: false.
Mode: follower
[root@localhost zkCluster]#
这里我们发现主节点挂掉之后,9000和9002两个节点之间会进行一次选举,这里9002成了主节点。
3.3、从节点都挂掉
现在9002是主节点了,我们停掉两个从节点,看看主节点状态
bash
[root@localhost zkCluster]#
[root@localhost zkCluster]# ./zookeeper-9001/bin/zkServer.sh stop
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9001/bin/../conf/zoo.cfg
Stopping zookeeper ... STOPPED
[root@localhost zkCluster]# ./zookeeper-9000/bin/zkServer.sh stop
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9000/bin/../conf/zoo.cfg
Stopping zookeeper ... STOPPED
[root@localhost zkCluster]# ./zookeeper-9002/bin/zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/local/zkCluster/zookeeper-9002/bin/../conf/zoo.cfg
Client port found: 9002. Client address: localhost. Client SSL: false.
Error contacting service. It is probably not running.
[root@localhost zkCluster]#
我们发现两个从节点都挂了,主节点也不能正常运行了。这是因为集群中超过半数的节点挂了,存活的节点数达不到过半(法定人数)的要求,集群就无法继续对外提供服务了。
4、Zookeeper的强一致性
在2.4章节里,我们是在从节点写入的数据,但是最终数据也能同步到主节点和另外一个从节点,这是什么原因呢?我们都知道Zookeeper是强一致性的,那么写入事件肯定就必须要由主节点完成,再由主节点同步到从节点。
这里我们可以查阅官方文档:
ZooKeeper: Because Coordinating Distributed Systems is a Zoo
大概的意思就是:当我们往从节点写数据的时候,写请求其实会被转发到主节点,当主节点写入完成后再同步给从节点,从而确保了在网络分区的情况下,数据也保持一致。
5、总结
本篇文章主要给大家介绍Zookeeper集群环境的搭建以及相关的异常情况产生的效果,大家可以根据本文中提供的命令进行实操。最后还闲聊了一下Zookeeper强一致性的特性,建议大家可以去官方文档上查阅相关的内容,相信一定会有不同的收获。