swarm安装使用
docker安装
shell
root@133:~# apt-get update
root@133:~# apt-cache showpkg docker.io
root@133:~# apt-get -y install docker.io=27.5.1-0ubuntu3~22.04.2
root@133:~# vi /etc/docker/daemon.json
root@133:~# systemctl daemon-reload
root@133:~# systemctl restart docker
root@133:~# docker --version
Docker version 27.5.1, build 27.5.1-0ubuntu3~22.04.2
初始化swarm集群
shell
# 初始化创建docker swarm 集群
root@133:~# docker swarm init --advertise-addr 10.1.0.133
Swarm initialized: current node (580eqtnzes235xmolbs4bfyuh) is now a manager.
To add a worker to this swarm, run the following command:
docker swarm join --token SWMTKN-1-4j5hz10z6lk0usag...10.1.0.133:2377
...
root@133:~# docker node ls # 默认以hostname为节点名
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
580eqtnzes235xmolbs4bfyuh * 133 Ready Active Leader 27.5.1
加入集群
shell
# 获取work、manager加入集群的命令
root@133:~# docker swarm join-token worker
To add a worker to this swarm, run the following command:
docker swarm join --token SWMTKN-1-4j5hz10z6lk0usag6vc...10.1.0.133:2377
root@133:~# docker swarm join-token manager
To add a manager to this swarm, run the following command:
docker swarm join --token SWMTKN-1-4j5hz10z6lk0usag6vc...10.1.0.133:2377
root@134:~# docker swarm join --token SWMTKN-1-4j5hz10z6lk0usag6vc...10.1.0.133:2377
root@135:~# docker swarm join --token SWMTKN-1-4j5hz10z6lk0usag6vc...10.1.0.133:2377
节点管理
shell
# 查看节点
root@133:~# docker node ls
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
sfcdzkkl842sdcrdxell0xyf2 * 133 Ready Active Leader 27.5.1
9r9ha7dezgxb0o4zgcrxr8786 134 Ready Active 27.5.1
lkdqlwvolpjb6t33b6vucxfbo 135 Ready Active 27.5.1
# 节点自身退出
root@135:~# docker swarm leave
# 若节点是manager身份,要退出集群可先降权再退出;manager身份的节点无法直接退出集群,若强制退出(--force),则该节点后期无法再加入集群
docker node demote 135 #降权
docker node promote 135 #升权
docker node rm 节点ID
# 管理节点删除节点
root@133:~# docker node rm 134
Error response from daemon: rpc error: code = FailedPrecondition desc = node 9r9ha7dezgxb0o4zgcrxr8786 is not down and can't be removed
# Down的节点才可以直接删除
root@133:~# docker node ls
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
sfcdzkkl842sdcrdxell0xyf2 * 133 Ready Active Leader 27.5.1
9r9ha7dezgxb0o4zgcrxr8786 134 Ready Active 27.5.1
lkdqlwvolpjb6t33b6vucxfbo 135 Down Active 27.5.1
root@133:~# docker node rm 135
135
root@133:~# docker node ls
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
sfcdzkkl842sdcrdxell0xyf2 * 133 Ready Active Leader 27.5.1
9r9ha7dezgxb0o4zgcrxr8786 134 Ready Active 27.5.1
swarm元数据
shell
# swarm数据默认存储在/var/lib/docker/swarm,可备份它用于集群还原。
root@133:~# ls /var/lib/docker/swarm/
certificates docker-state.json raft state.json worker
# 备份(打包swarm目录)
root@133:~# tar -czvf swarm_backup_YYYYMMDD.tar.gz -C /var/lib/docker swarm/
# 恢复集群
root@133:~# systemctl stop docker
root@133:~# rm -rf /var/lib/docker/swarm
root@133:~# tar -xf swarm_backup_YYYYMMDD.tar.gz -C /var/lib/docker
root@133:~# systemctl start docker
# 重新初始化节点(清除旧swarm状态后重建,注意init前需先启动docker)
root@133:~# systemctl stop docker
root@133:~# rm -rf /var/lib/docker/swarm
root@133:~# systemctl start docker
root@133:~# docker swarm init --advertise-addr <IP地址>
集群中节点状态
shell
# 集群状态 STATUS
active 可调度
pause 不可调度,可以继续运行旧的容器
drain 不可调度,旧的会被驱逐走
docker node update --availability [状态] [节点NAME]
docker node update --availability drain 134
root@133:~# docker node update --availability drain 134
134
root@133:~# docker node ls
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
sfcdzkkl842sdcrdxell0xyf2 * 133 Ready Active Leader 27.5.1
9r9ha7dezgxb0o4zgcrxr8786 134 Ready Drain 27.5.1
# 恢复
root@133:~# docker node update --availability active 134
134
root@133:~# docker node ls
ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
sfcdzkkl842sdcrdxell0xyf2 * 133 Ready Active Leader 27.5.1
9r9ha7dezgxb0o4zgcrxr8786 134 Ready Active 27.5.1
添加节点标签
shell
# 添加节点标签
docker node update --label-add [标签=内容] [节点NAME]
docker node update --label-add dev=true 134
# 查看节点详细信息
docker node inspect [节点NAME]
root@133:~# docker node inspect 134|grep -A5 Labels
"Labels": {
"dev": "true"
},
# 删除节点标签
docker node update --label-rm [标签] [节点NAME]
docker node update --label-rm dev 134
创建服务
shell
# 创建服务
root@133:~# docker service create --replicas 1 --name web nginx
# 查看集群中的服务
root@133:~# docker service ls
ID NAME MODE REPLICAS IMAGE PORTS
peqaicc77jra web replicated 1/1 nginx:latest
# 查看集群中服务的详细
root@133:~# docker service ps web
ID NAME IMAGE NODE DESIRED STATE CURRENT STATE ERROR PORTS
hwbcctxbtfsw web.1 nginx:latest 133 Running Running 5 minutes ago
更新服务
值得注意的是: 更新后,历史版本变成Shutdown,默认保留最近5个历史任务容器(exited状态)。更新时拉取的镜像tag显示为&lt;none&gt;,这是因为Swarm会把镜像解析为digest(内容摘要)后按digest拉取,本地镜像因此不带tag。
shell
# 扩容服务
root@133:~# docker service scale web=2
web scaled to 2
overall progress: 2 out of 2 tasks
1/2: running [==================================================>]
2/2: running [==================================================>]
verify: Service web converged
root@133:~# docker service ps web
ID NAME IMAGE NODE DESIRED STATE CURRENT STATE ERROR PORTS
hwbcctxbtfsw web.1 nginx:latest 133 Running Running 9 minutes ago
g3u5q1lee6ft web.2 nginx:latest 133 Running Running 8 seconds ago
# 更新服务,历史版本变成Shutdown,值得注意的是 新拉取的镜像tag为none,
root@133:~# docker service update --image nginx:1.23 web
root@133:~# docker service update --image nginx:1.26 web # 多操作几次
root@133:~# docker service ps web
ID NAME IMAGE NODE DESIRED STATE CURRENT STATE ERROR PORTS
ij9ewrafhysx web.1 nginx:1.23 133 Running Running 5 minutes ago
w8341mkptdpo \_ web.1 nginx:latest 133 Shutdown Shutdown 5 minutes ago
9oovfqw43tpj \_ web.1 nginx:1.26 133 Shutdown Shutdown 7 minutes ago
xx6rx0esn7ce \_ web.1 nginx:latest 133 Shutdown Shutdown 7 minutes ago
7j2p8z1uhcsu \_ web.1 nginx:1.26 133 Shutdown Shutdown 8 minutes ago
basbg9j8tky8 web.2 nginx:1.23 134 Running Running 4 minutes ago
nd0fdyz7780s \_ web.2 nginx:latest 134 Shutdown Shutdown 5 minutes ago
i3mz37ilx2d3 \_ web.2 nginx:1.26 134 Shutdown Shutdown 7 minutes ago
qh0jhckpdg23 \_ web.2 nginx:latest 134 Shutdown Shutdown 7 minutes ago
n6mrd5xclg9o \_ web.2 nginx:1.26 134 Shutdown Shutdown 8 minutes ago
# ps -a 发现保留了5个版本
root@133:~# docker ps -a|grep web.1
9c88bf7bd513 nginx:1.23 "/docker-entrypoint...." 9 minutes ago Up 9 minutes 80/tcp web.1.ij9ewrafhysxul3jq2ohi7ezz
6237d5e83de5 nginx:latest "/docker-entrypoint...." 11 minutes ago Exited (0) 10 minutes ago web.1.w8341mkptdpohblius5bkxbvd
8bd20db5f437 nginx:1.26 "/docker-entrypoint...." 12 minutes ago Exited (0) 11 minutes ago web.1.9oovfqw43tpjkqxvh69bf2yjt
a993ba580912 nginx:latest "/docker-entrypoint...." 13 minutes ago Exited (0) 12 minutes ago web.1.xx6rx0esn7cem4rzze8d1ckpi
2d7953e906ec nginx:1.26 "/docker-entrypoint...." 13 minutes ago Exited (0) 12 minutes ago web.1.7j2p8z1uhcsu898oco145vx0j
# 新拉取的镜像tag显示为<none>:Swarm按镜像digest(内容摘要)解析并分发镜像,节点按digest拉取,故本地不带tag。
root@134:~# docker images|grep nginx
nginx <none> 203ad09fc156 7 weeks ago 192MB
nginx <none> 64e5ac93d424 8 months ago 192MB
nginx <none> a7be6198544f 2 years ago 142MB
nginx <none> 6c0218f16876 2 years ago 142MB
回滚服务
shell
# 回滚:直接执行时只在当前与上一个版本之间来回切换,且每次回滚会覆盖掉最旧的历史版本。当前版本是 nginx:1.23。
root@133:~# docker service rollback web
root@133:~# docker ps -a|grep web.1
f5e56ee90971 nginx:1.23 "/docker-entrypoint...." 14 seconds ago Up 12 seconds 80/tcp web.1.z6zmfvw3awi1qq3jl9p1fs8yv
# 只在当前与上一个版本间回滚,且 多次回滚会覆盖掉最旧的版本
root@133:~# docker ps -a|grep web.1
529d6e2816b0 nginx:latest "/docker-entrypoint...." 13 seconds ago Up 11 seconds 80/tcp web.1.wi50fwt3cpqidaxglvd27p59x
f5e56ee90971 nginx:1.23 "/docker-entrypoint...." 3 minutes ago Exited (0) 12 seconds ago web.1.z6zmfvw3awi1qq3jl9p1fs8yv
3089ae772a6e nginx:latest "/docker-entrypoint...." 3 minutes ago Exited (0) 3 minutes ago web.1.an0gscn0u2cjjnestiedlsdip
9c88bf7bd513 nginx:1.23 "/docker-entrypoint...." 14 minutes ago Exited (0) 3 minutes ago web.1.ij9ewrafhysxul3jq2ohi7ezz
6237d5e83de5 nginx:latest "/docker-entrypoint...." 16 minutes ago Exited (0) 15 minutes ago web.1.w8341mkptdpohblius5bkxbvd
# 建议直接更新到需要回滚的版本镜像
root@133:~# docker service update --image nginx:1.26 web
创建数据卷
本机创建的卷不能被共享,需要在所有节点创建同名一致的卷
shell
root@133:~# docker volume inspect test
[
{
"Driver": "local",
"Labels": null,
"Mountpoint": "/var/lib/docker/volumes/test/_data",
"Name": "test",
"Options": null,
"Scope": "local"
}
]
# 本机创建的卷不能被共享,在所有节点创建一致的卷
root@133:~# docker volume create --name test2 --opt type=none --opt device=/mnt/test1 --opt o=bind
test2
root@133:~# docker volume inspect test2
[
{
"Driver": "local",
"Labels": null,
"Mountpoint": "/var/lib/docker/volumes/test2/_data",
"Name": "test2",
"Options": {
"device": "/mnt/test1",
"o": "bind",
"type": "none"
},
"Scope": "local"
}
]
root@133:~# ls -d /var/lib/docker/volumes/test*
/var/lib/docker/volumes/test /var/lib/docker/volumes/test2
root@133:~# mkdir /mnt/test1
root@133:~# echo hello test1>/mnt/test1/index.html
root@133:~# docker service create --mount type=volume,src=test2,dst=/usr/share/nginx/html --replicas 2 --name web2 nginx
root@133:~# docker exec -it c6741351bb9f cat /usr/share/nginx/html/index.html
hello test1
root@134:~# docker exec -it 3cc5bc7314b9 cat /usr/share/nginx/html/index.html
...
<title>Welcome to nginx!</title>
外部访问
shell
# 添加外部访问端口
root@133:~# docker service update --publish-add 38080:80 web2
root@133:~# curl 10.1.0.133:38080
hello test1
root@133:~# curl 10.1.0.134:38080
hello test1
root@133:~# curl 10.1.0.134:38080 # 服务轮询,轮询到134上(无卷创建)时,返回Welcome to nginx
...
<title>Welcome to nginx!</title>
# swarm必须使用overlay网络。
# 默认为Ingress模式(路由网格):通过任意Swarm节点的IP地址+映射端口均可访问服务,即使该节点没有运行服务副本。
root@135:~# docker network ls
NETWORK ID NAME DRIVER SCOPE
063a9a7ab6f4 bridge bridge local # 桥接
0e282740f3e0 host host local # 主机
ueiz673af50x ingress overlay swarm # swarm初始化后自动创建
2cc1ea49762f none null local # 无网络
# 默认的网络
root@133:~# docker inspect ingress
[
{
"Name": "ingress",
...
"Config": [
{
"Subnet": "10.0.0.0/24",
"Gateway": "10.0.0.1"
# 指定ip网段创建,避免地址冲突
root@135:~# docker swarm init --advertise-addr 10.1.0.135 --default-addr-pool 192.20.0.0/16
root@135:~# docker network inspect ingress
[
{
"Name": "ingress",
...
"Config": [
{
"Subnet": "192.20.0.0/24",
"Gateway": "192.20.0.1"
# 创建自己的网段用于部署
root@135:~# docker network create -d overlay --attachable my_external_network
7xvrub2320iwqc274nxv6h175
# 也可显式指定子网创建,避免使用地址池自动分配(同名网络已存在时需先删除):
docker network create -d overlay --subnet=192.20.100.0/24 --attachable my_external_network
root@135:~# docker inspect my_external_network
[
{
"Name": "my_external_network",
...
"Config": [
{
"Subnet": "192.20.1.0/24",
"Gateway": "192.20.1.1"
}
]
},