环境说明
宿主机和虚拟机的OS与内核相同,如下
bash
$ cat /etc/issue
Ubuntu 22.04.3 LTS \n \l
$ uname -sr
Linux 6.2.0-34-generic
虚拟化软件版本
bash
$ kvm --version
QEMU emulator version 6.2.0 (Debian 1:6.2+dfsg-2ubuntu6.14)
Copyright (c) 2003-2021 Fabrice Bellard and the QEMU Project developers
$ virt-manager --version
4.0.0
安装前准备
参考《基于ubuntu22.04安装KVM虚拟机》安装5台虚拟机
来自 https://blog.csdn.net/u010438035/article/details/129260034
参考下面修改hosts文件,并配置每台虚拟机的ip和hostname,其中vip 设置为:192.168.122.10
bash
cat /etc/hosts
192.168.122.10 rke2-vip
192.168.122.11 rke2-master-01
192.168.122.12 rke2-master-02
192.168.122.13 rke2-master-03
192.168.122.14 rke2-node-01
192.168.122.15 rke2-node-02
配置5台虚拟机免密ssh登陆
bash
# Generate an RSA key pair, then install the public key on all five VMs so
# that later ssh logins need no password. The key path is relative, so run
# this from the home directory.
ssh-keygen -t rsa
for host in rke2-master-01 rke2-master-02 rke2-master-03 rke2-node-01 rke2-node-02; do
  ssh-copy-id -i .ssh/id_rsa.pub "$host"
done
配置5台虚拟机sudo 免输入密码
注意下面的username是使用者的实际名字,需要修改
bash
# Write a sudoers drop-in that lets the account run sudo without a password.
# Replace "username" with the real account name before running.
printf '%s\n' \
  '# add in user AFTER the sudo and admin group' \
  'username ALL=(ALL) NOPASSWD: ALL' \
  > /etc/sudoers.d/passwordless
在宿主机创建一个串行脚本 allnodes
allnodes 只需要放置在宿主机上,使用普通用户执行。
下面的username是使用者的实际名字,需要修改
bash
# Create ./allnodes in the current directory. The here-doc delimiter is quoted
# so that $1, $node and the array expansion are written to the file literally.
cat > allnodes << "EOF"
#!/bin/bash
# Run the command string given as $1 on every cluster VM, one node after
# another, printing each node's hostname before the command output.
# Replace "username" with the real login user on the VMs.
nodes=(192.168.122.11 192.168.122.12 192.168.122.13 192.168.122.14 192.168.122.15)
for node in "${nodes[@]}"; do
  ssh "username@${node}" "hostname;$1"
done
EOF
chmod +x allnodes
配置5台虚拟机基础环境
1.升级Linux内核到最新版
# Refresh the package index on every node, then apply all pending upgrades
# (this is the step the guide uses to bring the kernel up to date).
./allnodes 'sudo apt-get -y update'
./allnodes 'sudo apt-get -y dist-upgrade'
2.设置时间同步
kubernetes要求集群中的节点时间必须精确一致,您可以同时运行一下date命令,检查一下几台机器的时间是否正常。如果正常,则可以跳过此步。
# Use the chronyd service (package name: chrony) to keep every node's clock in sync
./allnodes "sudo apt-get install -y chrony"
./allnodes "sudo chronyc sources -v"
# Set the time zone to UTC+8 (Asia/Shanghai)
./allnodes "sudo timedatectl set-timezone Asia/Shanghai"
3.禁用swap分区
# Turn swap off right away on every node.
./allnodes "sudo swapoff -a"
# Keep it off after a reboot by commenting out the swap entries in /etc/fstab.
# Only lines that are not already comments and that contain a whitespace-
# delimited "swap" field are touched, so re-running the command does not
# stack extra '#' marks or alter unrelated comment lines.
./allnodes "sudo sed -ri '/^[^#].*[[:space:]]swap[[:space:]]/ s/^/#/' /etc/fstab"
4.配置linux的内核参数
# Declare the kernel modules that must be loaded at boot
./allnodes 'sudo sh -c "cat > /etc/modules-load.d/k8s.conf <<EOF
overlay
br_netfilter
EOF"'
# Load overlay and br_netfilter now, without waiting for a reboot
./allnodes 'sudo modprobe overlay && sudo modprobe br_netfilter'
# Enable bridge netfilter hooks and IPv4 forwarding
./allnodes 'sudo sh -c "cat > /etc/sysctl.d/k8s.conf <<EOF
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF"'
# Re-read all sysctl configuration files so the new settings take effect
./allnodes 'sudo sysctl --system'
# Check that the modules are loaded
./allnodes 'sudo lsmod | grep br_netfilter'
./allnodes 'sudo lsmod | grep overlay'
5.配置ipvs功能
# Install the userspace tools for IPVS.
./allnodes 'sudo apt install -y ipset ipvsadm'
# Modules to load at every boot. Files under /etc/modules-load.d/ take one
# bare kernel module name per line; "modprobe -- name" lines are not valid
# there. This file is the single place that handles boot-time loading, so no
# extra entries are appended to /etc/modules (appending would add duplicate
# lines each time the step is repeated).
./allnodes 'sudo sh -c "cat > /etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF"'
# Load the modules now, without waiting for a reboot.
./allnodes 'sudo modprobe -- ip_vs && sudo modprobe -- ip_vs_rr && sudo modprobe -- ip_vs_wrr && sudo modprobe -- ip_vs_sh && sudo modprobe -- nf_conntrack'
# Check that the modules are loaded.
./allnodes 'sudo lsmod | grep -e ip_vs -e nf_conntrack'
6.重启OS
上面步骤完成之后,需要重新启动linux系统:
# Reboot every node, one after another, in the order listed in allnodes
./allnodes 'sudo reboot'
在3台master节点配置 haproxy 与 keepalived
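串行脚本 masters 与 allnodes 写法相同,只是 nodes 列表中去掉了两台 node 节点的 ip(只保留 192.168.122.11、192.168.122.12、192.168.122.13)。
执行下面的配置之前,需要先在3台 master 节点上安装 haproxy 和 keepalived,例如:./masters 'sudo apt-get install -y haproxy keepalived'
1) 配置 haproxy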
bash
# Append an RKE2 frontend/backend pair and a stats listener to haproxy.cfg on
# every master: the frontend listens on port 19345 and forwards over TCP,
# round-robin, to port 9345 on the three masters; the stats page is served
# on port 1080.
./masters 'sudo sh -c "cat >> /etc/haproxy/haproxy.cfg << EOF
#---------------------------------------------------------------------
# kubernetes apiserver frontend which proxys to the backends
#---------------------------------------------------------------------
frontend rke2-apiserver
mode tcp
bind *:19345 # 监听端口
option tcplog
default_backend rke2-apiserver
#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend rke2-apiserver
mode tcp # 模式 TCP
option tcplog
option tcp-check
balance roundrobin # 采用轮询的负载算法
default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
server rke2-master-01 192.168.122.11:9345 check
server rke2-master-02 192.168.122.12:9345 check
server rke2-master-03 192.168.122.13:9345 check
#---------------------------------------------------------------------
# collection haproxy statistics message
#---------------------------------------------------------------------
listen stats
bind *:1080
stats auth admin:awesomePassword
stats refresh 5s
stats realm HAProxy\ Statistics
stats uri /admin?stats
EOF"'
2) 配置 keepalived
bash
# Write the keepalived configuration on every master.
# - The file is a complete configuration, so it is written with ">" rather
#   than appended; repeating the step no longer duplicates every section.
# - The double quotes around the check-script path are escaped (\") because
#   the whole payload sits inside the double-quoted argument of sh -c on the
#   remote side; unescaped quotes would be consumed by that shell and never
#   reach the file.
# state and priority still have to be adjusted by hand on each master.
./masters 'sudo sh -c "cat > /etc/keepalived/keepalived.conf <<EOF
global_defs {
script_user root
enable_script_security
}
vrrp_script chk_apiserver {
script \"/etc/keepalived/check_apiserver.sh\" # 定义脚本路径和名称
interval 5 # 每 5 秒执行一次检测,注意设置间隔太小会有问题
weight -15 # 权重变化
fall 2 # 检测连续2次失败才算失败
rise 1 # 检测1次成功就算成功
}
vrrp_instance VI_1 {
state BACKUP # backup节点设为BACKUP, <看情况调整>
interface enp1s0 # 服务器网卡接口
virtual_router_id 51 # 这个值只要在 keepalived 集群中保持一致即可,默认值是 51
priority 50 #如:master设为 100,备份服务 50,比备份服务器上高就行了,如:master设为 100,备份服务 50
advert_int 1
authentication {
auth_type PASS
auth_pass K8SHA_KA_AUTH #这个值只要在keepalived集群中保持一致即可
}
virtual_ipaddress {
192.168.122.10 # VIP 地址,<看情况调整>
}
track_script {
chk_apiserver
}
}
EOF"'
上述文件写入后,需要在每台 master 节点上根据实际情况再修改:
- state {MASTER|BACKUP}:主节点值为 MASTER,备节点值为 BACKUP。这里配置 rke2-master-01 为 MASTER,rke2-master-02、rke2-master-03 为 BACKUP。
- priority:主节点的优先级必须大于备节点,如主节点 rke2-master-01 设为 100,备节点 rke2-master-02、rke2-master-03 设为 50。
bash
# Install the keepalived health-check script on every master.
# - The here-doc delimiter is quoted (\EOF) so that $(...) and $err reach the
#   file verbatim; inside a double-quoted sh -c argument the remote shell
#   would expand them while writing the file and leave a broken script.
# - tee replaces the file instead of appending to it, so repeating the step
#   does not stack several copies of the script.
# - The script is made executable so that keepalived can run it.
./masters 'sudo tee /etc/keepalived/check_apiserver.sh > /dev/null <<\EOF
#!/bin/bash
# Probe for a running haproxy process up to three times, one second apart.
err=0
for k in {1..3}; do
  if [[ -z "$(pgrep haproxy)" ]]; then
    err=$((err + 1))
    sleep 1
    continue
  fi
  err=0
  break
done
# Every probe failed: stop keepalived on this node and report failure.
if [[ $err != "0" ]]; then
  echo "systemctl stop keepalived"
  /usr/bin/systemctl stop keepalived
  exit 1
else
  exit 0
fi
EOF
sudo chmod 755 /etc/keepalived/check_apiserver.sh'
3) 配置开机启动
bash
# Start haproxy at boot and restart it now so the new configuration is used
./masters 'sudo systemctl enable haproxy '
./masters 'sudo systemctl restart haproxy'
# Same for keepalived
./masters 'sudo systemctl enable keepalived '
./masters 'sudo systemctl restart keepalived'
离线部署rke2
下载如下文件并复制到5台虚拟机
# Fetch the RKE2 v1.28.2+rke2r1 image bundle, binary tarball and checksum
# file, plus the install script.
# -f makes curl exit non-zero on an HTTP error instead of silently saving the
# error page under the artifact name; -S still prints that error despite -s.
curl -fsSLO https://github.com/rancher/rke2/releases/download/v1.28.2%2Brke2r1/rke2-images.linux-amd64.tar.zst
curl -fsSLO https://github.com/rancher/rke2/releases/download/v1.28.2%2Brke2r1/rke2.linux-amd64.tar.gz
curl -fsSLO https://github.com/rancher/rke2/releases/download/v1.28.2%2Brke2r1/sha256sum-amd64.txt
curl -sfL https://rancher-mirror.rancher.cn/rke2/install.sh --output install.sh
在虚拟机中给 install.sh 增加可执行权限:chmod +x install.sh
安装第一个master节点
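这里假设三个安装包文件(rke2-images.linux-amd64.tar.zst、rke2.linux-amd64.tar.gz、sha256sum-amd64.txt)已上传到 /root/rke2-artifacts/ 目录,install.sh 已上传到 /root/ 目录,并以 root 用户执行下面的命令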
bash
# Install the RKE2 server from the artifacts in /root/rke2-artifacts.
INSTALL_RKE2_VERSION=v1.28.2+rke2r1 INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts sh /root/install.sh
mkdir -p /etc/rancher/rke2
# Write the server configuration.
# - ">" replaces the file; appending would duplicate every key when the step
#   is repeated.
# - write-kubeconfig-mode is quoted so that YAML keeps it as the string
#   "0644" instead of reading it as a number.
cat > /etc/rancher/rke2/config.yaml << EOF
#server: "https://192.168.122.10:19345" # 等三台都起来后把这个配置取消注释,重启服务
write-kubeconfig: "/root/.kube/config"
write-kubeconfig-mode: "0644"
## 自定义一个 token 标识
token: "RKE2@Cluster"
## tls-san 填写LB的统一入口ip地址或域名
tls-san:
- "192.168.122.10"
## 打上污点,不让用户工作负载调度到该节点上
node-taint:
- "CriticalAddonsOnly=true:NoExecute"
kube-proxy-arg: # 不指定的话,默认是 iptables 模式
- "proxy-mode=ipvs"
EOF
# These commands already run as root, so no sudo is needed.
systemctl enable rke2-server && systemctl start rke2-server
# Verify: expose the bundled crictl config, kubectl and crictl on the default
# paths, then list nodes, containers and images.
ln -s /var/lib/rancher/rke2/agent/etc/crictl.yaml /etc/crictl.yaml
ln -s /var/lib/rancher/rke2/bin/kubectl /usr/bin/kubectl
ln -s /var/lib/rancher/rke2/bin/crictl /usr/bin/crictl
kubectl get node
crictl ps
crictl images
将其他 master 节点加入集群
与第一个master节点不同的是/etc/rancher/rke2/config.yaml中取消了server参数注释,第一个master节点的注释要等所有其他master节点起来后才取消注释
bash
# Install the RKE2 server from the artifacts in /root/rke2-artifacts.
INSTALL_RKE2_VERSION=v1.28.2+rke2r1 INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts sh /root/install.sh
mkdir -p /etc/rancher/rke2
# Write the server configuration; "server" points at the VIP so that this
# node joins the existing cluster through the load balancer.
# - ">" replaces the file; appending would duplicate every key when the step
#   is repeated.
# - write-kubeconfig-mode is quoted so that YAML keeps it as the string
#   "0644" instead of reading it as a number.
cat > /etc/rancher/rke2/config.yaml << EOF
server: "https://192.168.122.10:19345"
write-kubeconfig: "/root/.kube/config"
write-kubeconfig-mode: "0644"
## 自定义一个 token 标识
token: "RKE2@Cluster"
## tls-san 填写LB的统一入口ip地址或域名
tls-san:
- "192.168.122.10"
## 打上污点,不让用户工作负载调度到该节点上
node-taint:
- "CriticalAddonsOnly=true:NoExecute"
kube-proxy-arg: # 不指定的话,默认是 iptables 模式
- "proxy-mode=ipvs"
EOF
# These commands already run as root, so no sudo is needed.
systemctl enable rke2-server && systemctl start rke2-server
# Verify: expose the bundled crictl config, kubectl and crictl on the default
# paths, then list nodes, containers and images.
ln -s /var/lib/rancher/rke2/agent/etc/crictl.yaml /etc/crictl.yaml
ln -s /var/lib/rancher/rke2/bin/kubectl /usr/bin/kubectl
ln -s /var/lib/rancher/rke2/bin/crictl /usr/bin/crictl
kubectl get node
crictl ps
crictl images
将 node 节点加入集群
bash
# Install RKE2 in agent mode from the artifacts in /root/rke2-artifacts
INSTALL_RKE2_TYPE="agent" INSTALL_RKE2_VERSION=v1.28.2+rke2r1 INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts sh /root/install.sh
mkdir -p /etc/rancher/rke2
# Agent configuration: join through the VIP with the shared token and run
# kube-proxy in ipvs mode
cat > /etc/rancher/rke2/config.yaml << EOF
server: "https://192.168.122.10:19345"
token: "RKE2@Cluster"
kube-proxy-arg:
- "proxy-mode=ipvs"
EOF
# Start the agent now and at every boot
systemctl enable rke2-agent && sudo systemctl start rke2-agent
发布一个pod进行验证
bash
# Create a throwaway busybox pod that sleeps for an hour, to confirm that
# workloads can be scheduled and started. The manifest is fed to kubectl on
# stdin; its nesting is expressed through indentation, which must be kept.
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: busybox
  namespace: default
spec:
  containers:
  - name: busybox
    image: docker.io/library/busybox:1.28
    command:
    - sleep
    - "3600"
    imagePullPolicy: IfNotPresent
  restartPolicy: Always
EOF
# List the pod; it should be reported as Running, for example:
#   default busybox 1/1 Running 0 54s 10.42.9.4 rke2-node-02 <none> <none>
kubectl get pod -A -owide| grep busybox
删除集群节点
bash
## Log in to the node that is being removed and run the following
rke2-killall.sh
rke2-uninstall.sh
## Then, on a master node, delete the node object and its node-password secret
## ({NODE-NAME} is a placeholder for the name of the removed node)
kubectl delete node {NODE-NAME}
kubectl -n kube-system delete secrets {NODE-NAME}.node-password.rke2
参考《RKE2 高可用部署》
来自 https://blog.csdn.net/cl18707602767/article/details/132641657