使用Ubuntu虚拟机离线部署RKE2高可用集群

环境说明

宿主机和虚拟机的OS与内核相同,如下

bash 复制代码
$ cat /etc/issue
Ubuntu 22.04.3 LTS \n \l

$ uname -sr
Linux 6.2.0-34-generic

虚拟化软件版本

bash 复制代码
$ kvm --version
QEMU emulator version 6.2.0 (Debian 1:6.2+dfsg-2ubuntu6.14)
Copyright (c) 2003-2021 Fabrice Bellard and the QEMU Project developers
$ virt-manager --version
4.0.0

安装前准备

参考《基于ubuntu22.04安装KVM虚拟机》安装5台虚拟机

来自 https://blog.csdn.net/u010438035/article/details/129260034

参考下面修改hosts文件,并配置每台虚拟机的ip和hostname,其中vip 设置为:192.168.122.10

bash 复制代码
cat /etc/hosts
192.168.122.10 rke2-vip
192.168.122.11 rke2-master-01
192.168.122.12 rke2-master-02
192.168.122.13 rke2-master-03
192.168.122.14 rke2-node-01
192.168.122.15 rke2-node-02

配置5台虚拟机免密ssh登陆

bash 复制代码
# Generate an RSA key pair, then install the public key on all five VMs so
# that later ssh calls need no password. Looping over the host list keeps each
# host listed exactly once (rke2-node-02 was previously missed).
ssh-keygen -t rsa
for host in rke2-master-01 rke2-master-02 rke2-master-03 rke2-node-01 rke2-node-02; do
  ssh-copy-id -i ~/.ssh/id_rsa.pub "$host"
done

配置5台虚拟机sudo 免输入密码

注意下面的username是使用者的实际名字,需要修改

bash 复制代码
# Write a sudoers drop-in that lets "username" run any command through sudo
# without being asked for a password. Replace "username" with the real account.
# NOTE(review): writing under /etc/sudoers.d presumably needs a root shell — confirm.
cat > /etc/sudoers.d/passwordless  << EOF
# add in user AFTER the sudo and admin group
username ALL=(ALL) NOPASSWD: ALL
EOF

在宿主机创建一个串行脚本 allnodes

allnodes只需要放置在宿主机上,使用普通用户执行

下面的username是使用者的实际名字,需要修改

bash 复制代码
# Create ./allnodes: a helper that runs one command string on every VM in turn
# over ssh, printing the VM's hostname before the command's output.
# Usage: ./allnodes '<command>'
# The quoted heredoc delimiter keeps $i and $1 literal in the generated script.
cat > allnodes <<'EOF'
#!/bin/bash
nodes=(192.168.122.11 192.168.122.12 192.168.122.13 192.168.122.14 192.168.122.15)
for i in "${nodes[@]}"; do
  ssh "username@$i" "hostname;$1"
done
EOF

chmod +x allnodes

配置5台虚拟机基础环境

1.升级Linux内核到最新版
./allnodes 'sudo apt-get -y update'
./allnodes 'sudo apt-get -y dist-upgrade'

2.设置时间同步
kubernetes要求集群中的节点时间必须精确一致,您可以同时运行一下date命令,检查一下几台机器的时间是否正常。如果正常,则可以跳过此步。
# Install the chrony package on every node, then list the time sources chrony
# is using so the synchronization state can be inspected.
./allnodes "sudo apt-get install -y chrony"
./allnodes "sudo chronyc sources -v"

# Set the time zone on every node to Asia/Shanghai (UTC+8).
./allnodes "sudo timedatectl set-timezone Asia/Shanghai"

3.禁用swap分区
./allnodes "sudo swapoff -a"
./allnodes "sudo sed -ri 's/.*swap.*/#&/' /etc/fstab"

4.配置linux的内核参数
# Write the list of modules (overlay, br_netfilter) to /etc/modules-load.d/k8s.conf
# on every node.
./allnodes 'sudo sh -c "cat > /etc/modules-load.d/k8s.conf <<EOF
overlay
br_netfilter
EOF"'
# Load the overlay and br_netfilter modules right away.
./allnodes 'sudo modprobe overlay && sudo modprobe br_netfilter'
# Enable bridge netfilter hooks for iptables/ip6tables and IPv4 forwarding.
./allnodes 'sudo sh -c "cat > /etc/sysctl.d/k8s.conf  <<EOF
net.bridge.bridge-nf-call-iptables  = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward                 = 1
EOF"'

# Apply the sysctl settings from all configuration files.
./allnodes 'sudo sysctl --system'

# Check that both modules are loaded.
./allnodes 'sudo lsmod | grep br_netfilter'
./allnodes 'sudo lsmod | grep overlay'

5.配置ipvs功能
# Install the IPVS userspace tools on every node.
./allnodes 'sudo apt install -y ipset ipvsadm'

# Load the IPVS modules at boot. Files under /etc/modules-load.d/ must contain
# bare kernel module names, one per line -- not "modprobe" command lines.
# This file replaces the former append to /etc/modules, which duplicated its
# entries on every re-run.
./allnodes 'sudo sh -c "cat > /etc/modules-load.d/ipvs.conf << EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF"'

# Load the modules now, without waiting for the reboot.
./allnodes 'sudo modprobe -- ip_vs && sudo modprobe -- ip_vs_rr && sudo modprobe -- ip_vs_wrr && sudo modprobe -- ip_vs_sh && sudo modprobe -- nf_conntrack'

# Check that the modules are loaded.
./allnodes 'sudo lsmod | grep -e ip_vs -e nf_conntrack'

6.重启OS
上面步骤完成之后,需要重新启动linux系统: 
./allnodes 'sudo reboot'

在3台master节点配置 haproxy 与 keepalived

串行脚本masters的内容与allnodes相同,只是nodes数组中去掉了两台node节点的ip(仅保留192.168.122.11、192.168.122.12、192.168.122.13),创建后同样需要执行 chmod +x masters

1) 配置 haproxy
bash 复制代码
# Install haproxy first: the package provides /etc/haproxy/haproxy.cfg, which
# the next command appends to (the redirect fails if /etc/haproxy is missing).
./masters 'sudo apt-get install -y haproxy'

# Append the RKE2 frontend/backend and a statistics page to the packaged
# configuration. Run this only once per master: ">>" appends on every run.
# NOTE: replace the sample stats credentials (admin:awesomePassword) before use.
./masters 'sudo sh -c "cat >> /etc/haproxy/haproxy.cfg << EOF
#---------------------------------------------------------------------
# kubernetes apiserver frontend which proxys to the backends
#---------------------------------------------------------------------
frontend rke2-apiserver
    mode                 tcp
    bind                 *:19345  # 监听端口
    option               tcplog
    default_backend      rke2-apiserver
#---------------------------------------------------------------------
# round robin balancing between the various backends
#---------------------------------------------------------------------
backend rke2-apiserver
    mode        tcp        # 模式 TCP
    option      tcplog
    option      tcp-check
    balance     roundrobin # 采用轮询的负载算法
    default-server inter 10s downinter 5s rise 2 fall 2 slowstart 60s maxconn 250 maxqueue 256 weight 100
    server      rke2-master-01   192.168.122.11:9345 check
    server      rke2-master-02   192.168.122.12:9345 check
    server      rke2-master-03   192.168.122.13:9345 check
#---------------------------------------------------------------------
# collection haproxy statistics message
#---------------------------------------------------------------------
listen stats
    bind                 *:1080
    stats auth           admin:awesomePassword
    stats refresh        5s
    stats realm          HAProxy\ Statistics
    stats uri            /admin?stats
EOF"'

2) 配置 keepalived

bash 复制代码
# Install keepalived so that /etc/keepalived exists before the file is written.
./masters 'sudo apt-get install -y keepalived'

# Write (overwrite) keepalived.conf on every master. "sudo tee" with a quoted
# heredoc delimiter sends the text verbatim, so the double quotes around the
# script path survive; nesting them inside sh -c "..." would strip them.
# Overwriting instead of appending keeps re-runs from duplicating the config.
./masters 'sudo tee /etc/keepalived/keepalived.conf >/dev/null <<"EOF"
global_defs {
    script_user root
    enable_script_security
}
vrrp_script chk_apiserver {
    script "/etc/keepalived/check_apiserver.sh" # 定义脚本路径和名称
    interval 5 # 每 5 秒执行一次检测,注意设置间隔太小会有问题
    weight -15 # 权重变化
    fall 2     # 检测连续2次失败才算失败
    rise 1     # 检测1次成功就算成功
}
vrrp_instance VI_1 {
    state BACKUP # backup节点设为BACKUP, <看情况调整>
    interface enp1s0  # 服务器网卡接口
    virtual_router_id 51  # 这个值只要在 keepalived 集群中保持一致即可,默认值是 51
    priority 50 #如:master设为 100,备份服务 50,比备份服务器上高就行了,如:master设为 100,备份服务 50
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass K8SHA_KA_AUTH  #这个值只要在keepalived集群中保持一致即可
    }
    virtual_ipaddress {
        192.168.122.10   # VIP 地址,<看情况调整>
    }
    track_script {
       chk_apiserver
    }
}
EOF'

上述文件写入后,需要在每台 master 上根据实际情况再修改:
state:主节点设为 MASTER,备节点设为 BACKUP;这里配置 rke2-master-01 为 MASTER,rke2-master-02、rke2-master-03 为 BACKUP
priority:主节点的优先级要大于备节点,如主节点 rke2-master-01 设为 100,备节点 rke2-master-02、rke2-master-03 设为 50

bash 复制代码
# Write the keepalived health-check script on every master and make it
# executable. The quoted heredoc delimiter is essential: it stops the remote
# shell from expanding $(...), $err and $check_code while the file is being
# written, so the script reaches disk exactly as shown.
# The script checks up to three times, one second apart, for a running
# haproxy process; if none is found it stops keepalived and exits 1.
./masters 'sudo tee /etc/keepalived/check_apiserver.sh >/dev/null <<"EOF"
#!/bin/bash
err=0
for k in $(seq 1 3)
do
    check_code=$(pgrep haproxy)
    if [[ $check_code == "" ]]; then
        err=$(expr $err + 1)
        sleep 1
        continue
    else
        err=0
        break
    fi
done

if [[ $err != "0" ]]; then
    echo "systemctl stop keepalived"
    /usr/bin/systemctl stop keepalived
    exit 1
else
    exit 0
fi
EOF
sudo chmod 755 /etc/keepalived/check_apiserver.sh'

3) 配置开机启动

bash 复制代码
# For each load-balancer service, enable it at boot and then restart it on
# all three masters (haproxy first, keepalived second).
for svc in haproxy keepalived; do
  ./masters "sudo systemctl enable $svc "
  ./masters "sudo systemctl restart $svc"
done

离线部署rke2

下载如下文件并复制到5台虚拟机

# Download the RKE2 v1.28.2+rke2r1 release artifacts. -f makes curl exit
# non-zero on an HTTP error instead of silently saving the error page under
# the artifact's file name.
base=https://github.com/rancher/rke2/releases/download/v1.28.2%2Brke2r1
for f in rke2-images.linux-amd64.tar.zst rke2.linux-amd64.tar.gz sha256sum-amd64.txt; do
  curl -fOLs "$base/$f"
done

# Download the install script from the Rancher mirror.
curl -sfL https://rancher-mirror.rancher.cn/rke2/install.sh --output install.sh

在虚拟机中给install.sh增加可执行权限

安装第一个master节点

这里假设安装包已上传到 /root/rke2-artifacts/ 目录,install.sh 位于 /root/ 目录,以下命令均以 root 用户执行

bash 复制代码
# Install the RKE2 server from the local artifact directory (offline install).
INSTALL_RKE2_VERSION=v1.28.2+rke2r1 INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts sh /root/install.sh
mkdir -p /etc/rancher/rke2
# Overwrite rather than append, so re-running this step cannot leave duplicate
# keys in config.yaml. The kubeconfig mode is quoted so YAML keeps it as the
# string "0644" instead of reading it as an integer.
cat > /etc/rancher/rke2/config.yaml << EOF
#server: "https://192.168.122.10:19345" # 等三台都起来后把这个配置取消注释,重启服务
write-kubeconfig: "/root/.kube/config"
write-kubeconfig-mode: "0644"
## 自定义一个 token 标识
token: "RKE2@Cluster"
## tls-san 填写LB的统一入口ip地址或域名
tls-san:
  - "192.168.122.10"

## 打上污点,不让用户工作负载调度到该节点上
node-taint:
  - "CriticalAddonsOnly=true:NoExecute"
kube-proxy-arg: # 不指定的话,默认是 iptables 模式
  - "proxy-mode=ipvs"
EOF
# This session already runs as root, so no sudo is needed here.
systemctl enable rke2-server && systemctl start rke2-server

# Verify: link the bundled crictl config, kubectl and crictl into standard
# locations (-f keeps the step re-runnable), then query the cluster.
ln -sf /var/lib/rancher/rke2/agent/etc/crictl.yaml /etc/crictl.yaml
ln -sf /var/lib/rancher/rke2/bin/kubectl /usr/bin/kubectl
ln -sf /var/lib/rancher/rke2/bin/crictl /usr/bin/crictl

kubectl get node
crictl ps
crictl images

将其他 master 节点加入集群

与第一个master节点不同的是,其他master节点的/etc/rancher/rke2/config.yaml中server参数不加注释;第一个master节点的server参数要等其他master节点全部启动后再取消注释,并重启rke2-server服务

bash 复制代码
# Install the RKE2 server from the local artifact directory (offline install).
INSTALL_RKE2_VERSION=v1.28.2+rke2r1 INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts sh /root/install.sh
mkdir -p /etc/rancher/rke2
# Overwrite rather than append, so re-running this step cannot leave duplicate
# keys in config.yaml. "server" points at the VIP and the haproxy port, so this
# node joins the existing cluster. The kubeconfig mode is quoted so YAML keeps
# it as the string "0644" instead of reading it as an integer.
cat > /etc/rancher/rke2/config.yaml << EOF
server: "https://192.168.122.10:19345"
write-kubeconfig: "/root/.kube/config"
write-kubeconfig-mode: "0644"
## 自定义一个 token 标识
token: "RKE2@Cluster"
## tls-san 填写LB的统一入口ip地址或域名
tls-san:
  - "192.168.122.10"

## 打上污点,不让用户工作负载调度到该节点上
node-taint:
  - "CriticalAddonsOnly=true:NoExecute"
kube-proxy-arg: # 不指定的话,默认是 iptables 模式
  - "proxy-mode=ipvs"
EOF
# This session already runs as root, so no sudo is needed here.
systemctl enable rke2-server && systemctl start rke2-server

# Verify: link the bundled crictl config, kubectl and crictl into standard
# locations (-f keeps the step re-runnable), then query the cluster.
ln -sf /var/lib/rancher/rke2/agent/etc/crictl.yaml /etc/crictl.yaml
ln -sf /var/lib/rancher/rke2/bin/kubectl /usr/bin/kubectl
ln -sf /var/lib/rancher/rke2/bin/crictl /usr/bin/crictl

kubectl get node
crictl ps
crictl images

将 node 节点加入集群

bash 复制代码
# Install RKE2 in agent mode from the local artifact directory.
INSTALL_RKE2_TYPE="agent" INSTALL_RKE2_VERSION=v1.28.2+rke2r1 INSTALL_RKE2_ARTIFACT_PATH=/root/rke2-artifacts sh /root/install.sh
mkdir -p /etc/rancher/rke2
# Write the agent configuration: the server URL uses the VIP and port 19345,
# with the same token as the servers and kube-proxy in ipvs mode.
cat > /etc/rancher/rke2/config.yaml << EOF
server: "https://192.168.122.10:19345"
token: "RKE2@Cluster"

kube-proxy-arg:
  - "proxy-mode=ipvs"
EOF

# Enable the agent service at boot and start it.
systemctl enable rke2-agent && sudo systemctl start rke2-agent

发布一个pod进行验证

bash 复制代码
# Create a busybox test pod in the default namespace. The leading "# " root
# prompt has been removed so the snippet can be pasted as-is; with it, the cat
# line became a comment and the manifest lines ran as shell commands.
# The quoted delimiter keeps the manifest literal.
cat <<'EOF' | kubectl apply -f -
apiVersion: v1
kind: Pod
metadata:
  name: busybox
  namespace: default
spec:
  containers:
  - name: busybox
    image: docker.io/library/busybox:1.28
    command:
      - sleep
      - "3600"
    imagePullPolicy: IfNotPresent
  restartPolicy: Always
EOF

# kubectl get pod -A -owide| grep busybox
default       busybox                                                 1/1     Running     0               54s   10.42.9.4        rke2-node-02     <none>           <none>

删除集群节点

bash 复制代码
## 登录到要删除的节点,执行以下命令
rke2-killall.sh
rke2-uninstall.sh
## 在master节点执行删除操作
kubectl delete node {NODE-NAME}
kubectl -n kube-system delete secrets {NODE-NAME}.node-password.rke2

参考《RKE2 高可用部署》

来自 https://blog.csdn.net/cl18707602767/article/details/132641657

相关推荐
景天科技苑3 小时前
【云原生开发】K8S多集群资源管理平台架构设计
云原生·容器·kubernetes·k8s·云原生开发·k8s管理系统
wclass-zhengge4 小时前
K8S篇(基本介绍)
云原生·容器·kubernetes
颜淡慕潇4 小时前
【K8S问题系列 |1 】Kubernetes 中 NodePort 类型的 Service 无法访问【已解决】
后端·云原生·容器·kubernetes·问题解决
wowocpp6 小时前
ubuntu 22.04 硬件配置 查看 显卡
linux·运维·ubuntu
山河君6 小时前
ubuntu使用DeepSpeech进行语音识别(包含交叉编译)
linux·ubuntu·语音识别
knighthood20016 小时前
解决:ros进行gazebo仿真,rviz没有显示传感器数据
c++·ubuntu·ros
昌sit!12 小时前
K8S node节点没有相应的pod镜像运行故障处理办法
云原生·容器·kubernetes
A ?Charis15 小时前
Gitlab-runner running on Kubernetes - hostAliases
容器·kubernetes·gitlab
北漂IT民工_程序员_ZG16 小时前
k8s集群安装(minikube)
云原生·容器·kubernetes
wowocpp17 小时前
ubuntu 22.04 server 安装 anaconda3
linux·运维·ubuntu