02 K8s Dual-Master Installation

00 Prerequisites

sh
# Set the hostname on each node and add hosts entries
Hostname   IP address     Role
master01   172.2.25.50    Primary master
master02   172.2.25.51    Backup master
node01     172.2.25.52    Worker 1
node02     172.2.25.53    Worker 2
node03     172.2.25.54    Worker 3
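The table above only lists the targets; a minimal sketch of the actual prerequisite commands (hostnames and IPs taken from the table, the rest of the environment assumed) looks like this:

sh
# Run on each node with its own name (master01 shown here)
hostnamectl set-hostname master01

# Append the same hosts entries on every node
cat >> /etc/hosts <<'EOF'
172.2.25.50 master01
172.2.25.51 master02
172.2.25.52 node01
172.2.25.53 node02
172.2.25.54 node03
EOF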

01 Install Docker with the Script

sh
# Docker install script bundle
https://node-siyuan.oss-cn-beijing.aliyuncs.com/K8s%E6%96%87%E4%BB%B6/docker-install.tar.gz
sh
# Extract and run the Docker install script
[root@master01 opt]# ls
docker-install.tar.gz
[root@master01 opt]# tar -zxvf docker-install.tar.gz 
docker-install/
docker-install/download/
docker-install/download/docker-compose-v2.23.0
docker-install/download/docker-20.10.24.tgz
docker-install/download/docker
docker-install/download/docker.service
docker-install/download/daemon.json
docker-install/install-docker.sh
[root@master01 opt]# cd docker-install/
[root@master01 docker-install]# ./install-docker.sh i

02 Base Environment Setup

sh
# Disable the swap partition both temporarily and permanently
[root@master01 docker-install]# swapoff -a && sysctl -w vm.swappiness=0
vm.swappiness = 0
[root@master01 docker-install]# sed -ri '/^[^#]*swap/s@^@#@' /etc/fstab


Figure 1: Swap partition disabled

sh
# Ensure the MAC address and product_uuid are unique on every node
[root@master01 docker-install]# ifconfig  ens33  | grep ether | awk '{print $2}'
00:0c:29:5f:82:68
[root@master01 docker-install]# cat /sys/class/dmi/id/product_uuid
4FE74D56-8C86-11D7-9761-FF1D435F8268
# Note:
    # Physical hardware usually has unique addresses, but some virtual machines may have duplicates.
    # Kubernetes uses these values to uniquely identify the nodes in the cluster; if they are not unique on every node, the installation may fail.
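If passwordless SSH between the hosts is already set up, a small hedged loop (hostnames from the table above, interface name ens33 assumed) makes duplicates easy to spot:

sh
# Print the MAC and product_uuid of every node; any repeated value points to a cloned VM
for h in master01 master02 node01 node02 node03; do
    echo "== $h =="
    ssh "$h" "cat /sys/class/net/ens33/address /sys/class/dmi/id/product_uuid"
done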
sh
# Verify network connectivity
[root@master01 docker-install]# ping -c 3 baidu.com
PING baidu.com (110.242.68.66) 56(84) bytes of data.
64 bytes from 110.242.68.66 (110.242.68.66): icmp_seq=1 ttl=128 time=40.6 ms
64 bytes from 110.242.68.66 (110.242.68.66): icmp_seq=2 ttl=128 time=41.1 ms
64 bytes from 110.242.68.66 (110.242.68.66): icmp_seq=3 ttl=128 time=37.2 ms

--- baidu.com ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 2002ms
rtt min/avg/max/mdev = 37.299/39.670/41.103/1.704 ms
sh
# Allow iptables to inspect bridged traffic
cat <<EOF | tee /etc/modules-load.d/k8s.conf
br_netfilter
EOF

cat <<EOF | tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
sysctl --system
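The modules-load.d file only takes effect at boot, so it is worth loading br_netfilter immediately and confirming the three sysctl values; a quick check:

sh
# Load the module now if it is not present yet (it will auto-load on future boots)
lsmod | grep br_netfilter || modprobe br_netfilter
# All three values should print 1
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward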
sh
# Configure the Docker environment
[root@master01 docker-install]# systemctl enable --now docker
[root@master01 docker-install]# cat /etc/docker/daemon.json 
{
	"exec-opts": ["native.cgroupdriver=systemd"]
}
[root@master01 docker-install]# systemctl restart docker.service 
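To confirm Docker actually picked up the systemd cgroup driver after the restart, one hedged check:

sh
# Expected output: systemd
docker info --format '{{.CgroupDriver}}'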

03 Install the Base K8s Packages

sh
# Download the K8s RPM bundle
https://node-siyuan.oss-cn-beijing.aliyuncs.com/K8s%E6%96%87%E4%BB%B6/kubeadmin-rpm-1_23_17.tar.gz
sh
# Extract and install the RPM bundle
[root@master01 opt]# tar -zxvf kubeadmin-rpm-1_23_17.tar.gz
[root@master01 opt]# yum localinstall -y kubeadmin-rpm-1_23_17/*.rpm
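A quick sanity check that the 1.23.17 binaries landed (the exact package layout of the bundle is assumed, not verified here):

sh
# All of these should report v1.23.17
kubeadm version -o short
kubelet --version
kubectl version --client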
sh
# Set the timezone
[root@master01 opt]# ln -svf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
sh
# Import the Docker images (note: load the master images on the master hosts and the node images on the node hosts; load the CNI plugin images on every host)
	# Master node images
https://node-siyuan.oss-cn-beijing.aliyuncs.com/K8s%E6%96%87%E4%BB%B6/%E5%88%9D%E5%A7%8B%E5%8C%96K8s%E9%95%9C%E5%83%8F/master-1.23.17.tar.gz

	# Worker node images
https://node-siyuan.oss-cn-beijing.aliyuncs.com/K8s%E6%96%87%E4%BB%B6/%E5%88%9D%E5%A7%8B%E5%8C%96K8s%E9%95%9C%E5%83%8F/slave-1.23.17.tar.gz
	
	# CNI plugin
https://node-siyuan.oss-cn-beijing.aliyuncs.com/K8s%E6%96%87%E4%BB%B6/%E5%88%9D%E5%A7%8B%E5%8C%96K8s%E9%95%9C%E5%83%8F/cni-v1.5.1-flannel-v0.25.6.tar.gz
sh
# Load the CNI images on all hosts
[root@master01 opt]# docker load -i cni-v1.5.1-flannel-v0.25.6.tar.gz 
30f8e4588c9e: Loading layer [==================================================>]  8.081MB/8.081MB
05bb1aee8d21: Loading layer [==================================================>]  2.836MB/2.836MB
Loaded image: flannel/flannel-cni-plugin:v1.5.1-flannel2
574b3797bb46: Loading layer [==================================================>]  3.223MB/3.223MB
8047f399ca1f: Loading layer [==================================================>]  16.68MB/16.68MB
sh
# Load the images on the master hosts
[root@master01 opt]# docker load -i master-1.23.17.tar.gz
cb60fb9b862c: Loading layer [==================================================>]  3.676MB/3.676MB
4e0ee8e33055: Loading layer [==================================================>]  1.538MB/1.538MB
f1f1d923ca19: Loading layer [==================================================>]  126.1MB/126.1MB
sh
# Load the images on the worker hosts
[root@node03 opt]# docker load -i slave-1.23.17.tar.gz
1bb475414a7e: Loading layer [==================================================>]   7.67MB/7.67MB
895a9dd950c1: Loading layer [==================================================>]  9.353MB/9.353MB
290b7ce264c5: Loading layer [==================================================>]  14.91MB/14.91MB
sh
# Enable kubelet at boot on all hosts
[root@master01 opt]# systemctl enable kubelet
Created symlink from /etc/systemd/system/multi-user.target.wants/kubelet.service to /usr/lib/systemd/system/kubelet.service.

04 Install Nginx + Keepalived

sh
# Install Nginx + Keepalived on both master nodes
[root@master01 opt]# cat /etc/yum.repos.d/nginx.repo
[nginx-stable]
name=nginx stable repo
baseurl=http://nginx.org/packages/centos/7/$basearch/
gpgcheck=0
enabled=1
gpgkey=https://nginx.org/keys/nginx_signing.key
module_hotfixes=true

[root@master01 opt]# yum -y install epel-release nginx keepalived nginx-mod-stream
sh
# Apply the same Nginx configuration changes on both master01 and master02
[root@master02 opt]# cat /etc/nginx/nginx.conf
user nginx;
worker_processes auto;
error_log /var/log/nginx/error.log;
pid /run/nginx.pid;
include /usr/share/nginx/modules/*.conf;
events {
    worker_connections  1024;
}
stream {
    log_format  main  '$remote_addr $upstream_addr - [$time_local] $status $upstream_bytes_sent';
    access_log  /var/log/nginx/k8s-access.log  main;
    upstream k8s-apiserver {
        server 172.2.25.50:6443 weight=5 max_fails=3 fail_timeout=30s;
        server 172.2.25.51:6443 weight=5 max_fails=3 fail_timeout=30s;
    }
    server {
        listen 16443;
        proxy_pass k8s-apiserver;
    }
}
http {
    include       /etc/nginx/mime.types;
    default_type  application/octet-stream;
    log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
                      '$status $body_bytes_sent "$http_referer" '
                      '"$http_user_agent" "$http_x_forwarded_for"';
    access_log  /var/log/nginx/access.log  main;
    sendfile        on;
    #tcp_nopush     on;
    keepalive_timeout  65;
    #gzip  on;
    include /etc/nginx/conf.d/*.conf;
}

# Restart Nginx
[root@master02 opt]# systemctl restart nginx && systemctl enable nginx
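To confirm the stream proxy is actually listening on 16443 on both masters, something like the following helps; the curl line is only meaningful once the cluster has been initialized and is expected to return the apiserver version JSON.

sh
# nginx should own the 16443 listener
ss -lntp | grep 16443
# After kubeadm init, test the full path through the VIP (VIP taken from the next step)
curl -k https://172.2.25.100:16443/version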
sh
# Write the Keepalived health-check script
[root@master01 keepalived]# cat /etc/keepalived/check.sh 
#!/bin/bash
# Log file path
LOG_FILE="/var/log/nginx_keepalived_check.log"
MAX_LINES=1000  # Keep only the last 1000 log lines (otherwise the log grows without bound and eats disk space)
# Logging helper: timestamps every message and trims the log to the last 1000 lines
log_message() {
    local time_stamp=$(date '+%Y-%m-%d %H:%M:%S')  # Timestamp format
    echo "$time_stamp - $1" >> $LOG_FILE
    # Trim the log file, keeping only the last 1000 lines
    tail -n $MAX_LINES $LOG_FILE > ${LOG_FILE}.tmp && mv ${LOG_FILE}.tmp $LOG_FILE
}
# Check whether Nginx is running
check_nginx() {
    pgrep -f "nginx: master" > /dev/null 2>&1
    echo $?
}
# 1. Check whether Nginx is alive
log_message "Checking Nginx status..."
if [ $(check_nginx) -ne 0 ]; then
    log_message "Nginx is not running; trying to start it..."
    # 2. Nginx is down, try to start it
    systemctl start nginx
    sleep 2  # Give Nginx time to start
    # 3. Check Nginx again
    log_message "Re-checking status after starting Nginx..."
    if [ $(check_nginx) -ne 0 ]; then
        log_message "Nginx failed to start; stopping Keepalived..."
        # 4. Nginx could not be started, so stop Keepalived and let the VIP fail over
        systemctl stop keepalived
    else
        log_message "Nginx started successfully."
    fi
else
    log_message "Nginx is running normally."
fi
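Keepalived invokes this script from the vrrp_script block configured next, so it should be executable; a manual dry run also confirms the log gets written (paths as above):

sh
chmod +x /etc/keepalived/check.sh
# Run it once by hand and inspect the log
bash /etc/keepalived/check.sh
tail -n 5 /var/log/nginx_keepalived_check.log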
sh
# Edit the Keepalived configuration
# Keepalived config on master01
[root@master01 keepalived]# cat /etc/keepalived/keepalived.conf 
global_defs {
   router_id LVS_DEVEL
}
vrrp_script keepalived_nginx_check { # Reference the check script created in the previous step
    script "/etc/keepalived/check.sh" # Adjust to your script path; keeping it under this directory makes it easier to manage
}  
vrrp_instance VI_1 {
    state MASTER		# MASTER on the primary, BACKUP on the backup
    interface ens33    # Change to your actual interface name
    virtual_router_id 51  # Must be identical on primary and backup
    priority 100        # Priority; set the backup lower than the primary
    advert_int 1        # VRRP advertisement interval of 1 second
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        172.2.25.100/24  # Use an unused IP as the VIP; it must be the same on primary and backup
    }
    track_script {
        keepalived_nginx_check # The vrrp_script defined above; Keepalived tracks it and acts on its exit status
    }
}
sh
# Keepalived config on master02
[root@master02 keepalived]# cat /etc/keepalived/keepalived.conf
global_defs {
   router_id LVS_DEVEL
}

vrrp_script keepalived_nginx_check { # Reference the check script created in the previous step
    script "/etc/keepalived/check.sh" # Adjust to your script path; keeping it under this directory makes it easier to manage
}  
vrrp_instance VI_1 {
    state BACKUP		# MASTER on the primary, BACKUP on the backup
    interface ens33    # Change to your actual interface name
    virtual_router_id 51  # Must be identical on primary and backup
    priority 90        # Priority; set the backup lower than the primary
    advert_int 1        # VRRP advertisement interval of 1 second
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        172.2.25.100/24  # Use an unused IP as the VIP; it must be the same on primary and backup
    }
    track_script {
        keepalived_nginx_check # The vrrp_script defined above; Keepalived tracks it and acts on its exit status
    }
}
sh
# Restart Nginx and Keepalived and enable them at boot
[root@master01 keepalived]# systemctl daemon-reload && systemctl restart nginx 
[root@master01 keepalived]# systemctl restart keepalived && systemctl enable keepalived


Figure 2: The VIP created on master01
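A hedged way to verify the VIP and its failover behaviour: the address should sit on master01's ens33, and if keepalived is stopped on master01 it should reappear on master02 within a few seconds.

sh
# On master01: the VIP should be attached to ens33
ip addr show ens33 | grep 172.2.25.100
# Optional failover test: stop keepalived on master01, then repeat the check on master02
# systemctl stop keepalived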

05 Initialize the Master Node and Join the Other Nodes

sh
# Run the initialization on the first master node
[root@master01 opt]# kubeadm init --kubernetes-version=v1.23.17 --image-repository registry.aliyuncs.com/google_containers  --pod-network-cidr=172.22.0.0/16 --service-cidr=172.125.0.0/16  --service-dns-domain=xy.com --apiserver-advertise-address=172.2.25.50  
# --apiserver-advertise-address matters when the host has multiple physical NICs; with a single NIC it can be omitted. For an HA setup behind the Nginx/Keepalived VIP, kubeadm also supports --control-plane-endpoint (e.g. 172.2.25.100:16443) so that clients reach the API server through the load balancer.
sh
# After a successful init, run these three commands
  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config


Figure 3: Initialization succeeded; now join the worker nodes

sh
# If initialization fails, run the following command to reset
[root@master02 ~]# kubeadm reset -f


Figure 4: Reset after a failed initialization

sh
# Join the worker nodes to the cluster
[root@node01 opt]# kubeadm join 172.2.25.50:6443 --token ttsrma.qm8qjiens2h89b1x \
> --discovery-token-ca-cert-hash sha256:6f27f291bf82d27668ce673672eab3e0b5d315e936c413db5f9447e15cdda1f8


Figure 5: Worker node joined successfully

sh
# Copy the certificates to master02: first create the target directory on master02
[root@master02 ~]# mkdir -pv /etc/kubernetes/pki/etcd/
# Copy the certificates from master01 to master02
[root@master01 ~]# scp -r /etc/kubernetes/pki/ca.crt root@172.2.25.51:/etc/kubernetes/pki/
[root@master01 ~]# scp -r /etc/kubernetes/pki/ca.key root@172.2.25.51:/etc/kubernetes/pki/
[root@master01 ~]# scp -r /etc/kubernetes/pki/sa.key root@172.2.25.51:/etc/kubernetes/pki/
[root@master01 ~]# scp -r /etc/kubernetes/pki/sa.pub root@172.2.25.51:/etc/kubernetes/pki/
[root@master01 ~]# scp -r /etc/kubernetes/pki/front-proxy-ca.crt root@172.2.25.51:/etc/kubernetes/pki/
[root@master01 ~]# scp -r /etc/kubernetes/pki/front-proxy-ca.key root@172.2.25.51:/etc/kubernetes/pki/
[root@master01 ~]# scp -r /etc/kubernetes/pki/etcd/ca.crt root@172.2.25.51:/etc/kubernetes/pki/etcd/
[root@master01 ~]# scp -r /etc/kubernetes/pki/etcd/ca.key root@172.2.25.51:/etc/kubernetes/pki/etcd/


Figure 6: Copying the certificates
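The eight scp commands above can also be written as a short loop over the same file list; a sketch, assuming it is run on master01 after the target directory exists on master02:

sh
# Copy the CA/SA material required for a second control-plane node
for f in ca.crt ca.key sa.key sa.pub front-proxy-ca.crt front-proxy-ca.key etcd/ca.crt etcd/ca.key; do
    scp /etc/kubernetes/pki/$f root@172.2.25.51:/etc/kubernetes/pki/$f
done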

sh
# Generate a join token on master01 for the second control-plane node
[root@master02 keepalived]# rm -rf /etc/kubernetes/pki/ca.crt
[root@master01 keepalived]# kubeadm token create --print-join-command
kubeadm join 172.2.25.50:6443 --token q7yuop.9s8xx2kewdxox52s --discovery-token-ca-cert-hash sha256:6f27f291bf82d27668ce673672eab3e0b5d315e936c413db5f9447e15cdda1f8 
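# Note: the printed command joins a host as a worker by default; to add master02 as a second
# control-plane node, append --control-plane to it (the certificates copied over in the
# previous step are what allow that control-plane join to succeed).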


Figure 7: Control-plane node joined successfully

06 Configure the Network

sh
# Write the flannel manifest
[root@master01 opt]# cat kube-flannel.yml 
apiVersion: v1
kind: Namespace
metadata:
  labels:
    k8s-app: flannel
    pod-security.kubernetes.io/enforce: privileged
  name: kube-flannel
---
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: flannel
  name: flannel
  namespace: kube-flannel
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: flannel
  name: flannel
rules:
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - nodes
  verbs:
  - get
  - list
  - watch
- apiGroups:
  - ""
  resources:
  - nodes/status
  verbs:
  - patch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: flannel
  name: flannel
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: flannel
subjects:
- kind: ServiceAccount
  name: flannel
  namespace: kube-flannel
---
apiVersion: v1
data:
  cni-conf.json: |
    {
      "name": "cbr0",
      "cniVersion": "0.3.1",
      "plugins": [
        {
          "type": "flannel",
          "delegate": {
            "hairpinMode": true,
            "isDefaultGateway": true
          }
        },
        {
          "type": "portmap",
          "capabilities": {
            "portMappings": true
          }
        }
      ]
    }
  net-conf.json: |
    {
      "Network": "172.22.0.0/16",
      "EnableNFTables": false,
      "Backend": {
        "Type": "vxlan"
      }
    }
kind: ConfigMap
metadata:
  labels:
    app: flannel
    k8s-app: flannel
    tier: node
  name: kube-flannel-cfg
  namespace: kube-flannel
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  labels:
    app: flannel
    k8s-app: flannel
    tier: node
  name: kube-flannel-ds
  namespace: kube-flannel
spec:
  selector:
    matchLabels:
      app: flannel
      k8s-app: flannel
  template:
    metadata:
      labels:
        app: flannel
        k8s-app: flannel
        tier: node
    spec:
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: kubernetes.io/os
                operator: In
                values:
                - linux
      containers:
      - args:
        - --ip-masq
        - --kube-subnet-mgr
        command:
        - /opt/bin/flanneld
        env:
        - name: POD_NAME
          valueFrom:
            fieldRef:
              fieldPath: metadata.name
        - name: POD_NAMESPACE
          valueFrom:
            fieldRef:
              fieldPath: metadata.namespace
        - name: EVENT_QUEUE_DEPTH
          value: "5000"
        image: docker.io/flannel/flannel:v0.25.6
        name: kube-flannel
        resources:
          requests:
            cpu: 100m
            memory: 50Mi
        securityContext:
          capabilities:
            add:
            - NET_ADMIN
            - NET_RAW
          privileged: false
        volumeMounts:
        - mountPath: /run/flannel
          name: run
        - mountPath: /etc/kube-flannel/
          name: flannel-cfg
        - mountPath: /run/xtables.lock
          name: xtables-lock
      hostNetwork: true
      initContainers:
      - args:
        - -f
        - /flannel
        - /opt/cni/bin/flannel
        command:
        - cp
        image: docker.io/flannel/flannel-cni-plugin:v1.5.1-flannel2
        name: install-cni-plugin
        volumeMounts:
        - mountPath: /opt/cni/bin
          name: cni-plugin
      - args:
        - -f
        - /etc/kube-flannel/cni-conf.json
        - /etc/cni/net.d/10-flannel.conflist
        command:
        - cp
        image: docker.io/flannel/flannel:v0.25.6
        name: install-cni
        volumeMounts:
        - mountPath: /etc/cni/net.d
          name: cni
        - mountPath: /etc/kube-flannel/
          name: flannel-cfg
      priorityClassName: system-node-critical
      serviceAccountName: flannel
      tolerations:
      - effect: NoSchedule
        operator: Exists
      volumes:
      - hostPath:
          path: /run/flannel
        name: run
      - hostPath:
          path: /opt/cni/bin
        name: cni-plugin
      - hostPath:
          path: /etc/cni/net.d
        name: cni
      - configMap:
          name: kube-flannel-cfg
        name: flannel-cfg
      - hostPath:
          path: /run/xtables.lock
          type: FileOrCreate
        name: xtables-lock
sh
# Apply the manifest to install the flannel components
[root@master01 opt]# kubectl apply -f kube-flannel.yml 
namespace/kube-flannel created
serviceaccount/flannel created
clusterrole.rbac.authorization.k8s.io/flannel created
clusterrolebinding.rbac.authorization.k8s.io/flannel created
configmap/kube-flannel-cfg created
daemonset.apps/kube-flannel-ds created


Figure 8: Applying the flannel manifest
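Once the DaemonSet is up there should be one kube-flannel pod per node and the nodes should move to Ready; a quick check:

sh
# One Running kube-flannel pod per node
kubectl -n kube-flannel get pods -o wide
# Nodes should report Ready once the CNI is working
kubectl get nodes -o wide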


Figure 9: Both masters shown as control-plane nodes

sh
# At this point, `ip a s` should show both the flannel.1 and cni0 interfaces
# If either is missing, create it with the commands below
sh
# Fix for a missing cni0 bridge
		# Symptom
		# Some nodes have only the flannel.1 device and no cni0 device; the cni0 bridge then has to be created by hand.
        # Fix
        # On any node missing cni0, create the bridge manually, making sure its subnet matches that node's flannel subnet
	# Create the cni0 bridge by hand
	# Example: assume K8s01's flannel.1 is on the 172.22.0.0 network.
sh
# Add the interface; mind the subnet
[root@node03 ~]# ip link add cni0 type bridge
[root@node03 ~]# ip link set dev cni0 up
[root@node03 ~]# ip addr add 172.22.3.1/24 dev cni0


Figure 10: The flannel.1 and cni0 subnets must correspond
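Rather than guessing the 172.22.x.1/24 address, the node's own subnet can be read from the file flannel writes at /run/flannel/subnet.env; a sketch, assuming flannel has already started once on that node:

sh
# FLANNEL_SUBNET is this node's gateway address, e.g. 172.22.3.1/24
source /run/flannel/subnet.env
echo "$FLANNEL_SUBNET"
ip link add cni0 type bridge
ip link set dev cni0 up
ip addr add "$FLANNEL_SUBNET" dev cni0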


Figure 11: The newly created interface is visible

07 Optimization

sh
# Enable kubectl shell auto-completion
[root@master01 ~]# kubectl completion bash > ~/.kube/completion.bash.inc
[root@master01 ~]# echo source '$HOME/.kube/completion.bash.inc' >> ~/.bashrc
[root@master01 ~]# source ~/.bashrc


Figure 12: Auto-completion working as expected

sh
# K8s health-inspection script
[root@master01 opt]# cat xunjian.sh 
#!/bin/bash
# Color codes
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
NC='\033[0m' # No Color
# 1. Check the status of the master components
echo -e "${YELLOW}=== Checking master component status ===${NC}"
kubectl get cs 2>/dev/null | while read -r line; do
    if [[ "$line" =~ "NAME" ]]; then
        continue
    fi
    if [ -n "$line" ]; then
        component=$(echo "$line" | awk '{print $1}')
        status=$(echo "$line" | awk '{print $2}')
        if [ "$status" != "Healthy" ]; then
            echo -e "${RED}WARNING: $component is unhealthy: $status${NC}"
        else
            echo -e "${GREEN}$component is healthy: $status${NC}"
        fi
    fi
done
# 2. Check the node status
echo -e "\n${YELLOW}=== Checking node status ===${NC}"
kubectl get nodes -o wide | while read -r line; do
    if [[ "$line" =~ "NAME" ]]; then
        continue
    fi
    if [ -n "$line" ]; then
        node=$(echo "$line" | awk '{print $1}')
        status=$(echo "$line" | awk '{print $2}')
        if [ "$status" != "Ready" ]; then
            echo -e "${RED}WARNING: node $node is not ready: $status${NC}"
        else
            echo -e "${GREEN}Node $node is ready: $status${NC}"
        fi
    fi
done
# 3. Check the flannel pods
echo -e "\n${YELLOW}=== Checking flannel status ===${NC}"
kubectl get pods -n kube-flannel -o wide | while read -r line; do
    if [[ "$line" =~ "NAME" ]]; then
        continue
    fi
    if [ -n "$line" ]; then
        pod=$(echo "$line" | awk '{print $1}')
        status=$(echo "$line" | awk '{print $3}')
        restarts=$(echo "$line" | awk '{print $4}')
        node=$(echo "$line" | awk '{print $7}')
        if [ "$status" != "Running" ]; then
            echo -e "${RED}WARNING: pod $pod on node $node is abnormal: $status (restarts: $restarts)${NC}"
        else
            echo -e "${GREEN}Pod $pod on node $node is running: $status (restarts: $restarts)${NC}"
        fi
    fi
done
# 4. Check that the cni0 and flannel.1 interfaces exist
echo -e "\n${YELLOW}=== Checking network interfaces ===${NC}"
for node in $(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); do
    echo -e "\n${YELLOW}Checking node $node:${NC}"
    if [ "$node" == "$(hostname)" ]; then
        # Local node: check directly
        cni0_exists=$(ip link show cni0 2>/dev/null | grep -q "cni0" && echo "yes" || echo "no")
        flannel_exists=$(ip link show flannel.1 2>/dev/null | grep -q "flannel.1" && echo "yes" || echo "no")
    else
        # Remote node: check over SSH (passwordless SSH must be configured)
        cni0_exists=$(ssh "$node" "ip link show cni0 2>/dev/null | grep -q 'cni0'" 2>/dev/null && echo "yes" || echo "no")
        flannel_exists=$(ssh "$node" "ip link show flannel.1 2>/dev/null | grep -q 'flannel.1'" 2>/dev/null && echo "yes" || echo "no")
    fi
    if [ "$cni0_exists" != "yes" ]; then
        echo -e "${RED}WARNING: node $node is missing the cni0 interface${NC}"
    else
        echo -e "${GREEN}Node $node has the cni0 interface${NC}"
    fi
    if [ "$flannel_exists" != "yes" ]; then
        echo -e "${RED}WARNING: node $node is missing the flannel.1 interface${NC}"
    else
        echo -e "${GREEN}Node $node has the flannel.1 interface${NC}"
    fi
done
# 5. Verify network connectivity between nodes
echo -e "\n${YELLOW}=== Verifying network connectivity ===${NC}"
nodes=($(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'))
for i in "${!nodes[@]}"; do
    for j in "${!nodes[@]}"; do
        if [ "$i" -ne "$j" ]; then
            src_node="${nodes[$i]}"
            dest_node="${nodes[$j]}"
            echo -e "\nTesting connectivity from $src_node to $dest_node..."
            # Get the InternalIP of the destination node
            dest_ip=$(kubectl get nodes "$dest_node" -o jsonpath='{.status.addresses[?(@.type=="InternalIP")].address}')
            if [ "$src_node" == "$(hostname)" ]; then
                # Test from the local node
                ping_result=$(ping -c 3 "$dest_ip" | grep -E "packets transmitted|0% packet loss")
            else
                # Test from a remote node (passwordless SSH must be configured)
                ping_result=$(ssh "$src_node" "ping -c 3 $dest_ip" 2>/dev/null | grep -E "packets transmitted|0% packet loss")
            fi
            # Match ", 0% packet loss" exactly so that "100% packet loss" is not counted as success
            if [[ "$ping_result" =~ ", 0% packet loss" ]]; then
                echo -e "${GREEN}Connectivity OK: $src_node can reach $dest_node ($dest_ip)${NC}"
            else
                echo -e "${RED}WARNING: connectivity problem: $src_node cannot reach $dest_node ($dest_ip)${NC}"
                echo -e "Details: $ping_result"
            fi
        fi
    done
done
echo -e "\n${YELLOW}=== Inspection complete ===${NC}"


Figure 13: Inspection results
