1、环境准备
- 操作系统: CentOS 7.9
- Kubernetes 版本: 1.24.17
- 容器运行时: Containerd
- 网络插件: Calico 3.25
- 集群规模: 3 个 Master(Worker 节点可后续按第 6 步加入集群)
地址规划:
| 节点 | IP 地址 |
|--------------|-----------------|
| Master01 | 192.168.140.133 |
| Master02 | 192.168.140.134 |
| Master03 | 192.168.140.135 |
| VIP | 192.168.140.200 |
2、所有节点基础环境配置
2.1、配置主机名和 hosts 文件
bash
# 设置主机名(根据实际角色执行)
hostnamectl set-hostname master01 # master02, master03
# 配置 hosts 文件
cat >> /etc/hosts << EOF
192.168.140.133 master01
192.168.140.134 master02
192.168.140.135 master03
EOF
2.2、禁用 SELinux 和防火墙
bash
# 临时禁用 SELinux
setenforce 0
# 永久禁用
sed -i 's/^SELINUX=enforcing$/SELINUX=disabled/' /etc/selinux/config
# 停止并禁用防火墙
systemctl stop firewalld
systemctl disable firewalld
2.3、禁用 Swap
bash
# 临时禁用
swapoff -a
# 永久禁用 - 注释 /etc/fstab 中的 swap 行(仅处理尚未被注释的行)
sed -ri '/^[^#].*\sswap\s/s/^/#/' /etc/fstab
2.4、配置内核参数
bash
cat > /etc/sysctl.d/99-kubernetes.conf << EOF
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
vm.swappiness = 0
EOF
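# 加载内核模块,并写入 modules-load.d 以便重启后自动加载
cat > /etc/modules-load.d/kubernetes.conf << EOF
br_netfilter
overlay
EOF
modprobe br_netfilter
modprobe overlay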
# 应用配置
sysctl --system
2.5、配置时间同步
bash
# 安装 chrony
yum install -y chrony
# 启动并设置开机自启
systemctl enable --now chronyd
# 验证时间同步
chronyc sources
2.6、安装 Containerd
bash
# 安装依赖
yum install -y yum-utils device-mapper-persistent-data lvm2
# 配置 Docker CE 仓库(Containerd 可从该仓库获取)
yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.repo
# 安装 containerd
yum install -y containerd.io
# 生成默认配置文件
mkdir -p /etc/containerd
containerd config default > /etc/containerd/config.toml
# 修改配置:使用 systemd cgroup 驱动
sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml
# 修改 sandbox 镜像地址(解决国内拉取问题)
sed -i 's|registry.k8s.io/pause|registry.cn-hangzhou.aliyuncs.com/google_containers/pause|g' /etc/containerd/config.toml
# 启动 containerd
systemctl enable --now containerd
2.7、安装 Kubernetes 组件
bash
# 配置 Kubernetes 仓库
cat > /etc/yum.repos.d/kubernetes.repo << EOF
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=1
repo_gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF
# 安装指定版本
yum install -y kubelet-1.24.17 kubeadm-1.24.17 kubectl-1.24.17
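# 设置 kubelet 开机自启(kubeadm init/join 时会自动拉起 kubelet)
systemctl enable kubelet
# 锁定版本防止自动更新(versionlock 子命令由 yum-plugin-versionlock 提供)
yum install -y yum-plugin-versionlock
yum versionlock kubelet kubeadm kubectl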
3、配置高可用入口(HAProxy + Keepalived)
- 仅在 Master 节点上执行
3.1、安装 HAProxy 和 Keepalived
bash
# 安装
yum install -y haproxy keepalived
3.2、配置HAProxy
bash
cat > /etc/haproxy/haproxy.cfg << 'EOF'
global
log /dev/log local0
maxconn 4096
daemon
defaults
log global
mode tcp
retries 3
timeout connect 10s
timeout client 30s
timeout server 30s
frontend kubernetes-api
bind *:16443
mode tcp
default_backend kubernetes-api-servers
backend kubernetes-api-servers
mode tcp
balance roundrobin
option tcp-check
server master01 192.168.140.133:6443 check fall 3 rise 2
server master02 192.168.140.134:6443 check fall 3 rise 2
server master03 192.168.140.135:6443 check fall 3 rise 2
EOF
3.3、配置 Keepalived
- 在 master01 上配置(优先级最高;以下三份配置中的 interface 需改为本机实际网卡名,可用 ip a 查看,例如 ens33)
bash
cat > /etc/keepalived/keepalived.conf << 'EOF'
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 3
# 降幅须大于相邻节点的优先级差(10),否则健康检查失败也不会触发 VIP 漂移
weight -20
fall 10
rise 2
}
vrrp_instance VI_1 {
state MASTER
interface eth0
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass k8s@123
}
virtual_ipaddress {
192.168.140.200/24
}
track_script {
check_apiserver
}
}
EOF
- 在 master02 上配置(优先级次高)
bash
cat > /etc/keepalived/keepalived.conf << 'EOF'
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 3
# 降幅须大于相邻节点的优先级差(10),否则健康检查失败也不会触发 VIP 漂移
weight -20
fall 10
rise 2
}
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 51
priority 90
advert_int 1
authentication {
auth_type PASS
auth_pass k8s@123
}
virtual_ipaddress {
192.168.140.200/24
}
track_script {
check_apiserver
}
}
EOF
- 在 master03 上配置(优先级最低)
bash
cat > /etc/keepalived/keepalived.conf << 'EOF'
global_defs {
router_id LVS_DEVEL
}
vrrp_script check_apiserver {
script "/etc/keepalived/check_apiserver.sh"
interval 3
# 降幅须大于相邻节点的优先级差(10),否则健康检查失败也不会触发 VIP 漂移
weight -20
fall 10
rise 2
}
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 51
priority 80
advert_int 1
authentication {
auth_type PASS
auth_pass k8s@123
}
virtual_ipaddress {
192.168.140.200/24
}
track_script {
check_apiserver
}
}
EOF
3.4、创建健康检查脚本(所有 Master 节点)
bash
cat > /etc/keepalived/check_apiserver.sh << 'EOF'
#!/bin/bash
if [ -f /etc/kubernetes/admin.conf ]; then
curl --silent --max-time 2 --insecure https://localhost:6443/healthz > /dev/null 2>&1
exit $?
else
# admin.conf 不存在(本节点尚未初始化或加入集群)时,仅检查本地 6443 端口是否在监听
nc -zv localhost 6443 > /dev/null 2>&1
exit $?
fi
EOF
chmod +x /etc/keepalived/check_apiserver.sh
3.5、启动 HAProxy 和 Keepalived
bash
# 启动并设置开机自启
systemctl enable --now haproxy
systemctl enable --now keepalived
# 验证服务状态
systemctl status haproxy
systemctl status keepalived
4、初始化第一个 Master 节点
- 仅在 master01 上执行
4.1、创建 kubeadm 配置文件
bash
cat > kubeadm-init-config.yaml << EOF
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
nodeRegistration:
  name: master01
  criSocket: unix:///run/containerd/containerd.sock
  imagePullPolicy: IfNotPresent
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
clusterName: kubernetes
kubernetesVersion: 1.24.17
controlPlaneEndpoint: "192.168.140.200:16443"
networking:
  dnsDomain: cluster.local
  podSubnet: 10.244.0.0/16
  serviceSubnet: 10.96.0.0/12
imageRepository: registry.cn-hangzhou.aliyuncs.com/google_containers
etcd:
  local:
    dataDir: /var/lib/etcd
apiServer:
  timeoutForControlPlane: 4m0s
---
apiVersion: kubeproxy.config.k8s.io/v1alpha1
kind: KubeProxyConfiguration
mode: ipvs
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
EOF
4.2、初始化集群
bash
# 预拉取镜像
kubeadm config images pull --config=kubeadm-init-config.yaml
# 初始化集群
kubeadm init --config=kubeadm-init-config.yaml --upload-certs
4.3、配置 kubectl
bash
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
# 验证集群状态
kubectl get nodes
kubectl get pods -n kube-system
# 保存初始化输出中的关键信息:
## kubeadm join 命令(用于添加其他 Master,含参数 --certificate-key)
### kubeadm join 192.168.140.200:16443 --token <token> \
### --discovery-token-ca-cert-hash sha256:<hash> \
### --control-plane --certificate-key <certificate-key>
## kubeadm join 命令(用于添加 Worker)
### kubeadm join 192.168.140.200:16443 --token <token> \
### --discovery-token-ca-cert-hash sha256:<hash>
5、加入其他 Master
- 在 master02 和 master03 上执行
bash
# 加入集群
# 使用第 4 步 kubeadm init 输出的控制面加入命令(地址为 VIP:16443),格式如下:
kubeadm join 192.168.140.200:16443 --token <token> \
--discovery-token-ca-cert-hash sha256:<hash> \
--control-plane --certificate-key <certificate-key>
# 配置 kubectl
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
6、加入其他 Worker 节点
bash
# 加入集群
# 使用初始化输出的 worker 加入命令(地址为 VIP:16443)
kubeadm join 192.168.140.200:16443 --token <token> \
--discovery-token-ca-cert-hash sha256:<hash>
# 如果 token 已过期,在 master01 上重新生成
kubeadm token create --print-join-command
7、安装 Calico 网络插件
- 仅在 master01 上执行
bash
# 安装 Calico 网络插件
kubectl apply -f https://raw.githubusercontent.com/projectcalico/calico/v3.25.0/manifests/calico.yaml
# (可选)master 去污点(1.24 的控制面节点同时带有 control-plane 和 master 两个污点,需一并移除)
kubectl taint node master01 node-role.kubernetes.io/control-plane- node-role.kubernetes.io/master-
8、验证
8.1、查看集群状态
bash
# 查看节点
kubectl get nodes -o wide
# 查看 pod
kubectl get pods -n kube-system
8.2、验证高可用
bash
# 查看 VIP 所在节点
ip a show | grep 192.168.140.200
# 模拟故障:在 VIP 所在 master 上停止 keepalived
systemctl stop keepalived
# VIP 应自动漂移到其他 master
ip a show | grep 192.168.140.200
# 在 VIP 所在 master 上停止 kubelet
# 验证集群状态依旧正常,相关资源依旧可用
systemctl stop kubelet
# 恢复:在各自被停止服务的节点上重新启动 kubelet 和 keepalived
systemctl start kubelet
systemctl start keepalived