Prepare two VMs for the experiment
shell
172.16.157.129 // master
172.16.157.130 // node
1. Disable SELinux
shell
vim /etc/selinux/config
shell
SELINUX=disabled
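SELinux only reads the config file at boot; to turn enforcement off for the current session too and confirm the state, a quick sketch using the standard SELinux tools:
shell
# switch to permissive mode immediately (lasts until the next reboot)
sudo setenforce 0
# verify the runtime mode and the configured mode
getenforce
sestatus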
2. Disable firewalld
shell
# stop firewalld now and keep it disabled across reboots
systemctl disable --now firewalld.service
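A quick check that the firewall is really stopped and will stay off after a reboot:
shell
systemctl is-active firewalld   # expect: inactive
systemctl is-enabled firewalld  # expect: disabled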
3. Edit hosts
shell
tee -a /etc/hosts <<EOF
172.16.157.129 k8s-master-01
172.16.157.130 k8s-worker-01
EOF
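To confirm the new entries resolve on both machines, a simple lookup (getent reads /etc/hosts):
shell
getent hosts k8s-master-01 k8s-worker-01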
4. Disable swap
shell
vim /etc/fstab
shell
# /dev/mapper/rl_172-swap none swap defaults 0 0
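Commenting out the fstab entry only takes effect after a reboot; to turn swap off right away and verify, you can run:
shell
# disable all swap devices immediately
sudo swapoff -a
# verify: the Swap line should read 0B and swapon should print nothing
free -h
swapon --show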
5. Load kernel modules
shell
# 1. Load the required kernel modules
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
# Load the modules immediately
sudo modprobe overlay
sudo modprobe br_netfilter
# 2. Configure bridged traffic and IP forwarding
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF
# 3. Apply the settings
sudo sysctl --system
Verify all settings
shell
# Check the modules
lsmod | grep -E "(overlay|br_netfilter)"
# Check the network parameters
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward
shell
reboot
6. Docker CE repository
(/etc/yum.repos.d/docker-ce.repo)
shell
sudo tee /etc/yum.repos.d/docker-ce.repo <<EOF
[docker-ce-stable]
name=Docker CE Stable
baseurl=https://mirrors.aliyun.com/docker-ce/linux/centos/$(rpm -E %rhel)/x86_64/stable
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/docker-ce/linux/centos/gpg
EOF
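Before installing anything, it's worth refreshing the metadata and confirming the new repo resolves (a quick sanity check):
shell
sudo yum makecache
sudo yum repolist | grep docker-ce
yum list containerd.io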
shell
# Install containerd.io
sudo yum install -y containerd.io
# Generate the default containerd config
sudo containerd config default | sudo tee /etc/containerd/config.toml
# Switch to the systemd cgroup driver (important!)
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml
# Point the sandbox (pause) image at a domestic mirror (faster downloads)
sudo sed -i 's|registry.k8s.io/pause|registry.aliyuncs.com/google_containers/pause|g' /etc/containerd/config.toml
# Restart containerd
systemctl restart containerd
systemctl status containerd
systemctl enable containerd
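A quick way to confirm the two sed edits above actually landed in the config:
shell
grep -n 'SystemdCgroup' /etc/containerd/config.toml   # expect: SystemdCgroup = true
grep -n 'sandbox' /etc/containerd/config.toml         # expect: the aliyuncs pause image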
Since Docker is not installed, create (or replace) a crictl config file so containers can be debugged directly against containerd:
shell
sudo tee /etc/crictl.yaml <<'EOF'
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
timeout: 10
debug: false
EOF
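With this file in place crictl talks to containerd directly; a couple of standard subcommands to sanity-check the runtime:
shell
# should print runtime and image service status without errors
sudo crictl info | head -n 20
# list cached images and all containers (will be empty before kubeadm init)
sudo crictl images
sudo crictl ps -a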
7. Kubernetes repository
(/etc/yum.repos.d/kubernetes.repo)
shell
cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/repodata/repomd.xml.key
EOF
shell
# Install the Kubernetes components
yum install -y kubelet kubeadm kubectl
# Enable and start kubelet
sudo systemctl enable kubelet && sudo systemctl start kubelet
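Confirm the installed versions line up (all three should report v1.28.x with this repo):
shell
kubeadm version -o short
kubelet --version
kubectl version --client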
Pre-pull all images needed for Kubernetes cluster initialization
shell
kubeadm config images pull --image-repository=registry.aliyuncs.com/google_containers
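If you want to see which images kubeadm is going to pull before actually pulling them:
shell
kubeadm config images list --image-repository=registry.aliyuncs.com/google_containers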
shell
kubeadm init \
--image-repository=registry.aliyuncs.com/google_containers \
--pod-network-cidr=10.244.0.0/16
# Or initialize with a config file you created yourself
#sudo kubeadm init --config=kubeadm-config.yaml
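For reference, a kubeadm-config.yaml roughly equivalent to the flags above might look like the sketch below; the version number and advertise address are assumptions based on this walkthrough, so adjust them to your environment:
yaml
# kubeadm-config.yaml - example sketch, values assumed from this guide
apiVersion: kubeadm.k8s.io/v1beta3
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: 172.16.157.129
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: v1.28.0
imageRepository: registry.aliyuncs.com/google_containers
networking:
  podSubnet: 10.244.0.0/16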
If initialization fails:
shell
# 1. Reset kubeadm
sudo kubeadm reset -f
# 2. Clean up CNI network configuration
sudo rm -rf /etc/cni/net.d
# 3. Flush iptables rules
sudo iptables -F && sudo iptables -t nat -F && sudo iptables -t mangle -F && sudo iptables -X
# 4. Restart containerd and kubelet
sudo systemctl restart containerd
sudo systemctl restart kubelet
# 5. Check kubelet status (it should be healthy now)
sudo systemctl status kubelet
On success, the output ends like this:
shell
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy
Your Kubernetes control-plane has initialized successfully!
To start using your cluster, you need to run the following as a regular user:
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
Alternatively, if you are the root user, you can run:
export KUBECONFIG=/etc/kubernetes/admin.conf
You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
https://kubernetes.io/docs/concepts/cluster-administration/addons/
Then you can join any number of worker nodes by running the following on each as root:
kubeadm join 172.16.157.129:6443 --token dlzfpi.q0prlokpqwa97hx4 \
--discovery-token-ca-cert-hash sha256:c944839158db334e6bc806f4806d3227ddb778245a5e1eeda40fc0f198f0899a
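The join token above expires after 24 hours by default; if the worker joins later or the command is lost, regenerate it on the master:
shell
kubeadm token create --print-join-command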
8. Install the Calico network plugin
shell
curl -O https://raw.githubusercontent.com/projectcalico/calico/v3.27.0/manifests/calico.yaml
shell
# Back up the original file
cp calico.yaml calico.yaml.bak
# Replace every docker.io/calico with quay.io/calico
sed -i 's|docker.io/calico|quay.io/calico|g' calico.yaml
Manually pull the images
shell
# Core images:
crictl pull quay.io/calico/node:v3.27.0
crictl pull quay.io/calico/cni:v3.27.0
crictl pull quay.io/calico/kube-controllers:v3.27.0
# Additional images:
crictl pull quay.io/calico/pod2daemon-flexvol:v3.27.0
crictl pull quay.io/calico/csi:v3.27.0
crictl pull quay.io/calico/typha:v3.27.0
crictl pull quay.io/calico/node-driver-registrar:v3.27.0
shell
# Apply the updated manifest
kubectl apply -f calico.yaml
# Watch the pod status
kubectl get pods -n kube-system -w
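Once the Calico pods are Running, the node should switch to Ready; a quick way to confirm:
shell
kubectl get nodes -o wide
kubectl get pods -n kube-system -l k8s-app=calico-node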
9. Install the Harbor certificate
Use containerd's certs.d mechanism
shell
# Create a certificate directory dedicated to this registry
sudo mkdir -p /etc/containerd/certs.d/192.168.31.130
# Copy in the certificate; /tmp/harbor-ca.crt was uploaded manually from the Harbor machine
sudo cp /tmp/harbor-ca.crt /etc/containerd/certs.d/192.168.31.130/ca.crt
📁 Path format:
/etc/containerd/certs.d/<registry-host>/ca.crt
Make sure containerd has config_path enabled (critical!)
shell
vim /etc/containerd/config.toml
toml
[plugins.'io.containerd.cri.v1.images'.registry]
config_path = ''
Change it to:
toml
[plugins.'io.containerd.cri.v1.images'.registry]
config_path = '/etc/containerd/certs.d'
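containerd only reads config.toml at startup, so restart it after this change. Optionally you can also add an explicit hosts.toml next to the CA (certs.d also accepts a bare ca.crt); the snippet below is a sketch assuming Harbor is served over HTTPS at 192.168.31.130, and it ends with a pull to verify the trust chain:
shell
# reload the registry configuration
sudo systemctl restart containerd
# optional: explicit hosts.toml pointing at the CA placed above
sudo tee /etc/containerd/certs.d/192.168.31.130/hosts.toml <<'EOF'
server = "https://192.168.31.130"

[host."https://192.168.31.130"]
  ca = "/etc/containerd/certs.d/192.168.31.130/ca.crt"
EOF
# verify: pulling an image from Harbor should now succeed
sudo crictl pull 192.168.31.130/development/dashboard:v2.7.0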
10. Install the Dashboard
shell
curl -LO https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml
vim recommended.yaml
Find this block:
yaml
kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  ports:
    - port: 443
      targetPort: 8443
  selector:
    k8s-app: kubernetes-dashboard
Modify it as follows:
yaml
kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  type: NodePort                 # NodePort mode
  ports:
    - port: 443
      targetPort: 8443
      nodePort: 30443            # ← set explicitly (must be in the 30000-32767 range)
  selector:
    k8s-app: kubernetes-dashboard
Two image references also need to be changed to ones that are actually reachable:
yaml
image: kubernetesui/dashboard:v2.7.0
# I changed this to my own local registry image
image: 192.168.31.130/development/dashboard:v2.7.0
yaml
image: kubernetesui/metrics-scraper:v1.0.8
# I changed this to my own local registry image
image: 192.168.31.130/development/metrics-scraper:v1.0.8
Tolerate the control-plane taints; update both tolerations blocks as follows:
yaml
tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule
  - key: node-role.kubernetes.io/control-plane   # 👈 add these two lines
    effect: NoSchedule
Install
shell
kubectl apply -f recommended.yaml
Check
shell
[root@172 192.168.31.130]# kubectl get pod -A
NAMESPACE NAME READY STATUS RESTARTS AGE
kube-system calico-kube-controllers-86778b9f8c-8ctqw 1/1 Running 0 22h
kube-system calico-node-bnvgn 1/1 Running 0 22h
kube-system coredns-66f779496c-6cfqj 1/1 Running 0 22h
kube-system coredns-66f779496c-8j7j2 1/1 Running 0 22h
kube-system etcd-172.16.157.129 1/1 Running 0 22h
kube-system kube-apiserver-172.16.157.129 1/1 Running 0 22h
kube-system kube-controller-manager-172.16.157.129 1/1 Running 0 22h
kube-system kube-proxy-9cddd 1/1 Running 0 22h
kube-system kube-scheduler-172.16.157.129 1/1 Running 0 22h
kubernetes-dashboard dashboard-metrics-scraper-79db99bf88-x2znj 1/1 Running 0 22m
kubernetes-dashboard kubernetes-dashboard-5f6f8d5bdb-bqmkz 1/1 Running 0 22m
[root@172 192.168.31.130]# kubectl -n kubernetes-dashboard get svc kubernetes-dashboard
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes-dashboard NodePort 10.99.14.197 <none> 443:30443/TCP 35m
Open in a browser: https://<node-ip>:30443 (for example https://172.16.157.129:30443)
Create an admin user: admin-user.yaml
yaml
# admin-user.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kubernetes-dashboard
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: admin-user
    namespace: kubernetes-dashboard
Apply it and get a token:
shell
kubectl apply -f admin-user.yaml
kubectl -n kubernetes-dashboard create token admin-user
Copy the token from the output, choose the Token option on the Dashboard login page, and paste it in.
How to delete admin-user
shell
kubectl -n kubernetes-dashboard delete sa admin-user
kubectl delete clusterrolebinding admin-user
Create a developer account: dev-user.yaml
yaml
---
# 1. Create the ServiceAccount (in the default namespace)
apiVersion: v1
kind: ServiceAccount
metadata:
  name: dev-user
  namespace: default
---
# 2. Define a Role (limited to the default namespace)
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: default
  name: dev-role
rules:
  - apiGroups: [""]
    resources: ["pods", "services", "configmaps", "persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: ["apps"]
    resources: ["deployments", "replicasets", "daemonsets", "statefulsets"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: ["batch"]
    resources: ["jobs", "cronjobs"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
  - apiGroups: ["networking.k8s.io"]
    resources: ["ingresses"]
    verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
---
# 3. Bind the Role to the ServiceAccount
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: dev-rolebinding
  namespace: default
subjects:
  - kind: ServiceAccount
    name: dev-user
    namespace: default
roleRef:
  kind: Role
  name: dev-role
  apiGroup: rbac.authorization.k8s.io
Apply the configuration
shell
kubectl apply -f dev-user.yaml
Get a login token (for the Dashboard or kubectl)
shell
kubectl -n default create token dev-user
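If the developer works with kubectl instead of the Dashboard, the token can be packaged into a kubeconfig; this is a sketch, the file name dev-user.kubeconfig is arbitrary and --insecure-skip-tls-verify is only used to keep the example short:
shell
TOKEN=$(kubectl -n default create token dev-user)
kubectl config set-cluster k8s --server=https://172.16.157.129:6443 \
  --insecure-skip-tls-verify=true --kubeconfig=dev-user.kubeconfig
kubectl config set-credentials dev-user --token="$TOKEN" --kubeconfig=dev-user.kubeconfig
kubectl config set-context dev --cluster=k8s --user=dev-user --namespace=default \
  --kubeconfig=dev-user.kubeconfig
kubectl config use-context dev --kubeconfig=dev-user.kubeconfig
# quick check: the developer should only see resources in the default namespace
kubectl --kubeconfig=dev-user.kubeconfig get pods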
11. Startup script
All-in-One self-healing startup script: startup-k8s.sh
shell
vim /usr/local/bin/startup-k8s.sh
bash
#!/bin/bash
# startup-k8s.sh - All-in-One Kubernetes self-healing startup script
# Place it at /usr/local/bin/startup-k8s.sh and optionally run it at boot
set -e
echo "🚀 Starting the Kubernetes boot-time self-healing flow..."
# Make sure kubectl has a kubeconfig when run as root at boot (assumes the kubeadm admin.conf path)
export KUBECONFIG=${KUBECONFIG:-/etc/kubernetes/admin.conf}
# Get the node name
NODE_NAME=$(hostname)
# 1. Make sure containerd and kubelet are running
echo "✅ Ensuring the container runtime and kubelet are started..."
sudo systemctl enable --now containerd kubelet
# 2. Wait for containerd to become ready (up to 60 seconds)
echo "⏳ Waiting for containerd to become ready..."
for i in {1..30}; do
  if sudo crictl info &>/dev/null; then
    echo " ✅ containerd is ready"
    break
  fi
  sleep 2
done
# 3. Wait for the API server to become available
echo "⏳ Waiting for the Kubernetes API to become ready..."
for i in {1..60}; do
  if kubectl get nodes &>/dev/null; then
    echo " ✅ API server is ready"
    break
  fi
  sleep 2
done
# 4. Automatically uncordon the node (clear SchedulingDisabled)
echo "🔧 Checking whether the node needs to be uncordoned..."
if kubectl get node "$NODE_NAME" -o jsonpath='{.spec.unschedulable}' 2>/dev/null | grep -q 'true'; then
  kubectl uncordon "$NODE_NAME"
  echo " ✅ Node scheduling restored"
else
  echo " ℹ️ Node is not cordoned, skipping"
fi
# 5. Automatically remove the control-plane taint (required for All-in-One!)
echo "🧹 Checking for and removing the control-plane taint..."
if kubectl describe node "$NODE_NAME" | grep -q 'node-role.kubernetes.io/control-plane:NoSchedule'; then
  kubectl taint nodes "$NODE_NAME" node-role.kubernetes.io/control-plane:NoSchedule- 2>/dev/null || true
  echo " ✅ Control-plane taint removed"
else
  echo " ℹ️ Taint not present, skipping"
fi
# 6. Wait for CoreDNS to become ready (optional, confirms the cluster is fully usable)
echo "⏳ Waiting for CoreDNS to become ready..."
for i in {1..30}; do
  if kubectl get pod -n kube-system -l k8s-app=kube-dns --field-selector=status.phase=Running --no-headers | grep -q 'Running'; then
    echo " ✅ CoreDNS is running, the cluster is fully ready!"
    break
  fi
  sleep 3
done
echo "🎉 Kubernetes All-in-One cluster self-healing complete!"
echo "👉 Run 'kubectl get pod -A' to check the status"
shell
sudo chmod +x /usr/local/bin/startup-k8s.sh
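The script header mentions that running it at boot is optional; one way to wire that up is a small systemd oneshot unit. The unit name startup-k8s.service is just an example:
shell
sudo tee /etc/systemd/system/startup-k8s.service <<'EOF'
[Unit]
Description=Kubernetes All-in-One self-healing on boot
After=network-online.target containerd.service kubelet.service
Wants=network-online.target

[Service]
Type=oneshot
ExecStart=/usr/local/bin/startup-k8s.sh
RemainAfterExit=true

[Install]
WantedBy=multi-user.target
EOF
sudo systemctl daemon-reload
sudo systemctl enable startup-k8s.service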
12. Shutdown script
All-in-One shutdown script (generic version)
shell
vim /usr/local/bin/shutdown-k8s.sh
bash
#!/bin/bash
# shutdown-k8s.sh - for kubeadm / k3s single-node clusters
set -e
echo "🔄 Starting the graceful Kubernetes shutdown flow..."
# 1. Get the current node name (works for kubeadm and k3s)
NODE_NAME=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || hostname)
echo "📍 Node name: $NODE_NAME"
# 2. Mark the node as unschedulable (cordon)
echo "⏸️ Cordoning the node..."
kubectl cordon "$NODE_NAME" 2>/dev/null || echo "⚠️ kubectl unavailable, skipping cordon"
# 3. Evict all pods (keep DaemonSets, delete emptyDir data)
echo "🧹 Draining workload pods..."
kubectl drain "$NODE_NAME" \
  --ignore-daemonsets \
  --delete-emptydir-data \
  --force \
  --grace-period=30 \
  2>/dev/null || echo "⚠️ drain failed, continuing shutdown (there may be no workloads)"
# 4. (Optional) stop kubelet and the container runtime (avoid writes during power-off)
echo "⏹️ Stopping kubelet and containerd..."
sudo systemctl stop kubelet containerd 2>/dev/null || echo "⚠️ Failed to stop services, continuing shutdown"
# 5. Power off
echo "🔌 Shutting down..."
sudo shutdown -h now
shell
sudo chmod +x /usr/local/bin/shutdown-k8s.sh