Installing Kubernetes on Rocky Linux

Prepare two virtual machines for this lab:

shell
172.16.157.129    # master
172.16.157.130    # node

1. Disable SELinux

shell
vim /etc/selinux/config
shell
SELINUX=disabled
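
Editing the file only takes effect after a reboot; if you also want to relax enforcement for the current session, one possible one-liner doing both looks like this:

shell
# Rewrite the setting and switch the running system to permissive until the reboot
sudo sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
sudo setenforce 0
getenforce   # "Permissive" now, "Disabled" after the reboot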

2. Disable firewalld

shell
systemctl disable firewalld.service
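
Note that disable alone only affects the next boot; a quick optional variant that also stops the running service:

shell
# Stop it immediately as well as on future boots
sudo systemctl disable --now firewalld.service
firewall-cmd --state   # expect "not running"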

3. Add hosts entries

shell
tee -a /etc/hosts <<EOF
172.16.157.129 k8s-master-01
172.16.157.130 k8s-worker-01
EOF

4. Disable swap

shell
vim /etc/fstab
shell
# /dev/mapper/rl_172-swap none                    swap    defaults        0 0
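
Commenting out the fstab entry only keeps swap off after the next reboot; a quick way to turn it off for the running system as well:

shell
# Turn swap off immediately and confirm
sudo swapoff -a
free -h   # the Swap line should show 0B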

5. Load kernel modules

shell
# 1. Load the required kernel modules
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF

# Load the modules immediately
sudo modprobe overlay
sudo modprobe br_netfilter

# 2. Configure bridge networking and IP forwarding
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
EOF

# 3. Apply the settings
sudo sysctl --system

Verify everything:

shell
# Check that the modules are loaded
lsmod | grep -E "(overlay|br_netfilter)"

# Check the network parameters
sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward
shell
reboot

6. Add the Docker CE repository

(/etc/yum.repos.d/docker-ce.repo)

shell
sudo tee /etc/yum.repos.d/docker-ce.repo <<EOF
[docker-ce-stable]
name=Docker CE Stable
baseurl=https://mirrors.aliyun.com/docker-ce/linux/centos/$(rpm -E %rhel)/x86_64/stable
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/docker-ce/linux/centos/gpg
EOF
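
It may be worth refreshing the metadata cache once to confirm the new repo is reachable (a quick optional check):

shell
# Rebuild the cache and confirm the repo shows up
sudo dnf makecache
sudo dnf repolist | grep docker-ce-stable
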
shell
# Install containerd.io
sudo yum install -y containerd.io

# Generate the default containerd configuration
sudo containerd config default | sudo tee /etc/containerd/config.toml

# Switch to the systemd cgroup driver (important!)
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/g' /etc/containerd/config.toml

# Point the sandbox (pause) image at a domestic mirror to speed up the download
sudo sed -i 's|registry.k8s.io/pause|registry.aliyuncs.com/google_containers/pause|g' /etc/containerd/config.toml


# Restart and enable containerd
systemctl restart containerd
systemctl status containerd
systemctl enable containerd

Since Docker is not installed, create a crictl configuration file so containers can be inspected and debugged directly through containerd:

shell
sudo tee /etc/crictl.yaml <<'EOF'
runtime-endpoint: unix:///run/containerd/containerd.sock
image-endpoint: unix:///run/containerd/containerd.sock
timeout: 10
debug: false
EOF
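
A quick sanity check that crictl can now talk to containerd:

shell
# Should print runtime/image service info and an (initially empty) container list
sudo crictl info
sudo crictl ps -a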

7. Add the Kubernetes repository

(/etc/yum.repos.d/kubernetes.repo)

shell
cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/
enabled=1
gpgcheck=1
gpgkey=https://mirrors.aliyun.com/kubernetes-new/core/stable/v1.28/rpm/repodata/repomd.xml.key
EOF
shell
# Install the Kubernetes components
yum install -y kubelet kubeadm kubectl

# Enable and start kubelet (it will keep restarting until kubeadm init runs; that is expected)
sudo systemctl enable kubelet && sudo systemctl start kubelet

Pre-pull all the images needed to initialize the Kubernetes cluster:

shell
kubeadm config images pull --image-repository=registry.aliyuncs.com/google_containers
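
You can confirm the images actually landed in containerd before initializing (a quick check):

shell
# The aliyun-mirrored control-plane images should now be present
sudo crictl images | grep registry.aliyuncs.com/google_containers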

Initialize the control plane:

shell
kubeadm init \
  --image-repository=registry.aliyuncs.com/google_containers \
  --pod-network-cidr=10.244.0.0/16
  


# Or initialize using a config file you created yourself
#sudo kubeadm init --config=kubeadm-config.yaml
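
The config file referenced above is not shown in this post; a minimal sketch equivalent to the flags used here (the kubernetesVersion value is an assumption, adjust it to whatever you installed) might look like:

shell
# Hypothetical kubeadm-config.yaml matching the flags above
cat <<'EOF' | tee kubeadm-config.yaml
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
kubernetesVersion: v1.28.0
imageRepository: registry.aliyuncs.com/google_containers
networking:
  podSubnet: 10.244.0.0/16
EOF

# sudo kubeadm init --config=kubeadm-config.yaml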

If the initialization fails, clean up and retry:

shell
# 1. Reset kubeadm
sudo kubeadm reset -f

# 2. Remove the CNI network configuration
sudo rm -rf /etc/cni/net.d

# 3. Flush the iptables rules
sudo iptables -F && sudo iptables -t nat -F && sudo iptables -t mangle -F && sudo iptables -X

# 4. Restart containerd and kubelet
sudo systemctl restart containerd
sudo systemctl restart kubelet

# 5. Check kubelet status (it should be healthy now)
sudo systemctl status kubelet

On success you should see output like this:

shell
[addons] Applied essential addon: CoreDNS
[addons] Applied essential addon: kube-proxy

Your Kubernetes control-plane has initialized successfully!

To start using your cluster, you need to run the following as a regular user:

  mkdir -p $HOME/.kube
  sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
  sudo chown $(id -u):$(id -g) $HOME/.kube/config

Alternatively, if you are the root user, you can run:

  export KUBECONFIG=/etc/kubernetes/admin.conf

You should now deploy a pod network to the cluster.
Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
  https://kubernetes.io/docs/concepts/cluster-administration/addons/

Then you can join any number of worker nodes by running the following on each as root:

kubeadm join 172.16.157.129:6443 --token dlzfpi.q0prlokpqwa97hx4 \
        --discovery-token-ca-cert-hash sha256:c944839158db334e6bc806f4806d3227ddb778245a5e1eeda40fc0f198f0899a 
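
The bootstrap token in that join command expires (by default after about 24 hours); if you add a worker later, you can generate a fresh join command on the master:

shell
# Print a new, valid kubeadm join command
kubeadm token create --print-join-command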

8. Install the Calico network plugin

shell
curl -O https://raw.githubusercontent.com/projectcalico/calico/v3.27.0/manifests/calico.yaml
shell
# Back up the original file
cp calico.yaml calico.yaml.bak


# Replace all docker.io/calico references with quay.io/calico
sed -i 's|docker.io/calico|quay.io/calico|g' calico.yaml

Pull the images manually:

shell
# Core images:
crictl pull quay.io/calico/node:v3.27.0
crictl pull quay.io/calico/cni:v3.27.0  
crictl pull quay.io/calico/kube-controllers:v3.27.0

# Additional images:
crictl pull quay.io/calico/pod2daemon-flexvol:v3.27.0
crictl pull quay.io/calico/csi:v3.27.0
crictl pull quay.io/calico/typha:v3.27.0
crictl pull quay.io/calico/node-driver-registrar:v3.27.0
shell
# 3. Apply the manifest
kubectl apply -f calico.yaml


# 4. Watch the pod status
kubectl get pods -n kube-system -w
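
Once the Calico pods are Running, the nodes should flip to Ready (a quick check):

shell
kubectl get nodes -o wide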

9. Install the Harbor certificate

This uses containerd's certs.d mechanism.

shell
# Create a certificate directory dedicated to this registry
sudo mkdir -p /etc/containerd/certs.d/192.168.31.130

# Copy in the certificate; /tmp/harbor-ca.crt was uploaded manually from the Harbor host
sudo cp /tmp/harbor-ca.crt /etc/containerd/certs.d/192.168.31.130/ca.crt

📁 Path format: /etc/containerd/certs.d/<registry-host>/ca.crt

Make sure containerd has config_path enabled (this is the key part!):

shell
vim /etc/containerd/config.toml
toml
    [plugins.'io.containerd.cri.v1.images'.registry]
      config_path = ''

Change it to:

toml
    [plugins.'io.containerd.cri.v1.images'.registry]
      config_path = '/etc/containerd/certs.d'
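
After changing this, restart containerd and try a pull from Harbor to confirm the certificate is trusted. The image name below is just an example from my Harbor project; use whatever exists on your side, and add --creds for a private project:

shell
# Reload containerd so the certs.d configuration is picked up
sudo systemctl restart containerd

# Test pull against Harbor; add --creds user:password if the project is private
sudo crictl pull 192.168.31.130/development/dashboard:v2.7.0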

10. Install the Kubernetes Dashboard

shell
curl -LO https://raw.githubusercontent.com/kubernetes/dashboard/v2.7.0/aio/deploy/recommended.yaml

vim recommended.yaml

Find this Service block:

yaml
kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  ports:
    - port: 443
      targetPort: 8443
  selector:
    k8s-app: kubernetes-dashboard

Change it to:

yaml
kind: Service
apiVersion: v1
metadata:
  labels:
    k8s-app: kubernetes-dashboard
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
spec:
  type: NodePort  # expose it as a NodePort
  ports:
    - port: 443
      targetPort: 8443
      nodePort: 30443  # ← set explicitly (must be within 30000-32767)
  selector:
    k8s-app: kubernetes-dashboard

Two image references also need to be pointed at something reachable:

yaml
image: kubernetesui/dashboard:v2.7.0
# I changed this to my local Harbor copy
image: 192.168.31.130/development/dashboard:v2.7.0
yaml
image: kubernetesui/metrics-scraper:v1.0.8
# I changed this to my local Harbor copy
image: 192.168.31.130/development/metrics-scraper:v1.0.8

To tolerate the control-plane taint, update both tolerations blocks as follows:

yaml
      tolerations:
        - key: node-role.kubernetes.io/master
          effect: NoSchedule
        - key: node-role.kubernetes.io/control-plane  # 👈 add these two lines
          effect: NoSchedule

Install it:

shell
kubectl apply -f recommended.yaml

Check the result:

shell
[root@172 192.168.31.130]# kubectl get pod -A
NAMESPACE              NAME                                         READY   STATUS    RESTARTS   AGE
kube-system            calico-kube-controllers-86778b9f8c-8ctqw     1/1     Running   0          22h
kube-system            calico-node-bnvgn                            1/1     Running   0          22h
kube-system            coredns-66f779496c-6cfqj                     1/1     Running   0          22h
kube-system            coredns-66f779496c-8j7j2                     1/1     Running   0          22h
kube-system            etcd-172.16.157.129                          1/1     Running   0          22h
kube-system            kube-apiserver-172.16.157.129                1/1     Running   0          22h
kube-system            kube-controller-manager-172.16.157.129       1/1     Running   0          22h
kube-system            kube-proxy-9cddd                             1/1     Running   0          22h
kube-system            kube-scheduler-172.16.157.129                1/1     Running   0          22h
kubernetes-dashboard   dashboard-metrics-scraper-79db99bf88-x2znj   1/1     Running   0          22m
kubernetes-dashboard   kubernetes-dashboard-5f6f8d5bdb-bqmkz        1/1     Running   0          22m
[root@172 192.168.31.130]# kubectl -n kubernetes-dashboard get svc kubernetes-dashboard
NAME                   TYPE       CLUSTER-IP     EXTERNAL-IP   PORT(S)         AGE
kubernetes-dashboard   NodePort   10.99.14.197   <none>        443:30443/TCP   35m

Browse to https://<node-ip>:30443 (for this lab, https://172.16.157.129:30443).

Create an admin user, admin-user.yaml:

yaml
# admin-user.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: admin-user
  namespace: kubernetes-dashboard
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: admin-user
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: admin-user
  namespace: kubernetes-dashboard

Apply it and fetch a token:

shell
kubectl apply -f admin-user.yaml
kubectl -n kubernetes-dashboard create token admin-user

Copy the printed token, choose the Token option on the Dashboard login page, and paste it in.
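
Tokens created this way are short-lived (roughly an hour by default); for a lab you can request a longer-lived one, for example:

shell
kubectl -n kubernetes-dashboard create token admin-user --duration=24h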

How to delete admin-user:

shell
kubectl -n kubernetes-dashboard delete sa admin-user
kubectl delete clusterrolebinding admin-user

Create a developer account, dev-user.yaml:

yaml
---
# 1. Create the ServiceAccount (in the default namespace)
apiVersion: v1
kind: ServiceAccount
metadata:
  name: dev-user
  namespace: default

---
# 2. Define a Role (limited to the default namespace)
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: default
  name: dev-role
rules:
- apiGroups: [""]
  resources: ["pods", "services", "configmaps", "persistentvolumeclaims"]
  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
- apiGroups: ["apps"]
  resources: ["deployments", "replicasets", "daemonsets", "statefulsets"]
  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
- apiGroups: ["batch"]
  resources: ["jobs", "cronjobs"]
  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]
- apiGroups: ["networking.k8s.io"]
  resources: ["ingresses"]
  verbs: ["get", "list", "watch", "create", "update", "patch", "delete"]

---
# 3. Bind the Role to the ServiceAccount
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: dev-rolebinding
  namespace: default
subjects:
- kind: ServiceAccount
  name: dev-user
  namespace: default
roleRef:
  kind: Role
  name: dev-role
  apiGroup: rbac.authorization.k8s.io

Apply the configuration:

shell
kubectl apply -f dev-user.yaml

Get a login token (for the Dashboard or kubectl):

shell
kubectl -n default create token dev-user
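
If the developer should also work from kubectl rather than only the Dashboard, one way is to wrap the token in its own kubeconfig. This is a sketch that assumes you run it on the master, where /etc/kubernetes/pki/ca.crt and the API endpoint 172.16.157.129:6443 from the init output are available:

shell
# Hypothetical sketch: build a standalone kubeconfig for dev-user
TOKEN=$(kubectl -n default create token dev-user)
kubectl config --kubeconfig=dev-user.kubeconfig set-cluster kubernetes \
  --server=https://172.16.157.129:6443 \
  --certificate-authority=/etc/kubernetes/pki/ca.crt --embed-certs=true
kubectl config --kubeconfig=dev-user.kubeconfig set-credentials dev-user --token="$TOKEN"
kubectl config --kubeconfig=dev-user.kubeconfig set-context dev-user@kubernetes \
  --cluster=kubernetes --user=dev-user --namespace=default
kubectl config --kubeconfig=dev-user.kubeconfig use-context dev-user@kubernetes

# Verify: listing pods in default should work, cluster-scoped requests should be denied
kubectl --kubeconfig=dev-user.kubeconfig get pods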

11. Startup script

All-in-one boot self-healing script, startup-k8s.sh:

shell
vim /usr/local/bin/startup-k8s.sh
bash
#!/bin/bash
# startup-k8s.sh - all-in-one Kubernetes boot self-healing script
# Place it at /usr/local/bin/startup-k8s.sh and optionally run it at boot

set -e

echo "🚀 Kubernetes boot self-healing started..."

# Make sure kubectl can find the admin kubeconfig when run from systemd
export KUBECONFIG=${KUBECONFIG:-/etc/kubernetes/admin.conf}

# Get the node name
NODE_NAME=$(hostname)

# 1. Make sure containerd and kubelet are running
echo "✅ Ensuring the container runtime and kubelet are started..."
sudo systemctl enable --now containerd kubelet

# 2. Wait for containerd to be ready (up to 60 seconds)
echo "⏳ Waiting for containerd..."
for i in {1..30}; do
  if sudo crictl info &>/dev/null; then
    echo "   ✅ containerd is ready"
    break
  fi
  sleep 2
done

# 3. Wait for the API server to become available
echo "⏳ Waiting for the Kubernetes API..."
for i in {1..60}; do
  if kubectl get nodes &>/dev/null; then
    echo "   ✅ API server is ready"
    break
  fi
  sleep 2
done

# 4. Automatically uncordon (clear SchedulingDisabled)
echo "🔧 Checking whether the node needs to be uncordoned..."
if kubectl get node "$NODE_NAME" -o jsonpath='{.spec.unschedulable}' 2>/dev/null | grep -q 'true'; then
  kubectl uncordon "$NODE_NAME"
  echo "   ✅ Node is schedulable again"
else
  echo "   ℹ️ Node is not cordoned, skipping"
fi

# 5. Automatically remove the control-plane taint (required for all-in-one!)
echo "🧹 Checking for the control-plane taint..."
if kubectl describe node "$NODE_NAME" | grep -q 'node-role.kubernetes.io/control-plane:NoSchedule'; then
  kubectl taint nodes "$NODE_NAME" node-role.kubernetes.io/control-plane:NoSchedule- 2>/dev/null || true
  echo "   ✅ Control-plane taint removed"
else
  echo "   ℹ️ Taint not present, skipping"
fi

# 6. Wait for CoreDNS (optional, confirms the cluster is fully usable)
echo "⏳ Waiting for CoreDNS..."
for i in {1..30}; do
  if kubectl get pod -n kube-system -l k8s-app=kube-dns --field-selector=status.phase=Running --no-headers | grep -q 'Running'; then
    echo "   ✅ CoreDNS is running, the cluster is fully ready!"
    break
  fi
  sleep 3
done

echo "🎉 Kubernetes all-in-one cluster self-healing complete!"
echo "👉 Run 'kubectl get pod -A' to check the status"
shell
sudo chmod +x /usr/local/bin/startup-k8s.sh
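
To actually run the script at boot (the script header only mentions this as optional), one possible approach is a small oneshot systemd unit; the unit name below is my own assumption, not something from the original setup:

shell
# Hypothetical oneshot unit that runs the self-healing script at boot
cat <<'EOF' | sudo tee /etc/systemd/system/startup-k8s.service
[Unit]
Description=Kubernetes all-in-one boot self-healing
After=network-online.target containerd.service kubelet.service
Wants=network-online.target

[Service]
Type=oneshot
ExecStart=/usr/local/bin/startup-k8s.sh
RemainAfterExit=true

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable startup-k8s.service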

12. Shutdown script

All-in-one shutdown script (generic version):

shell
vim /usr/local/bin/shutdown-k8s.sh
shell
#!/bin/bash
# shutdown-k8s.sh - for a single-node kubeadm / k3s cluster

set -e

echo "🔄  开始 Kubernetes 优雅关机流程..."

# 1. 获取当前节点名(兼容 kubeadm/k3s)
NODE_NAME=$(kubectl get nodes -o jsonpath='{.items[0].metadata.name}' 2>/dev/null || hostname)

echo "📍  节点名称: $NODE_NAME"

# 2. 将节点标记为不可调度(cordon)
echo "⏸️   标记节点为不可调度..."
kubectl cordon "$NODE_NAME" 2>/dev/null || echo "⚠️  kubectl 不可用,跳过 cordon"

# 3. 驱逐所有 Pod(保留 DaemonSet,删除临时数据)
echo "🧹  驱逐工作负载 Pod..."
kubectl drain "$NODE_NAME" \
  --ignore-daemonsets \
  --delete-emptydir-data \
  --force \
  --grace-period=30 \
  2>/dev/null || echo "⚠️  drain 失败,继续关机(可能无工作负载)"

# 4. (可选)停止 kubelet 和容器运行时(防止关机时写入)
echo "⏹️   停止 kubelet 和 containerd..."
sudo systemctl stop kubelet containerd 2>/dev/null || echo "⚠️  服务停止失败,继续关机"

# 5. 执行关机
echo "🔌  正在关机..."
sudo shutdown -h now
shell 复制代码
sudo chmod +x /usr/local/bin/shutdown-k8s.sh