text
# Download the Helm v3.14.3 release tarball (linux-amd64).
wget --quiet https://get.helm.sh/helm-v3.14.3-linux-amd64.tar.gz
shell
# Export the k8s control-plane and flannel images from containerd's k8s.io
# namespace into tarballs for offline transfer/loading.
# Tarball name is derived uniformly as <basename>:<tag>.tar — this also fixes
# the original flannel-cni-plugin.tar, which was missing its version tag.
images=(
  registry.k8s.io/kube-apiserver:v1.29.3
  registry.k8s.io/kube-controller-manager:v1.29.3
  registry.k8s.io/kube-scheduler:v1.29.3
  registry.k8s.io/kube-proxy:v1.29.3
  registry.k8s.io/coredns/coredns:v1.11.1
  registry.k8s.io/pause:3.9
  registry.k8s.io/etcd:3.5.12-0
  docker.io/flannel/flannel-cni-plugin:v1.4.0-flannel1
  docker.io/flannel/flannel:v0.24.4
)
for image in "${images[@]}"; do
  # "${image##*/}" strips the registry/repo path, e.g.
  # registry.k8s.io/coredns/coredns:v1.11.1 -> coredns:v1.11.1
  ctr -n k8s.io images export --skip-manifest-json "${image##*/}.tar" "$image"
done
k8s 环境部署出错
bash
# 查看kubeadm 需要的镜像文件
$ kubeadm config images list
registry.k8s.io/kube-apiserver:v1.29.3
registry.k8s.io/kube-controller-manager:v1.29.3
registry.k8s.io/kube-scheduler:v1.29.3
registry.k8s.io/kube-proxy:v1.29.3
registry.k8s.io/coredns/coredns:v1.11.1
registry.k8s.io/pause:3.9
registry.k8s.io/etcd:3.5.12-0
1. 检查 containerd 部署是否正常
containerd 配置文件位置 /etc/containerd/config.toml
shell
# 查看 containerd 版本
$ containerd -v    # 或者执行: ctr -v
# 查看 containerd 状态
$ systemctl status containerd
# 查看 containerd 配置的沙箱镜像 (sandbox image)
$ grep sandbox_image /etc/containerd/config.toml
> sandbox_image = "registry.k8s.io/pause:3.9"
# 查看 containerd 是否启用 systemd cgroup 驱动
$ grep SystemdCgroup /etc/containerd/config.toml
> SystemdCgroup = true
containerd 重启
shell
# Reload unit files and restart containerd after editing
# /etc/containerd/config.toml.
systemctl daemon-reload
# 'reenable' performs disable + enable in one step: it recreates the unit's
# install symlinks so a changed [Install] section takes effect.
systemctl reenable containerd
systemctl restart containerd
2. 检查镜像加载是否正常
shell
$ ctr -n k8s.io images ls|awk '{print $1}'|grep -v sha256
REF
docker.io/flannel/flannel-cni-plugin:v1.4.0-flannel1
docker.io/flannel/flannel:v0.24.4
registry.k8s.io/coredns/coredns:v1.11.1
registry.k8s.io/etcd:3.5.12-0
registry.k8s.io/kube-apiserver:v1.29.3
registry.k8s.io/kube-controller-manager:v1.29.3
registry.k8s.io/kube-proxy:v1.29.3
registry.k8s.io/kube-scheduler:v1.29.3
registry.k8s.io/pause:3.8
registry.k8s.io/pause:3.9
3. 检查 crictl 环境是否正常
bash
# 查看crictl version
$ crictl --version
>crictl version v1.28.0
# 查看 crictl 配置
$ cat /etc/crictl.yaml
>runtime-endpoint: unix:///run/containerd/containerd.sock
>image-endpoint: unix:///run/containerd/containerd.sock
>timeout: 10
>debug: false
>pull-image-on-create: false
# 查看是否能连接到 containerd
$ crictl --image-endpoint=unix:///run/containerd/containerd.sock images
IMAGE TAG IMAGE ID SIZE
docker.io/flannel/flannel-cni-plugin v1.4.0-flannel1 77c1250c26d96 4.5MB
docker.io/flannel/flannel v0.24.4 c9fe3bce8a6d8 32.7MB
registry.k8s.io/coredns/coredns v1.11.1 cbb01a7bd410d 18.2MB
registry.k8s.io/etcd 3.5.12-0 3861cfcd7c04c 57.2MB
registry.k8s.io/kube-apiserver v1.29.3 39f995c9f1996 35.1MB
registry.k8s.io/kube-controller-manager v1.29.3 6052a25da3f97 33.5MB
registry.k8s.io/kube-proxy v1.29.3 a1d263b5dc5b0 28.4MB
registry.k8s.io/kube-scheduler v1.29.3 8c390d98f50c0 18.6MB
registry.k8s.io/pause 3.8 4873874c08efc 311kB
registry.k8s.io/pause 3.9 e6f1816883972 322kB
4. 检查 kubelet 日志问题
kubelet 重启
shell
# Reload unit files and restart kubelet.
systemctl daemon-reload
# 'reenable' performs disable + enable in one step: it recreates the unit's
# install symlinks so a changed [Install] section takes effect.
systemctl reenable kubelet
systemctl restart kubelet
5. 检查发布失败的 kubeadm 配置信息
shell
$ cat /var/lib/kubelet/kubeadm-flags.env
KUBELET_KUBEADM_ARGS="--container-runtime-endpoint=unix:///run/containerd/containerd.sock --hostname-override=k8s-master --pod-infra-container-image=registry.k8s.io/pause:3.9"
异常问题记载及解决办法
kubelet 启动失败
查看进程日志
shell
journalctl -u kubelet # 查看Unit日志
或者
grep SandboxImage /var/log/messages
或者
less /var/log/messages
服务Pod 一直处于pending
查看服务启动确定原因
shell
# Describe the pending pod; the "Events" section shows why scheduling or
# startup failed (taints/tolerations, insufficient resources, image pull, ...).
kubectl -n bdtp describe pod/bdtp-portal-86b7789c75-ffqf6
bash
# Show node labels and per-node taints to find out why pods stay Pending.
kubectl get nodes --show-labels
kubectl describe nodes k8s-slave1 | grep Taints
Taints: drunk=true:NoSchedule
# Remove taints from the master so pods may schedule there.
# The trailing '-' after the taint key means "delete this taint".
kubectl taint node k8s-master node.kubernetes.io/not-ready-
kubectl taint node k8s-master node-role.kubernetes.io/control-plane-
网络问题引起的容器无法调度或者访问
需要重新安装 flannel
shell
# Reapply the flannel CNI manifest to reinstall the overlay network.
kubectl apply --filename ./kube-flannel.yml