重命名主机名(如果是集群,必须重命名;主机名只能包含小写字母、数字、"-"和".",不能包含下划线,否则kubeadm会因节点名不合法而报错)
sudo hostnamectl set-hostname node-name
vim /etc/hosts(添加hostname映射关系)
一:docker
1.解压docker所需文件
bash
tar -zxvf docker-26.0.0.tgz
mv docker/* /usr/bin/
2.编辑docker文件
bash
vim /usr/lib/systemd/system/docker.service
写入:
bash
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network-online.target firewalld.service
Wants=network-online.target
[Service]
Type=notify
ExecStart=/usr/bin/dockerd
ExecReload=/bin/kill -s HUP $MAINPID
LimitNOFILE=infinity
LimitNPROC=infinity
TimeoutStartSec=0
Delegate=yes
KillMode=process
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s
[Install]
WantedBy=multi-user.target
3.配置:
bash
apt install apt-transport-https ca-certificates curl gnupg-agent software-properties-common -y
curl -fsSL https://mirrors.aliyun.com/docker-ce/linux/ubuntu/gpg | sudo apt-key add -
apt update
cat << EOF > /etc/docker/daemon.json
{
"data-root": "/mnt/sdb1/docker",
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"path": "/usr/bin/nvidia-container-runtime",
"runtimeArgs": []
}
},
"log-driver": "json-file",
"log-opts": {
"max-size": "100m",
"max-file": "3"
},
"registry-mirrors": ["https://registry.docker-cn.com"],
"exec-opts": ["native.cgroupdriver=systemd"]
}
EOF
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
&& curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
# 开启外网代理,因为要从外网下工具包,也可以不开,就是速度很慢
export http_proxy=http://ip:port
export https_proxy=http://ip:port
sudo apt update && apt install nvidia-container-toolkit -y
systemctl daemon-reload
systemctl enable docker
systemctl restart docker
二:k8s安装前准备
bash
sudo systemctl stop firewalld
sudo systemctl status firewalld
sudo swapoff -a
sudo sed -i '/swap/s/^/#/' /etc/fstab
cat <<EOF | sudo tee /etc/modules-load.d/k8s.conf
overlay
br_netfilter
EOF
sudo modprobe overlay
sudo modprobe br_netfilter
cat <<EOF | sudo tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-ip6tables = 1
net.ipv4.ip_forward = 1
EOF
sudo sysctl --system
# 验证参数(显示3个1即为配置成功)
sudo sysctl net.bridge.bridge-nf-call-iptables net.bridge.bridge-nf-call-ip6tables net.ipv4.ip_forward
cat <<EOF | sudo tee /etc/modules-load.d/ipvs.conf
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF
# fix system default too small
echo fs.inotify.max_user_instances=81920 >> /etc/sysctl.conf && sysctl -p
sudo apt update && sudo apt install -y apt-transport-https ca-certificates curl gnupg2
echo "deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main" | sudo tee /etc/apt/sources.list.d/kubernetes.list
sudo apt update
安装kubelet 1.23.16(及配套组件)
bash
# 1. 清理旧版本(若已安装过k8s组件)
sudo apt remove -y kubelet kubeadm kubectl
# 2. 安装指定版本kubelet 1.23.16
sudo apt install -y kubelet=1.23.16-00 kubeadm=1.23.16-00 kubectl=1.23.16-00
# 3. 锁定版本,防止apt自动更新
sudo apt-mark hold kubelet kubeadm kubectl
# 4. 配置kubelet开机自启
sudo systemctl enable kubelet
# 5. 启动kubelet服务
sudo systemctl start kubelet
# 6. 验证kubelet状态
sudo systemctl status kubelet
> 注意:此时kubelet通常显示`activating (auto-restart)`状态,并每隔几秒自动重启一次,属于正常现象。因为kubelet是集群节点代理,需通过`kubeadm init`(初始化master节点)或`kubeadm join`(加入现有集群)生成配置后,才能进入正常运行(`active (running)`)状态。
# 7. 验证kubelet版本
kubelet --version
# 若输出`Kubernetes v1.23.16`,说明版本安装正确。
# 8. 验证kubectl版本(配套验证)
kubectl version --client
## 补充说明
1. 若安装过程中出现"依赖缺失"报错,可执行以下命令补充依赖:
```bash
sudo apt install -y conntrack socat ebtables ethtool
```
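2. 若apt update时出现"GPG验证失败"(NO_PUBKEY),说明Kubernetes apt源的GPG密钥尚未导入,请执行 `curl -fsSL https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | sudo apt-key add -` 导入密钥后,再重新执行 `sudo apt update`。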
3. 麒麟V10桌面版可能存在桌面环境与kubelet的端口冲突,若后续初始化集群失败,可暂时关闭桌面环境(`sudo systemctl stop lightdm`)后重试。
# 4. 后续集群初始化命令(master节点)示例(参考):
# ```bash
# sudo kubeadm init --apiserver-advertise-address=你的节点IP --pod-network-cidr=10.244.0.0/16 --kubernetes-version=1.23.16
# ```
三:load 必须的镜像
chmod +x load-images.sh
./load-images.sh
四:初始化k8s
bash
# 需要根据实际环境修改:apiserver地址(172.17.0.1通常是docker0网桥地址,请替换为本节点的实际IP)、镜像仓库地址和calico.yaml的获取地址
sudo kubeadm init --apiserver-advertise-address=172.17.0.1 --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.23.16 --service-cidr=10.96.0.0/12 --pod-network-cidr=10.244.0.0/16 --ignore-preflight-errors=all
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
wget --no-check-certificate https://ip:port/ftp/calico.yaml
kubectl apply -f calico.yaml
# 所有kube-system的pod都正常运行表示成功
kubectl get pod --all-namespaces
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl > /dev/null
kubectl get no -o yaml | grep taint -A 5
kubectl taint nodes --all node-role.kubernetes.io/master-
bash
# 所有kube-system的pod都正常运行时,编辑这个文件
vim /etc/kubernetes/manifests/kube-apiserver.yaml
# 找到 - --service-cluster-ip-range=10.96.0.0/12
# 在这一行下面添加一行 - --service-node-port-range=1-65535
# 然后保存,等待api-server重启并运行成功后可以进行下一步骤
五:安装kubesphere
访问配置生成器地址:http://192.168.20.4:8080
选择和输入对应参数点击生成配置压缩包,解压缩后然后将生成的内容上传到服务器上
里面有一个README.txt文件,按照里面的步骤进行部署即可
镜像拉取大约需要等待20分钟
等待的过程中可以同时安装kubesphere管理平台
bash
kubectl apply -f https://.com/ftp/kubesphere-installer.yaml
kubectl apply -f https://.com/ftp/cluster-configuration.yaml
# 等待5-10分钟后修改
kubectl patch deployment -n kubesphere-system ks-console --type='json' -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "IfNotPresent"}]'
kubectl patch deployment -n kubesphere-system ks-apiserver --type='json' -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "IfNotPresent"}]'
kubectl patch deployment -n kubesphere-system ks-controller-manager --type='json' -p='[{"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "IfNotPresent"}]'
# kubesphere平台可以正常登录后,即可清理安装器(判断方法:浏览器访问 节点IP:30880,使用默认帐户登录并修改默认密码后能成功登录,表示kubesphere平台正常)
# 默认帐户和密码 (admin/P@88w0rd)
# 请将密码修改为:
kubectl delete deploy ks-installer -n kubesphere-system
六:配置NTP服务
bash
配置NTP服务
sudo timedatectl set-timezone Asia/Shanghai
sudo vim /etc/systemd/timesyncd.conf
写入内容:
[Time]
NTP=10.190.5.225
FallbackNTP=ntp.aliyun.com time.aliyun.com
重启服务并启用开机自启:
sudo systemctl restart systemd-timesyncd
sudo systemctl enable systemd-timesyncd
验证同步状态:
timedatectl status