目录结构
~/ansible/k8s-cluster$ tree
.
├── ansible.cfg
├── inventory
│   ├── cluster.ini
│   └── group_vars
│       └── k8s_cluster.yml
├── playbooks
│   └── deploy-k8s.yml
└── roles
    ├── common
    │   ├── files
    │   │   ├── cni-plugins-linux-amd64-v1.8.0.tgz
    │   │   ├── containerd-1.7.29-linux-amd64.tar.gz
    │   │   ├── cri-containerd-1.7.29-linux-amd64.tar.gz
    │   │   ├── libseccomp-2.5.5.tar.gz
    │   │   └── runc.amd64
    │   ├── handlers
    │   │   └── main.yml
    │   └── tasks
    │       ├── main.yml
    │       └── main_bak.yml
    ├── master
    │   └── tasks
    │       └── main.yml
    └── node
        └── tasks
            └── main.yml
ansible.cfg配置
~/ansible/k8s-cluster$ cat ansible.cfg
# Defaults for every ansible / ansible-playbook run started from this directory.
[defaults]
# Inventory file, relative to this ansible.cfg.
inventory = ./inventory/cluster.ini
# SSH login user on the managed hosts.
remote_user = k8s
# Private key used for SSH authentication.
private_key_file = ~/.ssh/id_rsa
# NOTE(review): disables SSH host key verification; convenient for a lab,
# but confirm this is acceptable before using it against production hosts.
host_key_checking = False
# Directory searched for the common / master / node roles.
roles_path = ./roles
# Run tasks with elevated privileges by default, so tasks without their own
# `become` still run through sudo.
[privilege_escalation]
become = True
become_method = sudo
~/ansible/k8s-cluster$ cat inventory/cluster.ini
# Control-plane host(s). The master role refers to the first entry as
# groups['master'][0].
[master]
k8s-master01 ansible_host=192.168.1.80
# Worker nodes that join the cluster.
[node]
k8s-node01 ansible_host=192.168.1.81
k8s-node02 ansible_host=192.168.1.82
# Parent group containing every cluster host; targeted by the first play of
# the playbook (hosts: k8s_cluster).
[k8s_cluster:children]
master
node
~/ansible/k8s-cluster$ cat inventory/group_vars/k8s_cluster.yml
# Kubernetes minor version. Roles append ".0" for kubeadm and ".*" for apt
# package pins, so keep it quoted to stop YAML from reading it as a float.
k8s_version: '1.28'

# Cluster networking, passed to `kubeadm init`.
pod_network_cidr: '10.244.0.0/16'
service_cidr: '10.96.0.0/12'

# Regular user that receives a kubeconfig on the master.
kube_user: 'k8s'

# Registry mirror hosting the Kubernetes control-plane images.
container_registry: 'registry.aliyuncs.com/google_containers'

# Address given to `kubeadm init --control-plane-endpoint`; worker nodes also
# wait for port 6443 on it before joining.
control_plane_endpoint: '192.168.1.80'

# NOTE(review): not referenced by any task visible in these notes; confirm
# whether it is still needed.
skip_swap_check: false

# Calico manifest downloaded by the master role.
calico_manifest_url: 'https://raw.githubusercontent.com/projectcalico/calico/v3.27.4/manifests/calico.yaml'
playbook配置
~/ansible/k8s-cluster$ cat playbooks/deploy-k8s.yml
---
# Play 1: OS preparation, container runtime and kubelet/kubeadm/kubectl on
# every cluster host.
- name: 🐧 Prepare all Ubuntu nodes
  hosts: k8s_cluster
  roles:
    - role: common

# Play 2: bootstrap the control plane on the master group.
- name: 👑 Deploy Kubernetes Master (with Aliyun container images)
  hosts: master
  roles:
    - role: master

# Play 3: join the workers using the join command produced by play 2.
- name: 🖥️ Join Worker Nodes to Cluster
  hosts: node
  roles:
    - role: node
master以及node节点任务配置
~/ansible/k8s-cluster$ cat roles/master/tasks/main.yml
---
# Pull the control-plane images ahead of `kubeadm init` so that init does not
# stall on downloads. The registry comes from `container_registry`
# (group_vars/k8s_cluster.yml) instead of a hard-coded copy of the same value.
- name: Pre-pull Kubernetes container images from Aliyun
  command: >-
    kubeadm config images pull
    --image-repository={{ container_registry }}
    --kubernetes-version=v{{ k8s_version }}.0
# Bootstrap the control plane. `creates` makes the task a no-op once
# /etc/kubernetes/admin.conf exists, so reruns do not attempt a second init.
# The image registry is taken from `container_registry` (group_vars) rather
# than being hard-coded.
- name: Initialize Kubernetes control plane
  become: true
  command: >-
    kubeadm init
    --image-repository={{ container_registry }}
    --control-plane-endpoint={{ control_plane_endpoint }}
    --pod-network-cidr={{ pod_network_cidr }}
    --service-cidr={{ service_cidr }}
    --kubernetes-version=v{{ k8s_version }}.0
    --upload-certs
  args:
    creates: /etc/kubernetes/admin.conf
# Re-run the kubeadm bootstrap-token phase, once, on the first host of the
# `master` group, with /root as the working directory.
- name: Ensure bootstrap tokens and cluster-info are properly configured
  become: true
  run_once: true
  when: inventory_hostname == groups['master'][0]
  command:
    cmd: kubeadm init phase bootstrap-token
    chdir: /root
# Give the regular user ({{ kube_user }}) a private kubeconfig.
- name: Create .kube directory for regular user
  become: true
  file:
    state: directory
    path: "/home/{{ kube_user }}/.kube"
    owner: "{{ kube_user }}"
    group: "{{ kube_user }}"
    mode: "0700"

# remote_src: the source file already lives on the master, not the controller.
- name: Copy admin.conf to user's kubeconfig
  become: true
  copy:
    remote_src: true
    src: /etc/kubernetes/admin.conf
    dest: "/home/{{ kube_user }}/.kube/config"
    owner: "{{ kube_user }}"
    group: "{{ kube_user }}"
    mode: "0600"
# ======== Deploy kube-proxy ========
# (Re)apply the kube-proxy addon through kubeadm.
- name: Ensure kube-proxy ConfigMap exists
  command: kubeadm init phase addon kube-proxy
  environment:
    KUBECONFIG: "/home/{{ kube_user }}/.kube/config"
  become: true

# Point the DaemonSet at the mirror registry. The registry comes from
# `container_registry` (group_vars) instead of a hard-coded copy.
- name: Patch kube-proxy DaemonSet image to Aliyun registry
  command: >-
    kubectl -n kube-system set image daemonset/kube-proxy
    kube-proxy={{ container_registry }}/kube-proxy:v{{ k8s_version }}.0
  environment:
    KUBECONFIG: "/home/{{ kube_user }}/.kube/config"
  become: true

# Read-only wait; it changes nothing, so it must not report "changed".
- name: Wait for kube-proxy DaemonSet to be ready
  command: kubectl -n kube-system rollout status daemonset/kube-proxy --timeout=120s
  environment:
    KUBECONFIG: "/home/{{ kube_user }}/.kube/config"
  become: true
  changed_when: false
# Download the Calico manifest. The download is retried because fetching from
# raw.githubusercontent.com is unreliable on some networks; previously a
# single failure aborted the whole play.
- name: Download Calico manifest to master node
  get_url:
    url: "{{ calico_manifest_url }}"
    dest: /tmp/calico.yaml
    mode: "0644"
    timeout: 30
  register: calico_download
  until: calico_download is succeeded
  retries: 5
  delay: 10
  become: true

# Rewrite docker.io/calico images to the DaoCloud mirror (m.daocloud.io).
# The task name previously said "Alibaba Cloud", which did not match the
# mirror actually used. Already-rewritten lines no longer match the pattern,
# so reruns leave the file unchanged.
- name: Replace Calico image registry with DaoCloud mirror
  replace:
    path: /tmp/calico.yaml
    regexp: 'image: docker\.io/calico/(.*):(.*)'
    replace: 'image: m.daocloud.io/docker.io/calico/\1:\2'
  become: true

- name: Deploy Calico CNI from local modified manifest
  command: kubectl apply -f /tmp/calico.yaml
  environment:
    KUBECONFIG: "/home/{{ kube_user }}/.kube/config"
  become: true
# Check/repair step: try to produce a join command; if that fails, re-run the
# kubeadm bootstrap-token phase. Runs once on the first master and is never
# reported as "changed".
- name: Ensure bootstrap token and cluster-info ConfigMap are present
  delegate_to: "{{ groups['master'][0] }}"
  run_once: true
  changed_when: false
  shell:
    executable: /bin/bash
    cmd: |
      # 尝试获取当前有效的 join 命令(验证 cluster-info 是否可读)
      if kubeadm token create --print-join-command >/dev/null 2>&1; then
        echo "cluster-info is valid"
      else
        # 如果失败,强制重建 bootstrap token 和 cluster-info
        kubeadm init phase bootstrap-token
      fi
# TASK: generate the join command dynamically (a fresh, valid command on
# every run).
- name: Generate fresh kubeadm join command
  command: kubeadm token create --print-join-command
  register: join_command_result
  delegate_to: "{{ groups['master'][0] }}"
  run_once: true
  changed_when: false

# TASK: save it to a temporary file for the node role to read.
# The file contains a bootstrap token, so it is written 0600 rather than
# world-readable 0644.
- name: Save join command to /tmp/k8s_join_cmd.sh on master
  copy:
    content: "{{ join_command_result.stdout }}"
    dest: /tmp/k8s_join_cmd.sh
    mode: "0600"
  delegate_to: "{{ groups['master'][0] }}"
  run_once: true
~/ansible/k8s-cluster$ cat roles/node/tasks/main.yml
---
# Block until the API server port on the control-plane endpoint accepts
# connections: first check after 10 s, give up after 300 s.
- name: Wait for Kubernetes API server on master
  wait_for:
    port: 6443
    host: "{{ control_plane_endpoint }}"
    timeout: 300
    delay: 10
# Read the join command that the master role saved on the first master.
# slurp returns the file content base64-encoded.
- name: Fetch join command from master node
  delegate_to: "{{ groups['master'][0] }}"
  slurp:
    src: /tmp/k8s_join_cmd.sh
  register: join_script

# `kubeadm join` writes /etc/kubernetes/kubelet.conf on success. Using it as
# the `creates` marker makes the task a no-op on nodes that have already
# joined, instead of failing when the playbook is run again.
- name: Join this node to the cluster
  shell: "{{ join_script.content | b64decode }}"
  args:
    executable: /bin/bash
    creates: /etc/kubernetes/kubelet.conf
~/ansible/k8s-cluster$ cat roles/common/handlers/main.yml
---
# Handlers of the common role; they run only when a task notifies them by name.

# Re-apply every sysctl configuration file.
- name: reload sysctl
  command:
    cmd: sysctl --system

# Restart kubelet and keep it enabled at boot.
- name: restart kubelet
  systemd:
    name: kubelet
    enabled: true
    state: restarted
common 角色主任务配置
~/ansible/k8s-cluster$ cat roles/common/tasks/main.yml
---
# ==============================
# 1. Disable swap (required by Kubernetes)
# ==============================
# Turn swap off for the running system.
- name: Disable swap for the running system
  command: swapoff -a
  become: true

# Comment out swap entries so swap stays off after a reboot.
# The previous `sed` matched already-commented lines too and prepended one
# more '#' on every run; it also missed tab-separated fstab fields. The
# negative lookahead skips lines that are already comments, which makes the
# task idempotent.
- name: Comment out swap entries in /etc/fstab
  replace:
    path: /etc/fstab
    regexp: '^(?![ \t]*#)(.*[ \t]swap[ \t].*)$'
    replace: '#\1'
  become: true
# ==============================
# 2. Load kernel modules & sysctl
# ==============================
# Persist the module list so the modules are loaded on every boot.
- name: Ensure kernel modules are present
  become: true
  copy:
    dest: /etc/modules-load.d/k8s.conf
    mode: "0644"
    content: |
      overlay
      br_netfilter

# Load the modules now; skipped per module when /sys/module/<name> exists.
- name: Load kernel modules
  become: true
  command:
    cmd: "modprobe {{ item }}"
    creates: "/sys/module/{{ item }}"
  loop: [overlay, br_netfilter]

# Bridge netfilter and IPv4 forwarding settings.
- name: Configure sysctl settings
  become: true
  copy:
    dest: /etc/sysctl.d/k8s.conf
    mode: "0644"
    content: |
      net.bridge.bridge-nf-call-iptables = 1
      net.bridge.bridge-nf-call-ip6tables = 1
      net.ipv4.ip_forward = 1

# Applied unconditionally on every run.
- name: Apply sysctl settings
  become: true
  command:
    cmd: sysctl --system
# ==============================
# 3. Install base packages
# ==============================
# The apt cache is refreshed only when it is older than one hour.
- name: Install required system packages
  become: true
  apt:
    update_cache: true
    cache_valid_time: 3600
    name: [apt-transport-https, ca-certificates, curl, gnupg]
# ==============================
# 4. Configure the Kubernetes APT source (Aliyun)
# ==============================
- name: Ensure /etc/apt/keyrings directory exists
  become: true
  file:
    state: directory
    path: /etc/apt/keyrings
    mode: "0755"

- name: Download Kubernetes GPG key from Aliyun
  become: true
  get_url:
    dest: /tmp/kubernetes-apt-key.gpg
    url: https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg
    mode: "0644"

# Skipped once the dearmored keyring exists.
- name: Convert GPG key to keyring format
  become: true
  command:
    cmd: gpg --batch --yes --dearmor -o /etc/apt/keyrings/kubernetes-aliyun.gpg /tmp/kubernetes-apt-key.gpg
    creates: /etc/apt/keyrings/kubernetes-aliyun.gpg

# NOTE(review): "kubernetes-xenial" looks like the legacy repository layout;
# confirm the mirror still serves the version wanted in `k8s_version`.
- name: Add Kubernetes APT repository (Aliyun)
  become: true
  apt_repository:
    state: present
    filename: kubernetes-aliyun
    repo: "deb [signed-by=/etc/apt/keyrings/kubernetes-aliyun.gpg] https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main"
# ==============================
# 5. Install the containerd runtime
# ==============================
# Everything except libseccomp2 comes from archives shipped in the role's
# files/ directory, so these steps need no download.
- name: Install libseccomp2 (required by containerd)
  become: true
  apt:
    update_cache: true
    state: present
    name: libseccomp2

- name: Copy cri-containerd archive
  become: true
  copy:
    dest: /tmp/cri-containerd.tar.gz
    src: cri-containerd-1.7.29-linux-amd64.tar.gz

# The archive is unpacked relative to the filesystem root.
- name: Extract cri-containerd to /
  become: true
  unarchive:
    remote_src: true
    src: /tmp/cri-containerd.tar.gz
    dest: /

- name: Copy runc binary
  become: true
  copy:
    dest: /usr/local/bin/runc
    src: runc.amd64
    mode: "0755"

- name: Create CNI bin directory
  become: true
  file:
    state: directory
    path: /opt/cni/bin
    mode: "0755"

- name: Copy CNI plugins
  become: true
  copy:
    dest: /tmp/cni-plugins.tgz
    src: cni-plugins-linux-amd64-v1.8.0.tgz

- name: Extract CNI plugins
  become: true
  unarchive:
    remote_src: true
    src: /tmp/cni-plugins.tgz
    dest: /opt/cni/bin
# ==============================
# 6. Configure containerd (key point: stop, edit, then start)
# ==============================
- name: Ensure /etc/containerd directory exists
  become: true
  file:
    state: directory
    path: /etc/containerd
    owner: root
    group: root
    mode: "0755"

# NOTE(review): this stops containerd on every run, including on hosts that
# already run workloads; confirm that is acceptable for reruns.
- name: Stop containerd before config change
  become: true
  systemd:
    state: stopped
    name: containerd

# Written only when no config.toml exists yet.
- name: Generate default containerd config
  become: true
  shell:
    cmd: /usr/local/bin/containerd config default > /etc/containerd/config.toml
    creates: /etc/containerd/config.toml

# backrefs keeps the line's original indentation (\1); the file is left
# untouched when the pattern does not match.
- name: Set SystemdCgroup = true (fix TOML boolean)
  become: true
  lineinfile:
    backrefs: true
    path: /etc/containerd/config.toml
    regexp: '^(\s*)SystemdCgroup\s*=\s*\w+'
    line: '\1SystemdCgroup = true'

# Use the pause image from the Aliyun mirror.
- name: Set sandbox_image to Alibaba registry
  become: true
  lineinfile:
    backrefs: true
    path: /etc/containerd/config.toml
    regexp: '^(\s*)sandbox_image\s*=\s*".*"'
    line: '\1sandbox_image = "registry.aliyuncs.com/google_containers/pause:3.9"'

# daemon_reload makes systemd re-read its unit files before starting.
- name: Start containerd with new config
  become: true
  systemd:
    daemon_reload: true
    name: containerd
    state: started
    enabled: true

# Fail if the socket has not appeared within 15 seconds.
- name: Wait for containerd socket
  become: true
  wait_for:
    timeout: 15
    path: /run/containerd/containerd.sock
# ==============================
# 7. Install kubelet, kubeadm, kubectl
# ==============================
# The "<minor>.*" pin accepts any patch release of the configured minor
# version.
- name: Install Kubernetes components
  become: true
  apt:
    update_cache: true
    state: present
    name:
      - "kubelet={{ k8s_version }}.*"
      - "kubeadm={{ k8s_version }}.*"
      - "kubectl={{ k8s_version }}.*"

# Mark the packages as held in dpkg.
- name: Hold Kubernetes packages
  become: true
  dpkg_selections:
    selection: hold
    name: "{{ item }}"
  loop: [kubelet, kubeadm, kubectl]
离线安装包
ls roles/common/files/
cni-plugins-linux-amd64-v1.8.0.tgz cri-containerd-1.7.29-linux-amd64.tar.gz runc.amd64
containerd-1.7.29-linux-amd64.tar.gz libseccomp-2.5.5.tar.gz
安装执行命令:~/ansible/k8s-cluster$ ansible-playbook playbooks/deploy-k8s.yml -v
国内网络环境下,calico manifest 可能在下载途中失败;如遇下载失败,重新执行上述 playbook 命令重试即可(必要时多试几次)。