Kubernetes Service High Availability in Depth

Kubernetes guarantees service high availability through mechanisms at multiple layers and dimensions, ensuring that applications keep serving traffic under a wide range of failure scenarios.


1. High Availability Architecture Overview

1.1 Kubernetes HA Architecture Diagram

text
┌─────────────────────────────────────────────────────────────────┐
│                    Kubernetes HA Architecture                   │
├─────────────────────────────────────────────────────────────────┤
│  ┌─────────────────────────────────────────────────────────────┐ │
│  │                   Control Plane HA                         │ │
│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐        │ │
│  │  │   Master1   │  │   Master2   │  │   Master3   │        │ │
│  │  │             │  │             │  │             │        │ │
│  │  │ API Server  │  │ API Server  │  │ API Server  │        │ │
│  │  │ Scheduler   │  │ Scheduler   │  │ Scheduler   │        │ │
│  │  │ Controller  │  │ Controller  │  │ Controller  │        │ │
│  │  │   Manager   │  │   Manager   │  │   Manager   │        │ │
│  │  └─────────────┘  └─────────────┘  └─────────────┘        │ │
│  └─────────────────────────────────────────────────────────────┘ │
│                             │                                   │
│  ┌─────────────────────────────────────────────────────────────┐ │
│  │                    Load Balancer                           │ │
│  │              (HAProxy/NGINX/Cloud LB)                      │ │
│  └─────────────────────────────────────────────────────────────┘ │
│                             │                                   │
│  ┌─────────────────────────────────────────────────────────────┐ │
│  │                    etcd Cluster                            │ │
│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐        │ │
│  │  │   etcd-1    │  │   etcd-2    │  │   etcd-3    │        │ │
│  │  └─────────────┘  └─────────────┘  └─────────────┘        │ │
│  └─────────────────────────────────────────────────────────────┘ │
│                             │                                   │
│  ┌─────────────────────────────────────────────────────────────┐ │
│  │                   Worker Nodes                             │ │
│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐        │ │
│  │  │   Node-1    │  │   Node-2    │  │   Node-N    │        │ │
│  │  │             │  │             │  │             │        │ │
│  │  │   Kubelet   │  │   Kubelet   │  │   Kubelet   │        │ │
│  │  │ Kube-proxy  │  │ Kube-proxy  │  │ Kube-proxy  │        │ │
│  │  │   Runtime   │  │   Runtime   │  │   Runtime   │        │ │
│  │  └─────────────┘  └─────────────┘  └─────────────┘        │ │
│  └─────────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘

2. Control Plane High Availability

2.1 API Server High Availability

haproxy
# haproxy.cfg - load balancing in front of the API Servers
global
    log stdout local0
    chroot /var/lib/haproxy
    stats socket /run/haproxy/admin.sock level admin
    stats timeout 2m
    user haproxy
    group haproxy
    daemon

defaults
    mode http
    log global
    option httplog
    option dontlognull
    option forwardfor
    option redispatch
    timeout connect 5000ms
    timeout client 50000ms
    timeout server 50000ms

frontend kubernetes-frontend
    bind *:6443
    mode tcp
    option tcplog
    default_backend kubernetes-backend

backend kubernetes-backend
    mode tcp
    balance roundrobin
    option tcp-check
    server master1 192.168.1.10:6443 check fall 3 rise 2
    server master2 192.168.1.11:6443 check fall 3 rise 2
    server master3 192.168.1.12:6443 check fall 3 rise 2
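
The load balancer itself must not become a single point of failure, so HAProxy is usually run on at least two hosts that share a floating virtual IP. A minimal keepalived sketch is shown below; the interface name, priority values, shared secret, and the VIP 192.168.1.100 are illustrative assumptions, not values taken from the cluster above.

bash
# Hypothetical keepalived setup on each HAProxy host (all values are examples)
cat > /etc/keepalived/keepalived.conf << 'EOF'
vrrp_script chk_haproxy {
    script "killall -0 haproxy"  # node is healthy only while HAProxy is running
    interval 2
}

vrrp_instance K8S_API {
    state MASTER                 # use BACKUP on the other HAProxy hosts
    interface eth0               # assumed NIC name
    virtual_router_id 51
    priority 101                 # lower the priority on BACKUP hosts
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass k8svip         # assumed shared secret
    }
    virtual_ipaddress {
        192.168.1.100            # assumed VIP; point kubeconfig server entries here
    }
    track_script {
        chk_haproxy
    }
}
EOF
systemctl enable --now keepalived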

2.2 etcd Cluster High Availability

bash
#!/bin/bash
# setup-etcd-cluster.sh

# etcd cluster configuration
ETCD_NODES=(
    "etcd-1=https://192.168.1.10:2380"
    "etcd-2=https://192.168.1.11:2380"
    "etcd-3=https://192.168.1.12:2380"
)
# ETCD_INITIAL_CLUSTER must be a comma-separated list
ETCD_INITIAL_CLUSTER=$(IFS=','; echo "${ETCD_NODES[*]}")

CLUSTER_STATE="new"
CLUSTER_TOKEN="etcd-cluster-1"

# Configuration for node 1 (adjust ETCD_NAME and the IP addresses on the other members)
cat > /etc/systemd/system/etcd.service << EOF
[Unit]
Description=etcd
Documentation=https://github.com/coreos

[Service]
Type=notify
User=etcd
Environment=ETCD_DATA_DIR=/var/lib/etcd
Environment=ETCD_NAME=etcd-1
Environment=ETCD_INITIAL_ADVERTISE_PEER_URLS=https://192.168.1.10:2380
Environment=ETCD_LISTEN_PEER_URLS=https://192.168.1.10:2380
Environment=ETCD_LISTEN_CLIENT_URLS=https://192.168.1.10:2379,https://127.0.0.1:2379
Environment=ETCD_ADVERTISE_CLIENT_URLS=https://192.168.1.10:2379
Environment=ETCD_INITIAL_CLUSTER_TOKEN=${CLUSTER_TOKEN}
Environment=ETCD_INITIAL_CLUSTER=${ETCD_INITIAL_CLUSTER}
Environment=ETCD_INITIAL_CLUSTER_STATE=${CLUSTER_STATE}
Environment=ETCD_CERT_FILE=/etc/etcd/server.crt
Environment=ETCD_KEY_FILE=/etc/etcd/server.key
Environment=ETCD_TRUSTED_CA_FILE=/etc/etcd/ca.crt
Environment=ETCD_CLIENT_CERT_AUTH=true
Environment=ETCD_PEER_CERT_FILE=/etc/etcd/peer.crt
Environment=ETCD_PEER_KEY_FILE=/etc/etcd/peer.key
Environment=ETCD_PEER_TRUSTED_CA_FILE=/etc/etcd/ca.crt
Environment=ETCD_PEER_CLIENT_CERT_AUTH=true
ExecStart=/usr/local/bin/etcd
Restart=on-failure
RestartSec=5
LimitNOFILE=40000

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable etcd
systemctl start etcd
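
Once all members are started, cluster membership and health can be verified with etcdctl. The endpoints and certificate paths below mirror the values configured above; depending on how the PKI was generated, a dedicated client certificate may be required instead of the server certificate.

bash
# Verify etcd cluster membership and per-endpoint health
ETCDCTL_API=3 etcdctl member list \
    --endpoints=https://192.168.1.10:2379,https://192.168.1.11:2379,https://192.168.1.12:2379 \
    --cacert=/etc/etcd/ca.crt \
    --cert=/etc/etcd/server.crt \
    --key=/etc/etcd/server.key

ETCDCTL_API=3 etcdctl endpoint health \
    --endpoints=https://192.168.1.10:2379,https://192.168.1.11:2379,https://192.168.1.12:2379 \
    --cacert=/etc/etcd/ca.crt \
    --cert=/etc/etcd/server.crt \
    --key=/etc/etcd/server.key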

2.3 Scheduler and Controller Manager High Availability

yaml
# kube-scheduler.yaml
apiVersion: v1
kind: Pod
metadata:
  name: kube-scheduler
  namespace: kube-system
spec:
  hostNetwork: true
  containers:
  - name: kube-scheduler
    image: registry.k8s.io/kube-scheduler:v1.28.0
    command:
    - kube-scheduler
    - --bind-address=0.0.0.0
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    - --leader-elect=true        # enable leader election
    - --leader-elect-lease-duration=15s
    - --leader-elect-renew-deadline=10s
    - --leader-elect-retry-period=2s
    volumeMounts:
    - name: kubeconfig
      mountPath: /etc/kubernetes/scheduler.conf
      readOnly: true
  volumes:
  - name: kubeconfig
    hostPath:
      path: /etc/kubernetes/scheduler.conf

---
# kube-controller-manager.yaml  
apiVersion: v1
kind: Pod
metadata:
  name: kube-controller-manager
  namespace: kube-system
spec:
  hostNetwork: true
  containers:
  - name: kube-controller-manager
    image: registry.k8s.io/kube-controller-manager:v1.28.0
    command:
    - kube-controller-manager
    - --bind-address=0.0.0.0
    - --kubeconfig=/etc/kubernetes/controller-manager.conf
    - --leader-elect=true        # enable leader election
    - --leader-elect-lease-duration=15s
    - --leader-elect-renew-deadline=10s
    - --leader-elect-retry-period=2s
    - --use-service-account-credentials=true
    volumeMounts:
    - name: kubeconfig
      mountPath: /etc/kubernetes/controller-manager.conf
      readOnly: true
  volumes:
  - name: kubeconfig
    hostPath:
      path: /etc/kubernetes/controller-manager.conf
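
With --leader-elect enabled, only one scheduler and one controller manager instance is active at a time; the others run as hot standbys and take over when the leader's lease expires. The current leader can be read from the Lease objects in kube-system:

bash
# Show which control plane instance currently holds each leader-election lease
kubectl get lease kube-scheduler kube-controller-manager -n kube-system
kubectl describe lease kube-scheduler -n kube-system | grep "Holder Identity"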

3. Pod-Level High Availability

3.1 Multi-Replica Deployments (ReplicaSet/Deployment)

yaml
# high-availability-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-app
  labels:
    app: web-app
spec:
  replicas: 3                    # multiple replicas
  strategy:
    type: RollingUpdate          # rolling update strategy
    rollingUpdate:
      maxUnavailable: 1          # max replicas that may be unavailable during an update
      maxSurge: 1                # max extra replicas created during an update
  selector:
    matchLabels:
      app: web-app
  template:
    metadata:
      labels:
        app: web-app
    spec:
      # Anti-affinity: prefer spreading Pods across different nodes
      affinity:
        podAntiAffinity:
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              labelSelector:
                matchExpressions:
                - key: app
                  operator: In
                  values:
                  - web-app
              topologyKey: kubernetes.io/hostname
      
      containers:
      - name: web-app
        image: nginx:1.21
        ports:
        - containerPort: 80
        
        # Health checks
        livenessProbe:
          httpGet:
            path: /health
            port: 80
          initialDelaySeconds: 30
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3
          
        readinessProbe:
          httpGet:
            path: /ready
            port: 80
          initialDelaySeconds: 5
          periodSeconds: 5
          timeoutSeconds: 3
          failureThreshold: 3
          
        # Resource limits
        resources:
          requests:
            memory: "128Mi"
            cpu: "100m"
          limits:
            memory: "256Mi"
            cpu: "200m"
            
        # Graceful shutdown: delay termination so endpoints can be removed first
        lifecycle:
          preStop:
            exec:
              command: ["/bin/sh", "-c", "sleep 15"]
      
      # Tolerate node failures: Pods stay bound for 5 minutes before eviction
      tolerations:
      - key: node.kubernetes.io/not-ready
        operator: Exists
        effect: NoExecute
        tolerationSeconds: 300
      - key: node.kubernetes.io/unreachable
        operator: Exists
        effect: NoExecute
        tolerationSeconds: 300
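
As a complement (or alternative) to podAntiAffinity, topology spread constraints distribute replicas evenly across failure domains. A minimal sketch that could be added to the same Pod template spec is shown below; it assumes nodes carry the standard topology.kubernetes.io/zone label:

yaml
      # Spread web-app replicas across zones (best effort) and nodes (hard requirement)
      topologySpreadConstraints:
      - maxSkew: 1
        topologyKey: topology.kubernetes.io/zone
        whenUnsatisfiable: ScheduleAnyway
        labelSelector:
          matchLabels:
            app: web-app
      - maxSkew: 1
        topologyKey: kubernetes.io/hostname
        whenUnsatisfiable: DoNotSchedule
        labelSelector:
          matchLabels:
            app: web-app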

3.2 Pod Disruption Budget (PDB)

yaml
# pod-disruption-budget.yaml
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: web-app-pdb
spec:
  minAvailable: 2              # minimum number of Pods that must remain available
  # maxUnavailable: 1          # alternatively, cap the number of unavailable Pods
  selector:
    matchLabels:
      app: web-app
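
A PDB only limits voluntary disruptions such as node drains and cluster upgrades; it does not protect against node crashes. The remaining disruption budget can be checked at any time:

bash
# ALLOWED DISRUPTIONS shows how many Pods may still be evicted voluntarily
kubectl get pdb web-app-pdb
kubectl describe pdb web-app-pdb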

3.3 HorizontalPodAutoscaler (HPA)

yaml
# hpa.yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: web-app-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: web-app
  minReplicas: 3               # minimum number of replicas
  maxReplicas: 10              # maximum number of replicas
  metrics:
  - type: Resource
    resource:
      name: cpu
      target:
        type: Utilization
        averageUtilization: 70   # target CPU utilization
  - type: Resource
    resource:
      name: memory
      target:
        type: Utilization
        averageUtilization: 80   # target memory utilization
  behavior:
    scaleDown:
      stabilizationWindowSeconds: 300  # scale-down stabilization window
      policies:
      - type: Percent
        value: 10                      # remove at most 10% of replicas per period
        periodSeconds: 60
    scaleUp:
      stabilizationWindowSeconds: 60   # scale-up stabilization window
      policies:
      - type: Percent
        value: 50                      # add at most 50% more replicas per period
        periodSeconds: 60
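
Resource-based HPA targets require the Metrics API, typically provided by the metrics-server add-on; without it the HPA reports current utilization as unknown and never scales. The following commands confirm that metrics are flowing and show the autoscaler reacting to load:

bash
# Requires metrics-server (or another Metrics API provider) to be installed
kubectl top pods -l app=web-app
kubectl get hpa web-app-hpa --watch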

4. Service-Level High Availability

4.1 Service Configuration

yaml
# service.yaml
apiVersion: v1
kind: Service
metadata:
  name: web-app-service
  labels:
    app: web-app
spec:
  selector:
    app: web-app
  ports:
  - name: http
    port: 80
    targetPort: 80
    protocol: TCP
  type: ClusterIP
  sessionAffinity: None          # no session affinity, so requests are balanced across all ready Pods
  
---
# headless-service.yaml (used with StatefulSets)
apiVersion: v1
kind: Service
metadata:
  name: web-app-headless
spec:
  clusterIP: None                # Headless Service
  selector:
    app: web-app
  ports:
  - port: 80
    targetPort: 80
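
A Service only forwards traffic to Pods whose readiness probes are passing, which is what ties the probe configuration above into service-level availability. The endpoints behind a Service can be inspected directly:

bash
# Ready Pods appear as endpoints; Pods failing readiness are removed automatically
kubectl get endpoints web-app-service
kubectl get endpointslices -l kubernetes.io/service-name=web-app-service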

4.2 Ingress High Availability

yaml
# ingress.yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: web-app-ingress
  annotations:
    nginx.ingress.kubernetes.io/rewrite-target: /
    nginx.ingress.kubernetes.io/ssl-redirect: "true"
    nginx.ingress.kubernetes.io/upstream-fail-timeout: "5"
    nginx.ingress.kubernetes.io/upstream-max-fails: "3"
    
    # Health checks
    nginx.ingress.kubernetes.io/health-check-path: "/health"
    nginx.ingress.kubernetes.io/health-check-interval: "10s"
    
    # Load-balancing algorithm
    nginx.ingress.kubernetes.io/load-balance: "round_robin"
    
spec:
  ingressClassName: nginx
  tls:
  - hosts:
    - web-app.example.com
    secretName: web-app-tls
  rules:
  - host: web-app.example.com
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: web-app-service
            port:
              number: 80

---
# HA deployment of the nginx-ingress-controller
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-ingress-controller
  namespace: ingress-nginx
spec:
  replicas: 3                    # multiple controller replicas
  selector:
    matchLabels:
      app: nginx-ingress-controller
  template:
    metadata:
      labels:
        app: nginx-ingress-controller
    spec:
      affinity:
        podAntiAffinity:          # anti-affinity: at most one controller per node
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchLabels:
                app: nginx-ingress-controller
            topologyKey: kubernetes.io/hostname
      containers:
      - name: nginx-ingress-controller
        image: registry.k8s.io/ingress-nginx/controller:v1.8.0
        ports:
        - containerPort: 80
        - containerPort: 443
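
Because the ingress controller sits in the request path, it deserves the same disruption protection as the applications behind it. A PodDisruptionBudget (see section 3.2) keeps at least two controller replicas running during node drains:

yaml
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  name: nginx-ingress-controller-pdb
  namespace: ingress-nginx
spec:
  minAvailable: 2
  selector:
    matchLabels:
      app: nginx-ingress-controller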

5. Node-Level High Availability

5.1 Node Affinity and Taints

yaml
# node-affinity-example.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: critical-app
spec:
  replicas: 3
  selector:
    matchLabels:
      app: critical-app
  template:
    metadata:
      labels:
        app: critical-app
    spec:
      # Node affinity
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
            - matchExpressions:
              - key: node-type
                operator: In
                values:
                - high-performance
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 1
            preference:
              matchExpressions:
              - key: zone
                operator: In
                values:
                - zone-a
        
        # Pod anti-affinity
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchExpressions:
              - key: app
                operator: In
                values:
                - critical-app
            topologyKey: "kubernetes.io/hostname"
      
      # Tolerate the dedicated-node taint
      tolerations:
      - key: "dedicated"
        operator: "Equal"
        value: "critical-apps"
        effect: "NoSchedule"
      
      containers:
      - name: app
        image: nginx:1.21

5.2 Handling Node Failures

bash
# Taint a node (maintenance mode)
kubectl taint nodes node1 maintenance=true:NoSchedule

# Drain a node (safely migrate its Pods)
kubectl drain node1 --ignore-daemonsets --delete-emptydir-data

# Mark a node as unschedulable
kubectl cordon node1

# Bring the node back into service
kubectl uncordon node1

# Remove the taint
kubectl taint nodes node1 maintenance-

6. Storage High Availability

6.1 StatefulSet High Availability

yaml
# statefulset-ha.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: database
spec:
  serviceName: database-headless
  replicas: 3
  podManagementPolicy: Parallel   # start Pods in parallel instead of one by one
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1           # at most one Pod unavailable during rolling updates
  selector:
    matchLabels:
      app: database
  template:
    metadata:
      labels:
        app: database
    spec:
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchLabels:
                app: database
            topologyKey: kubernetes.io/hostname
      containers:
      - name: database
        image: mysql:8.0
        ports:
        - containerPort: 3306
        env:
        - name: MYSQL_ROOT_PASSWORD
          valueFrom:
            secretKeyRef:
              name: mysql-secret
              key: password
        volumeMounts:
        - name: data
          mountPath: /var/lib/mysql
        - name: config
          mountPath: /etc/mysql/conf.d
      volumes:
      - name: config
        configMap:
          name: mysql-config
  volumeClaimTemplates:
  - metadata:
      name: data
    spec:
      accessModes: ["ReadWriteOnce"]
      storageClassName: fast-ssd    # high-performance storage class
      resources:
        requests:
          storage: 100Gi

---
# StorageClass definition
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: fast-ssd
provisioner: ebs.csi.aws.com      # EBS CSI driver (required for gp3 IOPS/throughput parameters)
parameters:
  type: gp3
  iops: "3000"
  throughput: "125"
reclaimPolicy: Retain             # keep the volume after its PVC is deleted
allowVolumeExpansion: true        # allow online volume expansion
volumeBindingMode: WaitForFirstConsumer

6.2 PersistentVolume High Availability

yaml
# pv-ha.yaml
apiVersion: v1
kind: PersistentVolume
metadata:
  name: database-pv-1
spec:
  capacity:
    storage: 100Gi
  accessModes:
  - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: fast-ssd
  csi:
    driver: ebs.csi.aws.com
    volumeHandle: vol-1234567890abcdef0
    fsType: ext4

---
# PV backed by replicated storage (e.g., Rook/Ceph) for cross-zone redundancy
apiVersion: v1
kind: PersistentVolume
metadata:
  name: database-pv-replica
spec:
  capacity:
    storage: 100Gi
  accessModes:
  - ReadWriteOnce
  persistentVolumeReclaimPolicy: Retain
  storageClassName: replicated-storage
  csi:
    driver: rook-ceph-block
    volumeHandle: 0001-0024-fed5480a-f00f-417a-a51d-31d8a8144c2f-0000000000000001-ffffffff

7. Network High Availability

7.1 Multiple Network Interfaces

yaml
# multus-network.yaml
apiVersion: "k8s.cni.cncf.io/v1"
kind: NetworkAttachmentDefinition
metadata:
  name: macvlan-config
spec:
  config: '{
    "cniVersion": "0.3.0",
    "type": "macvlan",
    "master": "eth1",
    "mode": "bridge",
    "ipam": {
      "type": "host-local",
      "subnet": "192.168.1.0/24",
      "rangeStart": "192.168.1.100",
      "rangeEnd": "192.168.1.200",
      "gateway": "192.168.1.1"
    }
  }'

---
# pod-with-multiple-networks.yaml
apiVersion: v1
kind: Pod
metadata:
  name: multi-network-pod
  annotations:
    k8s.v1.cni.cncf.io/networks: macvlan-config
spec:
  containers:
  - name: app
    image: nginx:1.21
    ports:
    - containerPort: 80

7.2 Network Policies

yaml
# network-policy.yaml
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: web-app-netpol
spec:
  podSelector:
    matchLabels:
      app: web-app
  policyTypes:
  - Ingress
  - Egress
  ingress:
  - from:
    - namespaceSelector:
        matchLabels:
          name: frontend
    - podSelector:
        matchLabels:
          role: frontend
    ports:
    - protocol: TCP
      port: 80
  egress:
  - to:
    - namespaceSelector:
        matchLabels:
          name: backend
    ports:
    - protocol: TCP
      port: 3306
  - to: []                      # allow DNS lookups to any destination
    ports:
    - protocol: UDP
      port: 53
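
NetworkPolicies are additive allow rules: once a Pod is selected by any policy, all traffic not explicitly allowed is dropped. A common pattern is to pair specific policies like the one above with a namespace-wide default-deny baseline, sketched here:

yaml
# default-deny.yaml - baseline policy denying all ingress and egress in the namespace
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-all
spec:
  podSelector: {}            # selects every Pod in the namespace
  policyTypes:
  - Ingress
  - Egress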

8. Monitoring and Alerting

8.1 Health Check Configuration

yaml
# comprehensive-health-checks.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: web-app-with-health-checks
spec:
  replicas: 3
  selector:
    matchLabels:
      app: web-app
  template:
    metadata:
      labels:
        app: web-app
    spec:
      containers:
      - name: web-app
        image: nginx:1.21
        ports:
        - containerPort: 80
        
        # Startup probe - runs during container startup and gates the other probes
        startupProbe:
          httpGet:
            path: /health
            port: 80
          initialDelaySeconds: 10
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 30      # up to 30 failures (~5 minutes) before startup is considered failed
          successThreshold: 1
        
        # Liveness probe - checked continuously while the container runs
        livenessProbe:
          httpGet:
            path: /health
            port: 80
          initialDelaySeconds: 30
          periodSeconds: 10
          timeoutSeconds: 5
          failureThreshold: 3       # restart the container after 3 consecutive failures
          successThreshold: 1
        
        # Readiness probe - determines whether the container should receive traffic
        readinessProbe:
          httpGet:
            path: /ready
            port: 80
          initialDelaySeconds: 5
          periodSeconds: 5
          timeoutSeconds: 3
          failureThreshold: 3       # remove the Pod from Service endpoints after 3 consecutive failures
          successThreshold: 1
        
        # Lifecycle hooks: startup marker and graceful nginx shutdown
        lifecycle:
          postStart:
            exec:
              command:
              - /bin/sh
              - -c
              - 'echo "Container started" > /tmp/health'
          preStop:
            exec:
              command:
              - /bin/sh
              - -c
              - 'nginx -s quit; while killall -0 nginx; do sleep 1; done'

8.2 Prometheus Monitoring

yaml
# prometheus-config.yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      evaluation_interval: 15s
    
    rule_files:
    - "alerts.yml"
    
    alerting:
      alertmanagers:
      - static_configs:
        - targets:
          - alertmanager:9093
    
    scrape_configs:
    - job_name: 'kubernetes-apiservers'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https
    
    - job_name: 'kubernetes-nodes'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
    
    - job_name: 'kubernetes-pods'
      kubernetes_sd_configs:
      - role: pod
      relabel_configs:
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)

  alerts.yml: |
    groups:
    - name: kubernetes-alerts
      rules:
      - alert: PodCrashLooping
        expr: rate(kube_pod_container_status_restarts_total[15m]) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Pod {{ $labels.pod }} is crash looping"
          description: "Pod {{ $labels.pod }} in namespace {{ $labels.namespace }} is restarting frequently"
      
      - alert: NodeNotReady
        expr: kube_node_status_condition{condition="Ready",status="true"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Node {{ $labels.node }} is not ready"
          description: "Node {{ $labels.node }} has been not ready for more than 5 minutes"
      
      - alert: PodNotReady
        expr: kube_pod_status_ready{condition="true"} == 0
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Pod {{ $labels.pod }} is not ready"
          description: "Pod {{ $labels.pod }} in namespace {{ $labels.namespace }} has been not ready for more than 10 minutes"

9. Failure Recovery Mechanisms

9.1 Automatic Failure Recovery

bash
#!/bin/bash
# auto-recovery-script.sh

# Monitoring function - checks the state of critical components
check_cluster_health() {
    echo "Checking cluster health..."
    
    # Check node status (match the exact word NotReady; filtering out "Ready" would also drop NotReady lines)
    NOT_READY_NODES=$(kubectl get nodes --no-headers | grep -cw NotReady)
    if [ $NOT_READY_NODES -gt 0 ]; then
        echo "WARNING: $NOT_READY_NODES nodes are not ready"
        # automated remediation logic
        auto_fix_nodes
    fi
    
    # Check critical system Pods
    SYSTEM_PODS_NOT_READY=$(kubectl get pods -n kube-system --no-headers | grep -v Running | grep -v Completed | wc -l)
    if [ $SYSTEM_PODS_NOT_READY -gt 0 ]; then
        echo "WARNING: $SYSTEM_PODS_NOT_READY system pods are not running"
        # restart the problematic system Pods
        auto_fix_system_pods
    fi
    
    # Check application Pod status
    check_application_health
}

auto_fix_nodes() {
    echo "Attempting to fix node issues..."
    
    # Iterate over NotReady nodes
    kubectl get nodes --no-headers | grep NotReady | while read node status rest; do
        echo "Attempting to fix node: $node"
        
        # Try restarting kubelet
        ssh $node "systemctl restart kubelet"
        
        # Wait for the node to recover
        sleep 30
        
        # If it is still NotReady, put it into maintenance mode
        if kubectl get node $node | grep -q NotReady; then
            kubectl cordon $node
            kubectl drain $node --ignore-daemonsets --delete-emptydir-data --force
            
            # Send an alert notification
            send_alert "Node $node failed to recover and has been drained"
        fi
    done
}

auto_fix_system_pods() {
    echo "Fixing system pods..."
    
    # Delete problem Pods so their controllers recreate them
    kubectl get pods -n kube-system --no-headers | grep -v Running | grep -v Completed | while read pod status rest; do
        echo "Restarting system pod: $pod"
        kubectl delete pod $pod -n kube-system
    done
}

check_application_health() {
    echo "Checking application health..."
    
    # Check Deployment status (the READY column is reported as "current/desired")
    kubectl get deployments --all-namespaces --no-headers | while read namespace deployment ready uptodate available age; do
        current=${ready%/*}
        desired=${ready#*/}
        if [[ "$current" != "$desired" ]]; then
            echo "Deployment $deployment in namespace $namespace is not fully available"
            
            # Trigger a rolling restart
            kubectl rollout restart deployment/$deployment -n $namespace
        fi
    done
}

# Main loop
while true; do
    check_cluster_health
    sleep 60
done

9.2 Backup and Restore

bash
#!/bin/bash
# backup-restore.sh

# etcd backup
backup_etcd() {
    BACKUP_DIR="/backup/etcd/$(date +%Y%m%d_%H%M%S)"
    mkdir -p $BACKUP_DIR
    
    ETCDCTL_API=3 etcdctl snapshot save $BACKUP_DIR/etcd-snapshot.db \
        --endpoints=https://127.0.0.1:2379 \
        --cacert=/etc/kubernetes/pki/etcd/ca.crt \
        --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \
        --key=/etc/kubernetes/pki/etcd/healthcheck-client.key
    
    echo "etcd backup saved to $BACKUP_DIR/etcd-snapshot.db"
}

# Restore etcd from a snapshot
restore_etcd() {
    BACKUP_FILE=$1
    
    if [ ! -f "$BACKUP_FILE" ]; then
        echo "Backup file not found: $BACKUP_FILE"
        exit 1
    fi
    
    # Stop etcd
    systemctl stop etcd
    
    # Remove the existing data directory
    rm -rf /var/lib/etcd
    
    # Restore the snapshot
    ETCDCTL_API=3 etcdctl snapshot restore $BACKUP_FILE \
        --name=etcd-1 \
        --initial-cluster=etcd-1=https://192.168.1.10:2380 \
        --initial-advertise-peer-urls=https://192.168.1.10:2380 \
        --data-dir=/var/lib/etcd
    
    # Fix ownership
    chown -R etcd:etcd /var/lib/etcd
    
    # Start etcd again
    systemctl start etcd
    
    echo "etcd restored from $BACKUP_FILE"
}

# Application data backup
backup_application_data() {
    echo "Backing up application data..."
    
    # Back up all PVC definitions
    kubectl get pvc --all-namespaces -o json > /backup/app/pvc-backup-$(date +%Y%m%d).json
    
    # Back up configuration objects
    kubectl get configmaps --all-namespaces -o yaml > /backup/app/configmaps-$(date +%Y%m%d).yaml
    kubectl get secrets --all-namespaces -o yaml > /backup/app/secrets-$(date +%Y%m%d).yaml
    
    # Back up workload and service definitions
    kubectl get deployments --all-namespaces -o yaml > /backup/app/deployments-$(date +%Y%m%d).yaml
    kubectl get statefulsets --all-namespaces -o yaml > /backup/app/statefulsets-$(date +%Y%m%d).yaml
    kubectl get services --all-namespaces -o yaml > /backup/app/services-$(date +%Y%m%d).yaml
}

# Scheduled backups via cron
setup_cron_backup() {
    cat > /etc/cron.d/k8s-backup << EOF
# Back up etcd every day at 02:00
0 2 * * * root /usr/local/bin/backup-restore.sh backup_etcd

# Back up application data every day at 03:00
0 3 * * * root /usr/local/bin/backup-restore.sh backup_application_data

# Every Sunday at 04:00, delete backups older than 30 days
0 4 * * 0 root find /backup -name "*.db" -mtime +30 -delete
EOF
}

case "$1" in
    backup_etcd)
        backup_etcd
        ;;
    restore_etcd)
        restore_etcd $2
        ;;
    backup_application_data)
        backup_application_data
        ;;
    setup_cron)
        setup_cron_backup
        ;;
    *)
        echo "Usage: $0 {backup_etcd|restore_etcd <backup_file>|backup_application_data|setup_cron}"
        exit 1
        ;;
esac
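
Before a snapshot is trusted for disaster recovery, its integrity should be checked; etcdctl can report the hash, revision, and key count of a saved snapshot file (the path below is an example):

bash
# Verify a saved etcd snapshot (example path)
ETCDCTL_API=3 etcdctl snapshot status /backup/etcd/20240101_020000/etcd-snapshot.db --write-out=table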

10. Multi-Cluster High Availability

10.1 Cluster Federation

yaml
# cluster-federation.yaml
apiVersion: core.kubefed.io/v1beta1
kind: KubeFedCluster
metadata:
  name: cluster1
  namespace: kube-federation-system
spec:
  apiEndpoint: https://cluster1.example.com:6443
  caBundle: LS0tLS1CRUdJTi...  # base64 encoded CA bundle
  secretRef:
    name: cluster1-secret

---
apiVersion: core.kubefed.io/v1beta1
kind: KubeFedCluster  
metadata:
  name: cluster2
  namespace: kube-federation-system
spec:
  apiEndpoint: https://cluster2.example.com:6443
  caBundle: LS0tLS1CRUdJTi...
  secretRef:
    name: cluster2-secret

---
# FederatedDeployment spanning both clusters
apiVersion: types.kubefed.io/v1beta1
kind: FederatedDeployment
metadata:
  name: web-app-federated
  namespace: default
spec:
  template:
    metadata:
      labels:
        app: web-app
    spec:
      replicas: 3
      selector:
        matchLabels:
          app: web-app
      template:
        metadata:
          labels:
            app: web-app
        spec:
          containers:
          - name: web-app
            image: nginx:1.21
            ports:
            - containerPort: 80
  placement:
    clusters:
    - name: cluster1
    - name: cluster2
  overrides:
  - clusterName: cluster1
    clusterOverrides:
    - path: "/spec/replicas"
      value: 5
  - clusterName: cluster2
    clusterOverrides:
    - path: "/spec/replicas"
      value: 3

10.2 Cross-Cluster Service Discovery

yaml
# cross-cluster-service.yaml
apiVersion: networking.istio.io/v1beta1
kind: ServiceEntry
metadata:
  name: external-service
spec:
  hosts:
  - external-service.remote-cluster.local
  ports:
  - number: 80
    name: http
    protocol: HTTP
  location: MESH_EXTERNAL
  resolution: DNS
  endpoints:
  - address: external-service.remote-cluster.local

---
# Virtual Service for cross-cluster traffic
apiVersion: networking.istio.io/v1beta1
kind: VirtualService
metadata:
  name: cross-cluster-routing
spec:
  hosts:
  - web-app.example.com
  http:
  - match:
    - headers:
        region:
          exact: us-west
    route:
    - destination:
        host: web-app-service.default.svc.cluster.local
      weight: 100
  - route:
    - destination:
        host: external-service.remote-cluster.local
      weight: 100

Summary

Kubernetes guarantees service high availability through mechanisms at several layers:

Control plane high availability:

  1. Multiple master nodes: multi-replica deployments of the API Server, Scheduler, and Controller Manager
  2. etcd cluster: a highly available distributed key-value store
  3. Load balancing: a load balancer in front of the API Servers

Application-level high availability:

  1. Multi-replica deployments: ReplicaSets/Deployments keep several instances of each application running
  2. Health checks: liveness, readiness, and startup probes
  3. Rolling updates: zero-downtime update strategies
  4. Autoscaling: HPA/VPA adjust capacity to match load

Infrastructure high availability:

  1. Node distribution: Pod/node anti-affinity spreads workloads across failure domains
  2. Storage high availability: replicated storage and cross-zone deployment
  3. Network redundancy: multiple network interfaces and network policies
  4. Fault tolerance: tolerations and PodDisruptionBudgets

Operational safeguards:

  1. Monitoring and alerting: Prometheus + Alertmanager
  2. Automatic recovery: restarts and rescheduling
  3. Backup and restore: regular backups and fast recovery
  4. Disaster recovery: multi-cluster deployment and federation

Used together, these mechanisms allow Kubernetes to keep services continuously available across a wide range of failure scenarios and to deliver genuinely highly available deployments.
