二、核心机制:控制循环(Reconcile Loop)
核心思想
go
// Pseudocode: the essence of a control loop.
// Each iteration reads the actual and the desired state, reconciles when the
// two differ, and then sleeps for syncPeriod before checking again.
for {
actualState := getActualState() // fetch the actual state (from the K8s API)
desiredState := getDesiredState() // fetch the desired state (from the CRD spec)
if actualState != desiredState {
reconcile(desiredState) // reconcile: drive the actual state toward the desired state
}
time.Sleep(syncPeriod)
}
实际代码结构(使用 controller-runtime)
go
// controllers/mysql_controller.go
package controllers
import (
	"context"
	"time"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/handler"
	"sigs.k8s.io/controller-runtime/pkg/log"
	"sigs.k8s.io/controller-runtime/pkg/source"

	databasev1 "my-operator/api/v1" // your CRD API package
)
// MySQLReconciler carries out the reconcile logic for MySQL resources.
// It embeds client.Client, so the client's methods can be called directly on
// the reconciler.
type MySQLReconciler struct {
client.Client
// Scheme is the runtime scheme held by the reconciler.
Scheme *runtime.Scheme
}
//+kubebuilder:rbac:groups=database.example.com,resources=mysqls,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=database.example.com,resources=mysqls/status,verbs=get;update;patch
//+kubebuilder:rbac:groups=database.example.com,resources=mysqls/finalizers,verbs=update
//+kubebuilder:rbac:groups=apps,resources=deployments,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups=apps,resources=statefulsets,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=configmaps,verbs=get;list;watch;create;update;patch;delete
//+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch
//+kubebuilder:rbac:groups="",resources=events,verbs=create;patch

// Reconcile drives one MySQL resource toward its desired state.
//
// It loads the resource named by req, hands deletion over to reconcileDelete,
// makes sure the finalizer is present, reconciles the ConfigMap, StatefulSet,
// Service and (when enabled) replication in that order, writes the status
// subresource, and finally asks to be requeued after 60 seconds.
//
// A resource that no longer exists is not an error. Any other failure is
// returned as-is so that the request is retried.
func (r *MySQLReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	// Named logger rather than log, so the log package is not shadowed.
	logger := log.FromContext(ctx)

	// 1. Fetch the CR instance; a NotFound error means it is already gone.
	mysql := &databasev1.MySQL{}
	if err := r.Get(ctx, req.NamespacedName, mysql); err != nil {
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}

	// 2. A non-zero deletion timestamp means the object is being deleted
	// (finalizer handling).
	if !mysql.DeletionTimestamp.IsZero() {
		return r.reconcileDelete(ctx, mysql)
	}

	// 3. Make sure the finalizer is present (guards against data loss on
	// cascading deletion).
	if !controllerutil.ContainsFinalizer(mysql, mysqlFinalizer) {
		logger.Info("adding finalizer", "finalizer", mysqlFinalizer)
		controllerutil.AddFinalizer(mysql, mysqlFinalizer)
		if err := r.Update(ctx, mysql); err != nil {
			return ctrl.Result{}, err
		}
	}

	// 4. Reconcile: make the actual state equal the desired state.
	// 4.1 Create/update the ConfigMap (MySQL configuration).
	if err := r.reconcileConfigMap(ctx, mysql); err != nil {
		return ctrl.Result{}, err
	}
	// 4.2 Create/update the StatefulSet (MySQL instances).
	if err := r.reconcileStatefulSet(ctx, mysql); err != nil {
		return ctrl.Result{}, err
	}
	// 4.3 Create/update the Service (access entry point).
	if err := r.reconcileService(ctx, mysql); err != nil {
		return ctrl.Result{}, err
	}
	// 4.4 Master/replica replication (custom operational logic).
	if mysql.Spec.Replication.Enabled {
		if err := r.reconcileReplication(ctx, mysql); err != nil {
			return ctrl.Result{}, err
		}
	}

	// 5. Update the status subresource.
	mysql.Status.Phase = "Running"
	// NOTE(review): this copies the desired replica count from the spec
	// instead of an observed value — confirm whether the StatefulSet's
	// ready count should be reported here.
	mysql.Status.ReadyReplicas = mysql.Spec.Replicas
	if err := r.Status().Update(ctx, mysql); err != nil {
		return ctrl.Result{}, err
	}
	logger.V(1).Info("reconciled", "phase", mysql.Status.Phase)

	// 6. Requeue for a periodic re-check.
	return ctrl.Result{RequeueAfter: 60 * time.Second}, nil
}
// SetupWithManager registers the reconciler with mgr and declares which
// resources it reacts to.
func (r *MySQLReconciler) SetupWithManager(mgr ctrl.Manager) error {
	// Additional watch on Pods; findMySQLForPod maps a Pod to reconcile requests.
	podSource := &source.Kind{Type: &corev1.Pod{}}
	podHandler := handler.EnqueueRequestsFromMapFunc(r.findMySQLForPod)

	bldr := ctrl.NewControllerManagedBy(mgr)
	bldr = bldr.For(&databasev1.MySQL{})    // the MySQL CR itself
	bldr = bldr.Owns(&appsv1.StatefulSet{}) // owned StatefulSets
	bldr = bldr.Owns(&corev1.Service{})     // owned Services
	bldr = bldr.Watches(podSource, podHandler)
	return bldr.Complete(r)
}
三、CRD 定义详解
完整 CRD 示例
go
// api/v1/mysql_types.go
package v1
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// MySQLSpec defines the desired state of a MySQL resource.
type MySQLSpec struct {
// Replicas is the requested replica count. The markers below give it a
// minimum of 1 and a default of 1.
// +kubebuilder:validation:Minimum=1
// +kubebuilder:default=1
Replicas int32 `json:"replicas,omitempty"`
// Version is marked as a required field.
// +kubebuilder:validation:Required
Version string `json:"version"` // MySQL version, e.g. "8.0"
// Storage configuration.
Storage StorageSpec `json:"storage"`
// Master/replica replication configuration.
Replication ReplicationSpec `json:"replication,omitempty"`
// Backup configuration.
Backup BackupSpec `json:"backup,omitempty"`
// Resource requests and limits.
Resources corev1.ResourceRequirements `json:"resources,omitempty"`
}
// StorageSpec describes the storage requested for a MySQL resource.
type StorageSpec struct {
// StorageClass is restricted by the marker below to standard, ssd or nvme.
// +kubebuilder:validation:Enum=standard;ssd;nvme
StorageClass string `json:"storageClass"`
// Size must match the pattern below: digits followed by Gi or Mi.
// +kubebuilder:validation:Pattern=^\d+(Gi|Mi)$
Size string `json:"size"` // e.g. "100Gi"
}
// ReplicationSpec configures master/replica replication.
type ReplicationSpec struct {
// Enabled switches replication handling on.
Enabled bool `json:"enabled"`
// ReadReplicas has a minimum of 1 when it is set.
// +kubebuilder:validation:Minimum=1
ReadReplicas int32 `json:"readReplicas,omitempty"`
}
// BackupSpec configures backups.
type BackupSpec struct {
// Enabled switches backups on.
Enabled bool `json:"enabled"`
Schedule string `json:"schedule,omitempty"` // cron expression
// RetentionDays is, judging by its name, how many days backups are kept.
// NOTE(review): the other integer fields in this file are int32; consider
// using a fixed-width type here as well — confirm before changing the API.
RetentionDays int `json:"retentionDays,omitempty"`
}
// MySQLStatus defines the observed state of a MySQL resource.
type MySQLStatus struct {
// Phase is restricted by the marker below to Pending, Creating, Running,
// Failed or Deleting.
// +kubebuilder:validation:Enum=Pending;Creating;Running;Failed;Deleting
Phase string `json:"phase,omitempty"`
// ReadyReplicas is the reported number of ready replicas.
ReadyReplicas int32 `json:"readyReplicas,omitempty"`
// CurrentMaster is the current master node.
CurrentMaster string `json:"currentMaster,omitempty"`
// ReplicationLag is the replication lag in seconds.
ReplicationLag int64 `json:"replicationLag,omitempty"`
// LastBackupTime is the time of the last backup; nil when unset.
LastBackupTime *metav1.Time `json:"lastBackupTime,omitempty"`
// Conditions is the list of conditions (detailed status).
Conditions []metav1.Condition `json:"conditions,omitempty"`
}
//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+kubebuilder:resource:scope=Namespaced,shortName=ms
//+kubebuilder:printcolumn:name="Version",type=string,JSONPath=`.spec.version`
//+kubebuilder:printcolumn:name="Replicas",type=integer,JSONPath=`.spec.replicas`
//+kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase`
//+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`

// MySQL is the root API type: it pairs a MySQLSpec (desired state) with a
// MySQLStatus (observed state). The markers above enable the status
// subresource, declare a namespaced scope with the short name "ms", and add
// the Version, Replicas, Phase and Age printer columns.
type MySQL struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
Spec MySQLSpec `json:"spec,omitempty"`
Status MySQLStatus `json:"status,omitempty"`
}
//+kubebuilder:object:root=true

// MySQLList is the list type for MySQL; Items holds the listed objects.
type MySQLList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
Items []MySQL `json:"items"`
}
生成的 CRD YAML
yaml
# config/crd/bases/database.example.com_mysqls.yaml
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
name: mysqls.database.example.com
spec:
group: database.example.com
names:
kind: MySQL
listKind: MySQLList
plural: mysqls
singular: mysql
shortNames:
- ms
scope: Namespaced
versions:
- name: v1
served: true
storage: true
schema:
openAPIV3Schema:
type: object
properties:
spec:
type: object
required: ["version", "storage"]
properties:
replicas:
type: integer
minimum: 1
default: 1
version:
type: string
storage:
type: object
properties:
storageClass:
type: string
enum: ["standard", "ssd", "nvme"]
size:
type: string
pattern: '^\d+(Gi|Mi)$'
# ... 其他字段
status:
type: object
properties:
phase:
type: string
enum: ["Pending", "Creating", "Running", "Failed", "Deleting"]
# ... 其他字段
subresources:
status: {} # 启用 status 子资源
additionalPrinterColumns:
- name: Version
type: string
jsonPath: .spec.version
- name: Replicas
type: integer
jsonPath: .spec.replicas
- name: Phase
type: string
jsonPath: .status.phase
- name: Age
type: date
jsonPath: .metadata.creationTimestamp
四、高级模式与最佳实践
1. 复杂运维逻辑:主从切换
go
// reconcileReplication detects a failed master, fails over to a replica, and
// records the current replication lag on mysql.Status.
//
// Status changes are made on the in-memory object only; this function does
// not write them to the API server. It returns an error when the Pods cannot
// be listed, when no replica is available for promotion, or when the failover
// itself fails.
func (r *MySQLReconciler) reconcileReplication(ctx context.Context, mysql *v1.MySQL) error {
	// 1. List this cluster's Pods. The query is limited to the CR's own
	// namespace so a same-named cluster in another namespace is never matched.
	pods := &corev1.PodList{}
	if err := r.List(ctx, pods,
		client.InNamespace(mysql.Namespace),
		client.MatchingLabels{"app": "mysql", "cluster": mysql.Name},
	); err != nil {
		return err
	}

	// 2. Check the health of the master.
	masterPod := findMasterPod(pods, mysql.Status.CurrentMaster)
	if masterPod == nil || !isPodHealthy(masterPod) {
		// The master is missing or unhealthy: trigger a failover.
		newMaster := electNewMaster(pods, masterPod)
		if newMaster == nil {
			return fmt.Errorf("no available replica for failover")
		}

		// 3. Perform the switch (runs MySQL commands).
		if err := r.performFailover(ctx, mysql, newMaster); err != nil {
			// "Failed" is a value the phase enum on MySQLStatus accepts.
			mysql.Status.Phase = "Failed"
			return err
		}

		// 4. Update the status. masterPod may be nil in this branch, so the
		// previous master's name falls back to the one recorded in the status.
		oldMaster := mysql.Status.CurrentMaster
		if masterPod != nil {
			oldMaster = masterPod.Name
		}
		mysql.Status.CurrentMaster = newMaster.Name
		r.Recorder.Eventf(mysql, corev1.EventTypeWarning, "Failover",
			"Master switched from %s to %s", oldMaster, newMaster.Name)
	}

	// 5. Record the replication lag.
	mysql.Status.ReplicationLag = checkReplicationLag(pods)
	return nil
}
// performFailover runs the promotion command inside the newMaster Pod.
//
// Planned sequence, per the notes in this snippet:
//   - on the new master: STOP SLAVE; RESET SLAVE ALL;
//   - on the other replicas: CHANGE MASTER TO MASTER_HOST=newMaster;
//   - point the Service selector at the new master.
//
// Only the first step is issued here, through execInPod (a sidecar or an exec
// into the container).
func (r *MySQLReconciler) performFailover(ctx context.Context, mysql *v1.MySQL, newMaster *corev1.Pod) error {
	return r.execInPod(ctx, newMaster, []string{"mysql", "-e", "STOP SLAVE; RESET SLAVE ALL;"})
}
2. Webhook 校验与默认值
go
// api/v1/mysql_webhook.go
package v1
import (
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/webhook"
)
// mysqllog is the logger used by the MySQL webhook methods, named "mysql-webhook".
var mysqllog = logf.Log.WithName("mysql-webhook")
// SetupWebhookWithManager registers the webhooks for the MySQL type with mgr.
func (r *MySQL) SetupWebhookWithManager(mgr ctrl.Manager) error {
	webhookBuilder := ctrl.NewWebhookManagedBy(mgr).For(r)
	return webhookBuilder.Complete()
}
//+kubebuilder:webhook:path=/mutate-database-example-com-v1-mysql,mutating=true,failurePolicy=fail,sideEffects=None,groups=database.example.com,resources=mysqls,verbs=create;update,versions=v1,name=mmysql.kb.io,admissionReviewVersions=v1
var _ webhook.Defaulter = &MySQL{}

// Default fills in default values: one replica when none is set, a "10Gi"
// storage size when the size is empty, and the managed-by label.
func (r *MySQL) Default() {
	mysqllog.Info("default", "name", r.Name)

	spec := &r.Spec
	if spec.Replicas == 0 {
		spec.Replicas = 1
	}
	if spec.Storage.Size == "" {
		spec.Storage.Size = "10Gi"
	}

	// Add the default label, creating the label map first when it is nil.
	const managedByLabel = "app.kubernetes.io/managed-by"
	if r.Labels == nil {
		r.Labels = map[string]string{managedByLabel: "mysql-operator"}
		return
	}
	r.Labels[managedByLabel] = "mysql-operator"
}
//+kubebuilder:webhook:path=/validate-database-example-com-v1-mysql,mutating=false,failurePolicy=fail,sideEffects=None,groups=database.example.com,resources=mysqls,verbs=create;update,versions=v1,name=vmysql.kb.io,admissionReviewVersions=v1
var _ webhook.Validator = &MySQL{}

// ValidateCreate validates a MySQL object on creation. Checks run in order
// and the first failing one is reported: the version must be set, replication
// needs at least two replicas, and the storage size must be well-formed.
func (r *MySQL) ValidateCreate() error {
	mysqllog.Info("validate create", "name", r.Name)

	spec := &r.Spec
	switch {
	case spec.Version == "":
		return fmt.Errorf("version is required")
	case spec.Replication.Enabled && spec.Replicas < 2:
		return fmt.Errorf("replicas must be >= 2 when replication is enabled")
	case !validStorageSize(spec.Storage.Size):
		// Storage size format check.
		return fmt.Errorf("invalid storage size format: %s", spec.Storage.Size)
	}
	return nil
}
// ValidateUpdate validates an update and rejects dangerous changes: the
// storage size may not shrink and the storage class may not change.
//
// old is the object as it was before the update. An error is returned when
// old is not a *MySQL, instead of panicking on the type assertion.
func (r *MySQL) ValidateUpdate(old runtime.Object) error {
	mysqllog.Info("validate update", "name", r.Name)

	oldMySQL, ok := old.(*MySQL)
	if !ok {
		return fmt.Errorf("expected a MySQL object but got %T", old)
	}

	// Shrinking storage is forbidden (risk of data loss).
	if r.Spec.Storage.Size != oldMySQL.Spec.Storage.Size {
		oldSize := parseSize(oldMySQL.Spec.Storage.Size)
		newSize := parseSize(r.Spec.Storage.Size)
		if newSize < oldSize {
			return fmt.Errorf("storage size cannot be reduced")
		}
	}

	// Switching the storage class at runtime is forbidden.
	if r.Spec.Storage.StorageClass != oldMySQL.Spec.Storage.StorageClass {
		return fmt.Errorf("storageClass is immutable")
	}
	return nil
}

// ValidateDelete completes the webhook.Validator interface asserted for
// MySQL. Deletion is always allowed.
func (r *MySQL) ValidateDelete() error {
	mysqllog.Info("validate delete", "name", r.Name)
	return nil
}
3. 依赖管理:等待外部资源就绪
go
// reconcilePVC handles a dependent resource: it makes sure the "<name>-data"
// PVC exists and reports whether it is bound.
//
// "Not ready yet" is signalled through a non-nil error: one is returned right
// after the PVC is created, and again for as long as the claim is not in the
// Bound phase. nil is returned only once the PVC is bound.
func (r *MySQLReconciler) reconcilePVC(ctx context.Context, mysql *v1.MySQL) error {
	key := types.NamespacedName{
		Namespace: mysql.Namespace,
		Name:      mysql.Name + "-data",
	}
	pvc := &corev1.PersistentVolumeClaim{}

	switch err := r.Get(ctx, key, pvc); {
	case err == nil:
		// Found: continue with the bound check below.
	case errors.IsNotFound(err):
		// Missing: create it, then report that binding is still pending.
		if createErr := r.Create(ctx, r.buildPVC(mysql)); createErr != nil {
			return createErr
		}
		return fmt.Errorf("PVC created, waiting for binding")
	default:
		return err
	}

	// Check the PVC phase; anything other than Bound means retry later.
	if pvc.Status.Phase == corev1.ClaimBound {
		return nil
	}
	return fmt.Errorf("PVC %s not bound yet, phase: %s", pvc.Name, pvc.Status.Phase)
}
五、Operator 开发工具链
1. kubebuilder 脚手架
bash
# 1. Initialize the project
mkdir mysql-operator && cd mysql-operator
kubebuilder init --domain example.com --repo github.com/example/mysql-operator
# 2. Create the API and the controller
kubebuilder create api --group database --version v1 --kind MySQL
# 3. Add the webhook
kubebuilder create webhook --group database --version v1 --kind MySQL --defaulting --programmatic-validation
# 4. Generate code and manifests
make generate # generate the DeepCopy code
make manifests # generate the CRD YAML
make install # install the CRDs into the cluster
# 5. Debug locally
make run # run the controller locally (connected to the K8s cluster)
# 6. Build the image
make docker-build IMG=registry.example.com/mysql-operator:v1.0.0
make deploy IMG=registry.example.com/mysql-operator:v1.0.0
2. 项目结构
text
mysql-operator/
├── api/
│ └── v1/
│ ├── mysql_types.go # CRD 定义
│ ├── mysql_webhook.go # 校验逻辑
│ ├── groupversion_info.go # Scheme 注册
│ └── zz_generated.deepcopy.go # 自动生成的代码
├── controllers/
│ ├── mysql_controller.go # 主控制器
│ ├── mysql_controller_test.go # 单元测试
│ └── suite_test.go # 测试套件
├── config/
│ ├── crd/ # CRD YAML
│ ├── manager/ # Operator 部署配置
│ ├── rbac/ # RBAC 权限
│ └── samples/ # 示例 CR
├── internal/
│ ├── mysql/ # MySQL 操作封装
│ └── utils/ # 工具函数
├── main.go # 入口
├── go.mod
└── Makefile
六、面试高频问题
Q1: Operator 和 Helm 的区别?
| 维度 | Helm | Operator |
|---|---|---|
| 定位 | 包管理器(模板渲染) | 运维自动化(控制循环) |
| 能力 | 安装/升级/删除 | 全生命周期管理 + 自动化运维 |
| 状态管理 | 无状态,不监听资源 | 持续调协,处理异常 |
| 适用场景 | 无状态应用部署 | 有状态复杂应用(数据库、中间件) |
| 示例 | helm install nginx | 自动故障恢复、备份、升级 |
关系:Helm 可以安装 Operator,Operator 可以管理 Helm Release
Q2: 如何保证 Operator 的高可用?
go
// 1. 多副本部署
// config/manager/manager.yaml
spec:
replicas: 3 # 3 个 Controller 副本
// 2. Leader election.
// main builds a manager with leader election enabled and runs it until a
// termination signal arrives. The context from ctrl.SetupSignalHandler is
// what carries the cancellation signal into the manager.
func main() {
	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
		Scheme:                  scheme,
		LeaderElection:          true,                  // enable leader election
		LeaderElectionID:        "mysql-operator-lock", // lock name
		LeaderElectionNamespace: "operators",           // namespace that holds the lock
	})
	if err != nil {
		// Without a manager nothing can run; fail fast with a non-zero exit.
		panic(err)
	}

	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
		panic(err)
	}
}
// 3. 优雅关闭(处理完当前 Reconcile 再退出)
// 通过 context 传递取消信号
Q3: 如何处理并发冲突?
go
// Optimistic locking based on ResourceVersion.
// updateWithRetry sets the status phase to "Running" on the latest version of
// mysql, retrying through retry.RetryOnConflict with retry.DefaultRetry.
func (r *MySQLReconciler) updateWithRetry(ctx context.Context, mysql *v1.MySQL) error {
	key := client.ObjectKeyFromObject(mysql)

	attempt := func() error {
		// Re-read the latest version on every attempt.
		fresh := &v1.MySQL{}
		if err := r.Get(ctx, key, fresh); err != nil {
			return err
		}

		// Modify the latest version, then write it back. A ResourceVersion
		// mismatch produces a conflict error, which triggers another attempt.
		fresh.Status.Phase = "Running"
		return r.Status().Update(ctx, fresh)
	}

	return retry.RetryOnConflict(retry.DefaultRetry, attempt)
}
Q4: Operator 的性能优化?
go
// 1. Limit concurrency.
// NOTE(review): mgr and err are assigned but never used in this snippet, which
// Go rejects at compile time — the error handling and the manager start-up
// appear to have been left out.
func main() {
mgr, err := ctrl.NewManager(cfg, ctrl.Options{
// Cap the number of Reconcile calls processed at the same time.
// NOTE(review): in controller-runtime this setting presumably belongs to
// controller.Options (set through the builder's WithOptions) rather than to
// the manager's ctrl.Options — confirm against the version in use.
MaxConcurrentReconciles: 10,
})
}
// 2. Smart retry (exponential backoff).
// When the returned error is non-nil, controller-runtime ignores the Result
// and requeues through the workqueue rate limiter instead, so the computed
// delay only takes effect with a nil error. Log or record err before this
// line, because it is no longer returned.
return ctrl.Result{RequeueAfter: time.Duration(math.Pow(2, float64(retryCount))) * time.Second}, nil
// 3. Event filtering (fewer unnecessary Reconcile calls).
// SetupWithManager registers the reconciler for MySQL objects with a
// generation-changed event filter.
func (r *MySQLReconciler) SetupWithManager(mgr ctrl.Manager) error {
	// React to spec changes only; status updates are ignored.
	specChanges := predicate.GenerationChangedPredicate{}

	bldr := ctrl.NewControllerManagedBy(mgr).For(&v1.MySQL{})
	return bldr.WithEventFilter(specChanges).Complete(r)
}
// 4. 批量处理(使用 workqueue 的限速队列)
七、实战:部署 MySQL Operator
yaml
# 1. 安装 CRD
kubectl apply -f config/crd/bases/
# 2. 部署 Operator
kubectl apply -f config/manager/
# 3. 创建 MySQL 集群
# config/samples/database_v1_mysql.yaml
apiVersion: database.example.com/v1
kind: MySQL
metadata:
name: production-db
spec:
version: "8.0"
replicas: 3
storage:
storageClass: "ssd"
size: "500Gi"
replication:
enabled: true
readReplicas: 2
backup:
enabled: true
schedule: "0 2 * * *" # 每天凌晨2点
retentionDays: 7
resources:
requests:
memory: "4Gi"
cpu: "2"
limits:
memory: "8Gi"
cpu: "4"
# 4. 查看状态
$ kubectl get mysql
NAME VERSION REPLICAS PHASE AGE
production-db 8.0 3 Running 10m
$ kubectl describe mysql production-db
...
Status:
Current Master: production-db-0
Phase: Running
Ready Replicas: 3
Replication Lag: 0
Conditions:
Type: Ready
Status: True
Last Transition: 2024-01-15T10:30:00Z
Reason: AllReplicasReady
Message: All 3 replicas are running and synced
核心记忆点:
Operator 的本质是将"运维人员的领域知识"(如何部署、扩缩容、备份、故障恢复)编码为软件,通过 Kubernetes 的控制循环机制实现有状态应用的自动化全生命周期管理。