自动化运维命令Operator项目开发
- 核心功能:
- 通过CRD定义运维任务
- Operator自动执行任务
- 状态监控
- 技术点
- 集成prometheus指标
- 对接webhook完成mutate和validate
- 根据cron规则自动创建cronjob实现自动执行
- kubebuilder框架快速上手
- 根据创建的job完成情况实时更新自定义资源状态
一、初始化Operator项目
sh
#--domain自定义域名,--repo自定义go module路径(需与代码中的导入路径ops-operator/api/v1保持一致)
kubebuilder init --domain example.com --repo ops-operator
二、创建API和Controller
sh
#--group自定义API组,--version自定义版本号,--kind自定义资源类型(Kind)
kubebuilder create api --group ops --version v1 --kind Task
三、创建自定义资源定义CRD
yaml
#config/crd/bases/ops.example.com_tasks.yaml由controller-gen生成(执行make manifests),spec和status字段应在api/v1/task_types.go中定义后重新生成,不建议直接手工编辑该文件;生成结果如下
---
apiVersion: apiextensions.k8s.io/v1
kind: CustomResourceDefinition
metadata:
annotations:
controller-gen.kubebuilder.io/version: v0.17.1
name: tasks.ops.example.com
spec:
group: ops.example.com
names:
kind: Task
listKind: TaskList
plural: tasks
singular: task
scope: Namespaced
versions:
- additionalPrinterColumns:
- jsonPath: .spec.command
name: Command
type: string
- jsonPath: .status.state
name: State
type: string
- jsonPath: .status.result
name: Result
type: string
- jsonPath: .status.lastRunTime
name: LastRunTime
type: string
- jsonPath: .metadata.creationTimestamp
name: Age
type: date
name: v1
schema:
openAPIV3Schema:
description: Task is the Schema for the tasks API.
properties:
apiVersion:
description: |-
APIVersion defines the versioned schema of this representation of an object.
Servers should convert recognized schemas to the latest internal value, and
may reject unrecognized values.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
type: string
kind:
description: |-
Kind is a string value representing the REST resource this object represents.
Servers may infer this from the endpoint the client submits requests to.
Cannot be updated.
In CamelCase.
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
type: string
metadata:
type: object
spec:
description: TaskSpec defines the desired state of Task.
properties:
args:
description: 任务需要执行的命令的参数
items:
type: string
type: array
command:
description: 任务需要执行的命令
type: string
config:
description: 外部化配置
type: string
image:
description: 镜像
type: string
ip:
description: 远程服务器IP
type: string
port:
description: 远程服务器端口
type: integer
privateKeySecretRef:
description: 远程服务器私钥所在的Secret(通过name和key引用)
properties:
key:
description: The key of the secret to select from. Must be a
valid secret key.
type: string
name:
default: ""
description: |-
Name of the referent.
This field is effectively required, but due to backwards compatibility is
allowed to be empty. Instances of this type with an empty value here are
almost certainly wrong.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names
type: string
optional:
description: Specify whether the Secret or its key must be defined
type: boolean
required:
- key
type: object
x-kubernetes-map-type: atomic
retries:
description: job重试次数
type: integer
schedule:
description: 定时执行表达式(例如cron格式)
type: string
username:
description: 远程服务器用户名
type: string
required:
- command
- image
- ip
- port
- privateKeySecretRef
- retries
- username
type: object
status:
description: TaskStatus defines the observed state of Task.
properties:
lastRunTime:
description: 最近一次执行时间
type: string
result:
description: 任务的执行结果
type: string
state:
description: 任务当前状态pending、running、completed、failed
type: string
required:
- lastRunTime
- result
- state
type: object
type: object
served: true
storage: true
subresources:
status: {}
四、实现controller逻辑编写加入自定义prometheus指标
go
#编辑internal/controller/task_controller.go
/*
Copyright 2025.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package controller
import (
"bufio"
"context"
"errors"
"fmt"
"sort"
"strconv"
"strings"
"time"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/crypto/ssh"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/metrics"
opsv1 "ops-operator/api/v1"
batchv1 "k8s.io/api/batch/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
corev1 "k8s.io/api/core/v1"
)
// startTime is mutable package-level state shared by every Task handled by
// this controller.
// NOTE(review): it is neither synchronized nor keyed per Task, so it cannot
// safely carry per-task timing; prefer timestamps recorded on the Job or the
// Task itself.
var startTime time.Time

// TaskReconciler reconciles a Task object.
type TaskReconciler struct {
// Client is the embedded controller-runtime client used for every API read and write.
client.Client
// Scheme is handed to ctrl.SetControllerReference when a Task is made the owner of its CronJob.
Scheme *runtime.Scheme
}
// +kubebuilder:rbac:groups=ops.example.com,resources=tasks,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=ops.example.com,resources=tasks/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=ops.example.com,resources=tasks/finalizers,verbs=update
// +kubebuilder:rbac:groups=batch,resources=cronjobs,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=batch,resources=cronjobs/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch

// Reconcile moves a Task towards the state described by its spec.
//
// A Task without Spec.Schedule is executed once over SSH by the operator
// itself; a Task with Spec.Schedule is materialised as a CronJob owned by the
// Task, and the Task status mirrors the most recent Job of that CronJob.
//
// The jobs and secrets RBAC markers above are required because
// updateTaskResult lists Jobs and remote_cmd reads the private-key Secret.
//
// It returns a non-nil error (which makes controller-runtime requeue) only
// when the Task cannot be read or its status cannot be written; execution
// failures are recorded in Status.State / Status.Result instead.
func (r *TaskReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	var task opsv1.Task
	if err := r.Get(ctx, req.NamespacedName, &task); err != nil {
		if apierrors.IsNotFound(err) {
			// The Task is gone; there is nothing left to reconcile.
			return ctrl.Result{}, nil
		}
		return ctrl.Result{}, err
	}

	// Counted once for every Task that enters reconciliation.
	ScheduledTaskTotal.Inc()

	if task.Spec.Schedule == "" {
		return r.reconcileOnce(ctx, &task)
	}
	return r.reconcileCron(ctx, req, &task)
}

// reconcileOnce runs an unscheduled Task a single time over SSH and records
// the outcome in the Task status.
func (r *TaskReconciler) reconcileOnce(ctx context.Context, task *opsv1.Task) (ctrl.Result, error) {
	if task.Status.State == "Running" || task.Status.State == "Completed" {
		return ctrl.Result{}, nil
	}

	// Keep the start instant as a time.Time: re-parsing the formatted string
	// would interpret local wall-clock time as UTC and skew the duration.
	start := time.Now()
	task.Status.State = "Running"
	task.Status.LastRunTime = start.Format("2006-01-02 15:04:05")
	if err := r.Status().Update(ctx, task); err != nil {
		return ctrl.Result{}, err
	}

	// Raise the gauge only once the task is really about to run, and always
	// lower it again, whichever way this function returns.
	ScheduledTaskActive.Inc()
	defer ScheduledTaskActive.Dec()

	result, err := r.executeTask(ctx, task)
	if err != nil {
		log.FromContext(ctx).Error(err, "task execution failed", "task", client.ObjectKeyFromObject(task))
		task.Status.State = "Failed"
		task.Status.Result = err.Error()
		ScheduledTaskErrors.Inc()
	} else {
		task.Status.State = "Completed"
		task.Status.Result = result
	}
	ScheduledTaskDuration.Observe(time.Since(start).Seconds())

	if err := r.Status().Update(ctx, task); err != nil {
		return ctrl.Result{}, err
	}
	return ctrl.Result{}, nil
}

// reconcileCron creates or updates the CronJob backing a scheduled Task and
// then refreshes the Task status.
func (r *TaskReconciler) reconcileCron(ctx context.Context, req ctrl.Request, task *opsv1.Task) (ctrl.Result, error) {
	// Scheduled tasks honour Spec.Args exactly like the one-shot path does.
	command := task.Spec.Command
	if len(task.Spec.Args) > 0 {
		command += " " + strings.Join(task.Spec.Args, " ")
	}

	cronjob := createCronjob(task, command, task.Namespace)
	if err := ctrl.SetControllerReference(task, cronjob, r.Scheme); err != nil {
		ScheduledTaskErrors.Inc()
		return ctrl.Result{}, fmt.Errorf("设置controller引用失败: %w", err)
	}

	ScheduledTaskActive.Inc()
	defer ScheduledTaskActive.Dec()

	var (
		state, result string
		syncFromJobs  bool
	)
	existing := &batchv1.CronJob{}
	err := r.Get(ctx, client.ObjectKeyFromObject(cronjob), existing)
	switch {
	case apierrors.IsNotFound(err):
		if err := r.Create(ctx, cronjob); err != nil {
			state, result = "Failed", fmt.Sprintf("创建cronjob失败: %v", err)
			ScheduledTaskErrors.Inc()
		} else {
			state, result = "Completed", "cronjob创建成功"
			ScheduledTaskTotal.Inc()
		}
	case err != nil:
		state, result = "Failed", fmt.Sprintf("获取cronjob失败: %v", err)
		ScheduledTaskErrors.Inc()
	default:
		if err := r.Update(ctx, cronjob); err != nil {
			state, result = "Failed", fmt.Sprintf("更新cronjob失败: %v", err)
			ScheduledTaskErrors.Inc()
		} else {
			syncFromJobs = true
			ScheduledTaskTotal.Inc()
		}
	}

	// Re-read the Task so the status write below carries the latest
	// resourceVersion.
	if err := r.Get(ctx, req.NamespacedName, task); err != nil {
		return ctrl.Result{}, err
	}
	if syncFromJobs {
		if err := r.updateTaskResult(ctx, task); err != nil {
			state, result = "Failed", fmt.Sprintf("更新任务状态失败: %v", err)
			ScheduledTaskErrors.Inc()
		}
	}
	if state != "" {
		task.Status.State = state
		task.Status.Result = result
	}

	if err := r.Status().Update(ctx, task); err != nil {
		return ctrl.Result{}, err
	}
	return ctrl.Result{}, nil
}
// SetupWithManager registers this reconciler with mgr as the controller named
// "task": it reconciles Task objects and also watches the CronJobs they own.
func (r *TaskReconciler) SetupWithManager(mgr ctrl.Manager) error {
	b := ctrl.NewControllerManagedBy(mgr)
	b = b.For(&opsv1.Task{})
	b = b.Owns(&batchv1.CronJob{})
	b = b.Named("task")
	return b.Complete(r)
}
// executeTask runs the Task's command on the remote host named in its spec
// and returns the command output.
//
// The command line is Spec.Command followed by Spec.Args, joined with single
// spaces. The private-key Secret is looked up in the Task's own namespace.
func (r *TaskReconciler) executeTask(ctx context.Context, task *opsv1.Task) (string, error) {
	// Command first, then any arguments, separated by single spaces.
	argv := append([]string{task.Spec.Command}, task.Spec.Args...)
	commandLine := strings.Join(argv, " ")

	target := &SSHConfig{
		Host:                task.Spec.Ip,
		Port:                task.Spec.Port,
		User:                task.Spec.Username,
		PrivateKeySecretRef: task.Spec.PrivateKeySecretRef,
	}

	output, err := r.remote_cmd(ctx, target, commandLine, task.Namespace)
	if err != nil {
		return "", err
	}
	return output, nil
}
// SSHConfig holds the information needed to open an SSH connection.
type SSHConfig struct {
// Host is the remote address; it is combined with Port to form the dial target.
Host string
// Port is the remote SSH port.
Port int
// User is the login name on the remote host.
User string
// Disabled alternative: an inline base64-encoded private key, produced with
// `cat /root/.ssh/id_rsa | base64 -w 0`.
// PrivateKey string
// PrivateKeySecretRef names the Secret and the key inside it that hold the private key.
PrivateKeySecretRef *corev1.SecretKeySelector
}
// remote_cmd runs cmd on the host described by sshconfig and returns its
// standard output, one "\n"-terminated line per output line.
//
// The private key is read from the Secret referenced by
// sshconfig.PrivateKeySecretRef in the given namespace. An error is returned
// when the reference is missing, the Secret or key cannot be read or parsed,
// the connection or session cannot be established, or the remote command
// exits unsuccessfully; in every error case the returned string is empty.
//
// NOTE(review): the remote host key is not verified (InsecureIgnoreHostKey),
// no dial timeout is set, and ctx is not used to cancel the SSH connection.
// NOTE(review): the "%s:%d" dial address does not bracket IPv6 literals.
func (r *TaskReconciler) remote_cmd(ctx context.Context, sshconfig *SSHConfig, cmd string, namespace string) (string, error) {
	if sshconfig == nil || sshconfig.PrivateKeySecretRef == nil {
		return "", errors.New("private key secret reference is required")
	}
	ref := sshconfig.PrivateKeySecretRef

	// Load the private key from the referenced Secret.
	secret := &corev1.Secret{}
	if err := r.Get(ctx, client.ObjectKey{Name: ref.Name, Namespace: namespace}, secret); err != nil {
		return "", err
	}
	keyPEM, ok := secret.Data[ref.Key]
	if !ok {
		return "", errors.New("private key not found")
	}
	signer, err := ssh.ParsePrivateKey(keyPEM)
	if err != nil {
		return "", err
	}

	config := &ssh.ClientConfig{
		User:            sshconfig.User,
		Auth:            []ssh.AuthMethod{ssh.PublicKeys(signer)},
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
	}
	conn, err := ssh.Dial("tcp", fmt.Sprintf("%s:%d", sshconfig.Host, sshconfig.Port), config)
	if err != nil {
		return "", err
	}
	defer conn.Close()

	session, err := conn.NewSession()
	if err != nil {
		return "", err
	}
	defer session.Close()

	// Output drains stdout while the command is running. Reading a
	// StdoutPipe only after Run has returned can stall the remote command
	// once the shared SSH channel buffer is full.
	out, err := session.Output(cmd)
	if err != nil {
		return "", err
	}

	// Normalise the output line by line, as before: strip a trailing "\r"
	// and terminate every line with "\n".
	scanner := bufio.NewScanner(strings.NewReader(string(out)))
	// Allow a single line to be as long as the whole output.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), len(out)+1)
	var result strings.Builder
	for scanner.Scan() {
		result.WriteString(scanner.Text())
		result.WriteByte('\n')
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}
	return result.String(), nil
}
// shellQuote wraps s in single quotes for a POSIX shell, escaping any single
// quote it contains, so that s is always passed on as one literal word.
func shellQuote(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}

// createCronjob builds (but does not submit) the CronJob that runs cmd on the
// Task's remote host on the Task's schedule.
//
// The CronJob is named after the Task and placed in namespace. Each Job and
// Pod it spawns is labelled cronjob=<task name>. The Pod mounts the private
// key selected by Spec.PrivateKeySecretRef at /root/.ssh/id_rsa, copies it to
// /tmp with mode 600, runs cmd through ssh and echoes the combined output
// prefixed with "COMMAND OUTPUT: ".
func createCronjob(task *opsv1.Task, cmd string, namespace string) *batchv1.CronJob {
	backoffLimit := int32(task.Spec.Retries)

	// Quote the login and the remote command so that quotes or shell
	// metacharacters in the Task spec cannot break or alter the wrapper script.
	script := "cp /root/.ssh/id_rsa /tmp/id_rsa && chmod 600 /tmp/id_rsa && output=$(ssh -i /tmp/id_rsa -p " +
		strconv.Itoa(task.Spec.Port) +
		" -o StrictHostKeyChecking=no " +
		shellQuote(task.Spec.Username+"@"+task.Spec.Ip) + " " +
		shellQuote(cmd) +
		" 2>&1) && echo \"COMMAND OUTPUT: $output\""

	container := corev1.Container{
		Name:    task.Name,
		Image:   task.Spec.Image,
		Command: []string{"sh", "-c", script},
		VolumeMounts: []corev1.VolumeMount{
			{
				Name:      "ssh-key",
				MountPath: "/root/.ssh/id_rsa",
				SubPath:   "id_rsa",
			},
		},
	}

	keyVolume := corev1.Volume{
		Name: "ssh-key",
		VolumeSource: corev1.VolumeSource{
			Secret: &corev1.SecretVolumeSource{
				SecretName: task.Spec.PrivateKeySecretRef.Name,
				Items: []corev1.KeyToPath{
					{
						// Project the key the Task actually references,
						// not a fixed "id_rsa" entry.
						Key:  task.Spec.PrivateKeySecretRef.Key,
						Path: "id_rsa",
					},
				},
			},
		},
	}

	return &batchv1.CronJob{
		ObjectMeta: metav1.ObjectMeta{
			Name:      task.Name,
			Namespace: namespace,
		},
		Spec: batchv1.CronJobSpec{
			Schedule: task.Spec.Schedule,
			JobTemplate: batchv1.JobTemplateSpec{
				// Label used to find this CronJob's Jobs.
				ObjectMeta: metav1.ObjectMeta{
					Labels: map[string]string{"cronjob": task.Name},
				},
				Spec: batchv1.JobSpec{
					BackoffLimit: &backoffLimit,
					Template: corev1.PodTemplateSpec{
						ObjectMeta: metav1.ObjectMeta{
							Labels: map[string]string{"cronjob": task.Name},
						},
						Spec: corev1.PodSpec{
							RestartPolicy: corev1.RestartPolicyOnFailure,
							Containers:    []corev1.Container{container},
							Volumes:       []corev1.Volume{keyVolume},
						},
					},
				},
			},
		},
	}
}
// updateTaskResult copies the outcome of the newest Job labelled
// cronjob=<task name> into task.Status (in memory only; the caller persists it).
//
// State becomes "Completed" when the Job has a succeeded Pod, "Failed" when
// it has a failed Pod, and "Running" otherwise; LastRunTime is the Job's
// creation time. When no Job exists yet (the CronJob has not fired), the
// status is left untouched and nil is returned, so a freshly scheduled Task
// is not reported as failed.
//
// For a Job that is no longer running, its duration is observed on
// ScheduledTaskDuration using the Job's own start and completion timestamps,
// falling back to the creation time and the current time when they are unset.
// NOTE(review): the same finished Job is observed again on every reconcile.
//
// An error is returned only when the Job list cannot be retrieved.
func (r *TaskReconciler) updateTaskResult(ctx context.Context, task *opsv1.Task) error {
	var jobs batchv1.JobList
	if err := r.List(ctx, &jobs,
		client.InNamespace(task.Namespace),
		client.MatchingLabels{"cronjob": task.Name},
	); err != nil {
		return fmt.Errorf("获取job列表失败: %w", err)
	}
	if len(jobs.Items) == 0 {
		return nil
	}

	// Newest Job first.
	sort.Slice(jobs.Items, func(i, j int) bool {
		return jobs.Items[i].CreationTimestamp.After(jobs.Items[j].CreationTimestamp.Time)
	})
	latest := &jobs.Items[0]

	switch {
	case latest.Status.Succeeded > 0:
		task.Status.State = "Completed"
		task.Status.Result = "Latest Job Completed"
	case latest.Status.Failed > 0:
		task.Status.State = "Failed"
		task.Status.Result = "Latest Job Failed"
	default:
		task.Status.State = "Running"
		task.Status.Result = "Latest Job Running"
	}
	task.Status.LastRunTime = latest.CreationTimestamp.Format("2006-01-02 15:04:05")

	if task.Status.State != "Running" {
		// Take both ends of the interval from the Job itself instead of
		// package-level state shared by all Tasks.
		begin := latest.CreationTimestamp.Time
		if latest.Status.StartTime != nil {
			begin = latest.Status.StartTime.Time
		}
		end := time.Now()
		if latest.Status.CompletionTime != nil {
			end = latest.Status.CompletionTime.Time
		}
		ScheduledTaskDuration.Observe(end.Sub(begin).Seconds())
	}
	return nil
}
// Custom Prometheus collectors exported by the Task controller.
var (
	// ScheduledTaskTotal is the counter published as scheduledtask_total.
	ScheduledTaskTotal = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "scheduledtask_total",
		Help: "Total number of ScheduledTasks created",
	})

	// ScheduledTaskErrors is the counter published as scheduledtask_errors_total.
	ScheduledTaskErrors = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "scheduledtask_errors_total",
		Help: "Total number of failed ScheduledTasks",
	})

	// ScheduledTaskDuration is the histogram published as
	// scheduledtask_duration_seconds.
	ScheduledTaskDuration = prometheus.NewHistogram(prometheus.HistogramOpts{
		Name: "scheduledtask_duration_seconds",
		Help: "Histogram of ScheduledTask execution duration",
		// Five linear buckets starting at 1s and growing by 2s each.
		Buckets: prometheus.LinearBuckets(1, 2, 5),
	})

	// ScheduledTaskActive is the gauge published as scheduledtask_active.
	ScheduledTaskActive = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "scheduledtask_active",
		Help: "Number of currently running ScheduledTasks",
	})
)
// init adds the four Task collectors to the controller-runtime metrics
// registry, in the same order as before.
func init() {
	metrics.Registry.MustRegister(
		ScheduledTaskTotal,
		ScheduledTaskErrors,
		ScheduledTaskDuration,
		ScheduledTaskActive,
	)
}
五、webhook功能加入
通过kubebuilder框架自动生成webhook相关代码
sh
#--group,--version,--kind与上面创建API时使用的值保持一致
#--defaulting生成mutating webhook代码,--programmatic-validation生成validating webhook代码
kubebuilder create webhook --group ops --version v1 --kind Task --defaulting --programmatic-validation
webhook功能编写
go
#编辑internal/webhook/v1/task_webhook.go
/*
Copyright 2025.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1
import (
"context"
"fmt"
"net"
cron "github.com/robfig/cron/v3"
"k8s.io/apimachinery/pkg/runtime"
ctrl "sigs.k8s.io/controller-runtime"
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/webhook"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
opsv1 "ops-operator/api/v1"
)
// nolint:unused
// tasklog is the named logger ("task-resource") used by the Task webhook
// handlers in this package.
var tasklog = logf.Log.WithName("task-resource")
// SetupTaskWebhookWithManager wires the Task admission webhooks into mgr,
// attaching TaskCustomValidator for validation and TaskCustomDefaulter for
// defaulting.
func SetupTaskWebhookWithManager(mgr ctrl.Manager) error {
	wb := ctrl.NewWebhookManagedBy(mgr).For(&opsv1.Task{})
	wb = wb.WithValidator(&TaskCustomValidator{})
	wb = wb.WithDefaulter(&TaskCustomDefaulter{})
	return wb.Complete()
}
// TODO(user): EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// +kubebuilder:webhook:path=/mutate-ops-example-com-v1-task,mutating=true,failurePolicy=fail,sideEffects=None,groups=ops.example.com,resources=tasks,verbs=create;update,versions=v1,name=mtask-v1.kb.io,admissionReviewVersions=v1
// TaskCustomDefaulter struct is responsible for setting default values on the custom resource of the
// Kind Task when those are created or updated.
//
// It carries no state: its Default method fills in Spec.Retries (3) and
// Spec.Port (22) when they are zero.
//
// NOTE: The +kubebuilder:object:generate=false marker prevents controller-gen from generating DeepCopy methods,
// as it is used only for temporary operations and does not need to be deeply copied.
type TaskCustomDefaulter struct {
// TODO(user): Add more fields as needed for defaulting
}
var _ webhook.CustomDefaulter = &TaskCustomDefaulter{}
// Default implements webhook.CustomDefaulter so a webhook will be registered for the Kind Task.
//
// It mutates the incoming Task in place: a zero Spec.Retries becomes 3 and a
// zero Spec.Port becomes 22. An error is returned when obj is not a *Task.
func (d *TaskCustomDefaulter) Default(ctx context.Context, obj runtime.Object) error {
	task, isTask := obj.(*opsv1.Task)
	if !isTask {
		return fmt.Errorf("expected an Task object but got %T", obj)
	}
	tasklog.Info("Defaulting for Task", "name", task.GetName())

	spec := &task.Spec
	// Retries defaults to 3 when unset.
	if spec.Retries == 0 {
		spec.Retries = 3
	}
	// Port defaults to the standard SSH port when unset.
	if spec.Port == 0 {
		spec.Port = 22
	}
	return nil
}
// TODO(user): change verbs to "verbs=create;update;delete" if you want to enable deletion validation.
// NOTE: The 'path' attribute must follow a specific pattern and should not be modified directly here.
// Modifying the path for an invalid path can cause API server errors; failing to locate the webhook.
// +kubebuilder:webhook:path=/validate-ops-example-com-v1-task,mutating=false,failurePolicy=fail,sideEffects=None,groups=ops.example.com,resources=tasks,verbs=create;update,versions=v1,name=vtask-v1.kb.io,admissionReviewVersions=v1
// TaskCustomValidator struct is responsible for validating the Task resource
// when it is created, updated, or deleted.
//
// It carries no state. Create and update apply the same checks: Spec.Command
// must be non-empty, Spec.Schedule (when set) must parse as a standard cron
// expression, and Spec.Ip (when set) must parse as an IP address. Deletion is
// always allowed.
//
// NOTE: The +kubebuilder:object:generate=false marker prevents controller-gen from generating DeepCopy methods,
// as this struct is used only for temporary operations and does not need to be deeply copied.
type TaskCustomValidator struct {
// TODO(user): Add more fields as needed for validation
}
var _ webhook.CustomValidator = &TaskCustomValidator{}
// ValidateCreate implements webhook.CustomValidator so a webhook will be registered for the type Task.
func (v *TaskCustomValidator) ValidateCreate(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
task, ok := obj.(*opsv1.Task)
if !ok {
return nil, fmt.Errorf("expected a Task object but got %T", obj)
}
tasklog.Info("Validation for Task upon creation", "name", task.GetName())
// 如果command为空,则返回错误
if task.Spec.Command == "" {
return nil, fmt.Errorf("command is required")
}
// 如果schedule格式不正确,则返回错误
if task.Spec.Schedule != "" {
_, err := cron.ParseStandard(task.Spec.Schedule)
if err != nil {
return nil, fmt.Errorf("schedule is invalid")
}
}
// 如果ip格式不正确,则返回错误
if task.Spec.Ip != "" {
ip := net.ParseIP(task.Spec.Ip)
if ip == nil {
return nil, fmt.Errorf("ip is invalid")
}
}
return nil, nil
}
// ValidateUpdate implements webhook.CustomValidator so a webhook will be registered for the type Task.
func (v *TaskCustomValidator) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) {
task, ok := newObj.(*opsv1.Task)
if !ok {
return nil, fmt.Errorf("expected a Task object for the newObj but got %T", newObj)
}
tasklog.Info("Validation for Task upon update", "name", task.GetName())
// 如果command为空,则返回错误
if task.Spec.Command == "" {
return nil, fmt.Errorf("command is required")
}
// 如果schedule格式不正确,则返回错误
if task.Spec.Schedule != "" {
_, err := cron.ParseStandard(task.Spec.Schedule)
if err != nil {
return nil, fmt.Errorf("schedule is invalid")
}
}
// 如果ip格式不正确,则返回错误
if task.Spec.Ip != "" {
ip := net.ParseIP(task.Spec.Ip)
if ip == nil {
return nil, fmt.Errorf("ip is invalid")
}
}
return nil, nil
}
// ValidateDelete implements webhook.CustomValidator so a webhook will be registered for the type Task.
//
// Deletion is never rejected for a genuine Task; the call is only logged.
// An error is returned when obj is not a *Task.
func (v *TaskCustomValidator) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
	if task, isTask := obj.(*opsv1.Task); isTask {
		tasklog.Info("Validation for Task upon deletion", "name", task.GetName())
		// TODO(user): fill in your validation logic upon object deletion.
		return nil, nil
	}
	return nil, fmt.Errorf("expected a Task object but got %T", obj)
}
六、部署
1.通过makefile生成二进制文件,crd文件
sh
#二进制文件位置bin/manager,make build会先自动执行make manifests生成CRD YAML
make build
2.通过makefile生成k8s部署yaml,主要用其中关于webhook定义部分
2.1编辑Makefile,添加下面内容
makefile
.PHONY: generate-install
generate-install: manifests kustomize ## 生成安装文件(不部署)
	@mkdir -p dist # 创建输出目录
	cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
	$(KUSTOMIZE) build config/default > dist/install.yaml
2.2手工生成webhook通信证书
sh
#由于本次部署的manager二进制直接运行在服务器上,因此证书的IP SAN需配置为该服务器的IP地址(IP.1 = 172.31.6.101)
openssl genrsa -out ca.key 2048
openssl req -x509 -new -nodes -key ca.key -subj "/CN=webhook-ca" -days 365 -out ca.crt
openssl genrsa -out tls.key 2048
cat > openssl.cnf <<EOF
[ req ]
default_bits = 2048
prompt = no
default_md = sha256
req_extensions = req_ext
distinguished_name = dn
[ dn ]
CN = *.default.svc
[ req_ext ]
subjectAltName = @alt_names
[ alt_names ]
DNS.1 = *.default.svc
DNS.2 = *.default.svc.cluster.local
IP.1 = 172.31.6.101
EOF
openssl req -new -key tls.key -out tls.csr -config openssl.cnf
openssl x509 -req -in tls.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out tls.crt -days 365 -extensions req_ext -extfile openssl.cnf
2.3编辑webhook定义(内容来源install.yaml)
yaml
#caBundle为ca.crt的base64编码
#通过以下命令生成
#cat ca.crt|base64| tr -d '\n' > ca.crt.base64
apiVersion: v1
kind: Namespace
metadata:
labels:
app.kubernetes.io/managed-by: kustomize
app.kubernetes.io/name: ops-operator
control-plane: controller-manager
name: ops-operator-system
---
## Service 定义(无 selector)
#apiVersion: v1
#kind: Service
#metadata:
# name: ops-operator-webhook-service
# namespace: ops-operator-system
# labels:
# app.kubernetes.io/managed-by: kustomize
# app.kubernetes.io/name: ops-operator
#spec:
# ports:
# - port: 443 # Service 暴露的端口
# protocol: TCP
# targetPort: 9443 # 外部服务的端口(需与 Endpoints 中的端口一致)
#---
# Endpoints 定义(指向外部 IP)
#apiVersion: v1
#kind: Endpoints
#metadata:
# name: ops-operator-webhook-service # 必须与 Service 同名
#subsets:
#- addresses:
# - ip: 172.31.6.101 # 外部服务的 IP 地址
# ports:
# - port: 9443 # 外部服务的端口
#---
apiVersion: admissionregistration.k8s.io/v1
kind: MutatingWebhookConfiguration
metadata:
name: ops-operator-mutating-webhook-configuration
webhooks:
- admissionReviewVersions:
- v1
clientConfig:
# service:
# name: ops-operator-webhook-service
# namespace: ops-operator-system
# path: /mutate-ops-example-com-v1-task
url: "https://172.31.6.101:9443/mutate-ops-example-com-v1-task"
caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURDekNDQWZPZ0F3SUJBZ0lVWTcwc2M0MHRUWTZqU2hlUC9iQytZN0FLSzFvd0RRWUpLb1pJaHZjTkFRRUwKQlFBd0ZURVRNQkVHQTFVRUF3d0tkMlZpYUc5dmF5MWpZVEFlRncweU5UQXpNamd4TkRJNU1UUmFGdzB5TmpBegpNamd4TkRJNU1UUmFNQlV4RXpBUkJnTlZCQU1NQ25kbFltaHZiMnN0WTJFd2dnRWlNQTBHQ1NxR1NJYjNEUUVCCkFRVUFBNElCRHdBd2dnRUtBb0lCQVFEUzBiQTZWbkwwRjNXYVRNRzJBa1hHOWpKUTMrQ0U4OFlLSFJaZ3h6bTgKM1RsMm1vSlpUeWVDV3BKMlZXWGI0NTB2YzZTL2JUWm1hSGlJTlcvSEFRUm9Ud1NDblJrNy9pNVdJc0h4aW12LworRTB4RURJTktaVDB6VkFXUWl2SGlFNy9RTjdxZEQrb1liYUovcWpXdEc3YUMveEwzT3JsY0toVzl6RTRvRUwrCjZMU0hnQy9xNmJOMFM4ekN6RnRLR3pySGNVSmJpenlrdERMcVVaQkZjTDgrNmpHWVE1SlBZUXQzeStPUUNDWUgKam9ZVnF1N200OGRYdW5jUndWd3pyRkVkODYzaFUycnpYUGgvdCtMSFptN2pETjBYcU0rMkc4cm43QWJTVFdYQwpyQ2djY0xNSWtYYmcwcFg3aVhIekpCTTdsWUlaTGJjMXA0eGhSMDdjSi8wdEFnTUJBQUdqVXpCUk1CMEdBMVVkCkRnUVdCQlNKdm05ZFBZUDVHeDRMTHZtaHNsaE1YQis5YVRBZkJnTlZIU01FR0RBV2dCU0p2bTlkUFlQNUd4NEwKTHZtaHNsaE1YQis5YVRBUEJnTlZIUk1CQWY4RUJUQURBUUgvTUEwR0NTcUdTSWIzRFFFQkN3VUFBNElCQVFBSApMUml2aW1mT013TWd3NzhYK3BPQUxCK1lkaktkbjBVUXpoTGJINVNwMTMrM3ZWR3UvdUpCYkNGbFk2QUZWZ2paCjZpajN1Z3o5TG1KZHpYN2FNZEh5aFdHNXpzMGVMSGc5QnJmL21aQ3llK3lTS2JtUjd1MVkyLzdMbEdJTU1STy8KUjRrVmc0UFozZnlxNDc1d1A2OHRYMjRUMlBFTkZHUUFYblJIeVZWMWZXd0FTc3RFeEtzYWRCSDQ0QURuUy95dgo3ZzhINUN0U09YWk9MKzlNdGNRd3Zmc2FtWlhGSys5ZWxnOURRMC9NT3kycHVrMTlTOFBvSFhDaUk3aVhyTWczCmRvSFMvdkJNdXNIaEl4dEt6VUY2ZjllakNqWjFNWDdjaiswMUxjb2FYRUZGMnR3MllLMVV6LzZCV2NRQitRQXoKbUQxdi9yNjBiM0tvQXEzVVl3c24KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
failurePolicy: Fail
name: mtask-v1.kb.io
rules:
- apiGroups:
- ops.example.com
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- tasks
sideEffects: None
---
apiVersion: admissionregistration.k8s.io/v1
kind: ValidatingWebhookConfiguration
metadata:
name: ops-operator-validating-webhook-configuration
webhooks:
- admissionReviewVersions:
- v1
clientConfig:
#service:
# name: ops-operator-webhook-service
# namespace: ops-operator-system
# path: /validate-ops-example-com-v1-task
url: "https://172.31.6.101:9443/validate-ops-example-com-v1-task"
caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURDekNDQWZPZ0F3SUJBZ0lVWTcwc2M0MHRUWTZqU2hlUC9iQytZN0FLSzFvd0RRWUpLb1pJaHZjTkFRRUwKQlFBd0ZURVRNQkVHQTFVRUF3d0tkMlZpYUc5dmF5MWpZVEFlRncweU5UQXpNamd4TkRJNU1UUmFGdzB5TmpBegpNamd4TkRJNU1UUmFNQlV4RXpBUkJnTlZCQU1NQ25kbFltaHZiMnN0WTJFd2dnRWlNQTBHQ1NxR1NJYjNEUUVCCkFRVUFBNElCRHdBd2dnRUtBb0lCQVFEUzBiQTZWbkwwRjNXYVRNRzJBa1hHOWpKUTMrQ0U4OFlLSFJaZ3h6bTgKM1RsMm1vSlpUeWVDV3BKMlZXWGI0NTB2YzZTL2JUWm1hSGlJTlcvSEFRUm9Ud1NDblJrNy9pNVdJc0h4aW12LworRTB4RURJTktaVDB6VkFXUWl2SGlFNy9RTjdxZEQrb1liYUovcWpXdEc3YUMveEwzT3JsY0toVzl6RTRvRUwrCjZMU0hnQy9xNmJOMFM4ekN6RnRLR3pySGNVSmJpenlrdERMcVVaQkZjTDgrNmpHWVE1SlBZUXQzeStPUUNDWUgKam9ZVnF1N200OGRYdW5jUndWd3pyRkVkODYzaFUycnpYUGgvdCtMSFptN2pETjBYcU0rMkc4cm43QWJTVFdYQwpyQ2djY0xNSWtYYmcwcFg3aVhIekpCTTdsWUlaTGJjMXA0eGhSMDdjSi8wdEFnTUJBQUdqVXpCUk1CMEdBMVVkCkRnUVdCQlNKdm05ZFBZUDVHeDRMTHZtaHNsaE1YQis5YVRBZkJnTlZIU01FR0RBV2dCU0p2bTlkUFlQNUd4NEwKTHZtaHNsaE1YQis5YVRBUEJnTlZIUk1CQWY4RUJUQURBUUgvTUEwR0NTcUdTSWIzRFFFQkN3VUFBNElCQVFBSApMUml2aW1mT013TWd3NzhYK3BPQUxCK1lkaktkbjBVUXpoTGJINVNwMTMrM3ZWR3UvdUpCYkNGbFk2QUZWZ2paCjZpajN1Z3o5TG1KZHpYN2FNZEh5aFdHNXpzMGVMSGc5QnJmL21aQ3llK3lTS2JtUjd1MVkyLzdMbEdJTU1STy8KUjRrVmc0UFozZnlxNDc1d1A2OHRYMjRUMlBFTkZHUUFYblJIeVZWMWZXd0FTc3RFeEtzYWRCSDQ0QURuUy95dgo3ZzhINUN0U09YWk9MKzlNdGNRd3Zmc2FtWlhGSys5ZWxnOURRMC9NT3kycHVrMTlTOFBvSFhDaUk3aVhyTWczCmRvSFMvdkJNdXNIaEl4dEt6VUY2ZjllakNqWjFNWDdjaiswMUxjb2FYRUZGMnR3MllLMVV6LzZCV2NRQitRQXoKbUQxdi9yNjBiM0tvQXEzVVl3c24KLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
failurePolicy: Fail
name: vtask-v1.kb.io
rules:
- apiGroups:
- ops.example.com
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- tasks
sideEffects: None
3.部署资源
3.1部署crd
sh
#crd资源定义yaml位置config/crd/bases/ops.example.com_tasks.yaml
kubectl apply -f ops.example.com_tasks.yaml
3.2运行manager
sh
#将证书tls.crt tls.key放入默认路径/tmp/k8s-webhook-server/serving-certs/
#运行二进制文件manager
#--metrics-bind-address,--metrics-secure=false打开prometheus的metric端口,且不启动ssl认证
#--webhook-cert-path指定webhook证书路径
./manager --metrics-bind-address=:8080 --metrics-secure=false --webhook-cert-path=/tmp/k8s-webhook-server/serving-certs
3.3编辑task实例yaml
yaml
#故意漏掉了retries字段,验证webhook mutating
#可以将schedule字段格式故意写错,验证webhook validating
apiVersion: ops.example.com/v1
kind: Task
metadata:
  name: test
  namespace: tasks
spec:
  ip: "172.31.6.111"
  port: 22
  username: root
  privateKeySecretRef:
    name: ssh-secret
    key: id_rsa
  command: "df -h"
  schedule: "*/1 * * * *"
  image: "myssh:v1.0"
sh
#部署实例
kubectl apply -f task-schedul.yaml
以上会自动生成cronjob,该cronjob会自动在远程服务器172.31.6.111上,每分钟执行df -h的命令。