This article walks through quickly developing a scheduler that places Pods based on their real resource usage. The underlying theory is not covered here; the goal is to keep things as approachable as possible.
All code files have been uploaded to GitHub: github.com/huangjc7/sc...
Solutions to the pitfalls encountered are at the end.
1. Introduction
1.1 Versions
Developed against Kubernetes 1.28.5; the k8s.io component libraries are pinned to v0.28.5.
Go version: 1.22
The go.mod file is listed below:
go
module github.com/huangjc7/scheduler-demo
go 1.22.0
require (
github.com/prometheus/client_golang v1.18.0
github.com/prometheus/common v0.45.0
github.com/spf13/pflag v1.0.5
k8s.io/api v0.28.5
k8s.io/apimachinery v0.28.5
k8s.io/component-base v0.29.2
k8s.io/klog/v2 v2.100.1
k8s.io/kubernetes v1.28.5
)
require (
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
github.com/NYTimes/gziphandler v1.1.1 // indirect
github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df // indirect
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/docker/distribution v2.8.2+incompatible // indirect
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/felixge/httpsnoop v1.0.3 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/go-logr/logr v1.2.4 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/cel-go v0.16.1 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
github.com/moby/sys/mountinfo v0.6.2 // indirect
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/selinux v1.10.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_model v0.6.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/spf13/cobra v1.7.0 // indirect
github.com/stoewer/go-strcase v1.2.0 // indirect
go.etcd.io/etcd/api/v3 v3.5.9 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.9 // indirect
go.etcd.io/etcd/client/v3 v3.5.9 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.35.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.35.1 // indirect
go.opentelemetry.io/otel v1.10.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.10.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.10.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.10.0 // indirect
go.opentelemetry.io/otel/metric v0.31.0 // indirect
go.opentelemetry.io/otel/sdk v1.10.0 // indirect
go.opentelemetry.io/otel/trace v1.10.0 // indirect
go.opentelemetry.io/proto/otlp v0.19.0 // indirect
go.uber.org/atomic v1.10.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.19.0 // indirect
golang.org/x/crypto v0.19.0 // indirect
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e // indirect
golang.org/x/net v0.21.0 // indirect
golang.org/x/oauth2 v0.17.0 // indirect
golang.org/x/sync v0.3.0 // indirect
golang.org/x/sys v0.17.0 // indirect
golang.org/x/term v0.17.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.3.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20230526161137-0005af68ea54 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20230525234035-dd9d682886f9 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230525234030-28d5490b6b19 // indirect
google.golang.org/grpc v1.56.3 // indirect
google.golang.org/protobuf v1.32.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.0.0 // indirect
k8s.io/apiserver v0.28.5 // indirect
k8s.io/client-go v1.5.2 // indirect
k8s.io/cloud-provider v0.28.5 // indirect
k8s.io/component-helpers v0.28.5 // indirect
k8s.io/controller-manager v0.28.5 // indirect
k8s.io/csi-translation-lib v0.19.9 // indirect
k8s.io/dynamic-resource-allocation v0.0.0 // indirect
k8s.io/kms v0.28.5 // indirect
k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 // indirect
k8s.io/kube-scheduler v0.19.9 // indirect
k8s.io/kubelet v0.28.5 // indirect
k8s.io/mount-utils v0.0.0 // indirect
k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.1.2 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)
replace (
k8s.io/api => k8s.io/api v0.28.5
k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.28.5
k8s.io/apimachinery => k8s.io/apimachinery v0.28.5
k8s.io/apiserver => k8s.io/apiserver v0.28.5
k8s.io/cli-runtime => k8s.io/cli-runtime v0.28.5
k8s.io/client-go => k8s.io/client-go v0.28.5
k8s.io/cloud-provider => k8s.io/cloud-provider v0.28.5
k8s.io/cluster-bootstrap => k8s.io/cluster-bootstrap v0.28.5
k8s.io/code-generator => k8s.io/code-generator v0.28.5
k8s.io/component-base => k8s.io/component-base v0.28.5
k8s.io/cri-api => k8s.io/cri-api v0.28.5
k8s.io/csi-translation-lib => k8s.io/csi-translation-lib v0.28.5
k8s.io/kube-aggregator => k8s.io/kube-aggregator v0.28.5
k8s.io/kube-controller-manager => k8s.io/kube-controller-manager v0.28.5
k8s.io/kube-proxy => k8s.io/kube-proxy v0.28.5
k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.28.5
k8s.io/kubectl => k8s.io/kubectl v0.28.5
k8s.io/kubelet => k8s.io/kubelet v0.28.5
k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.28.5
k8s.io/metrics => k8s.io/metrics v0.28.5
k8s.io/sample-apiserver => k8s.io/sample-apiserver v0.28.5
)
replace k8s.io/component-helpers => k8s.io/component-helpers v0.28.5
replace k8s.io/controller-manager => k8s.io/controller-manager v0.28.5
replace k8s.io/dynamic-resource-allocation => k8s.io/dynamic-resource-allocation v0.28.5
replace k8s.io/endpointslice => k8s.io/endpointslice v0.28.5
replace k8s.io/kms => k8s.io/kms v0.28.5
replace k8s.io/mount-utils => k8s.io/mount-utils v0.28.5
replace k8s.io/pod-security-admission => k8s.io/pod-security-admission v0.28.5
replace k8s.io/sample-cli-plugin => k8s.io/sample-cli-plugin v0.28.5
replace k8s.io/sample-controller => k8s.io/sample-controller v0.28.5
1.2 Directory Structure
shell
$ tree .
.
├── README.md
├── docker
│   └── Dockerfile
├── go.mod
├── go.sum
├── hack
│   └── go-get-kubernetes.sh
├── main.go
└── pkg
    └── plugins
        └── sample.go
2. Code Development
The core mechanism is plugin registration through the WithPlugin method provided by the scheduler framework, which lets developers write their own scheduling logic and register it into the plugin set.
main.go
The code is as follows:
go
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import (
	"math/rand"
	"os"
	"time"

	"github.com/huangjc7/scheduler-demo/pkg/plugins"
	"github.com/spf13/pflag"
	cliflag "k8s.io/component-base/cli/flag"
	"k8s.io/component-base/logs"
	_ "k8s.io/component-base/metrics/prometheus/clientgo"
	_ "k8s.io/component-base/metrics/prometheus/version" // for version metric registration
	"k8s.io/kubernetes/cmd/kube-scheduler/app"
)

func main() {
	rand.Seed(time.Now().UnixNano())
	// Core: WithPlugin registers the plugin name together with its implementation.
	// plugins.New holds the implementation logic; it is a runtime.PluginFactory callback.
	command := app.NewSchedulerCommand(app.WithPlugin(plugins.Name, plugins.New))
	pflag.CommandLine.SetNormalizeFunc(cliflag.WordSepNormalizeFunc)
	logs.InitLogs()
	defer logs.FlushLogs()
	if err := command.Execute(); err != nil {
		os.Exit(1)
	}
}
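Because NewSchedulerCommand wires up the full kube-scheduler CLI, the resulting binary accepts the standard scheduler flags, which makes for a quick local development loop. A sketch, assuming a kubeconfig at $HOME/.kube/config with permissions equivalent to the in-cluster scheduler (the flags shown are standard kube-scheduler flags):
shell
go run main.go \
  --kubeconfig=$HOME/.kube/config \
  --authentication-kubeconfig=$HOME/.kube/config \
  --authorization-kubeconfig=$HOME/.kube/config \
  --leader-elect=false \
  --v=3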
Core implementation logic
In the scheduling framework, a Pod passes through many stages on its way to being scheduled,
for example filtering (predicates), prioritization, and node scoring. The official documentation is here:
kubernetes.io/docs/concep...
This example implements its logic in the filter stage: it queries Prometheus for each host's CPU idle rate and filters nodes accordingly.
Code file: pkg/plugins/sample.go
go
package plugins

import (
	"context"
	"fmt"
	"time"

	"github.com/prometheus/client_golang/api"
	promeV1 "github.com/prometheus/client_golang/api/prometheus/v1"
	"github.com/prometheus/common/model"
	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/klog/v2"
	"k8s.io/kubernetes/pkg/scheduler/framework"
)

const (
	Name          = "sample-plugin"         // the plugin name
	prometheusURL = "http://localhost:8080" // the Prometheus address
)

// Standard pattern: assert that Sample implements the FilterPlugin interface,
// since this example adds logic to the filter stage.
// To extend other stages, implement the corresponding interfaces, e.g.:
//   var _ framework.PreFilterPlugin = &Sample{} // pre-filter stage interface
//   var _ framework.ScorePlugin = &Sample{}     // scoring stage interface
var _ framework.FilterPlugin = &Sample{}

type Sample struct {
	handle           framework.Handle // standard pattern: the framework Handle exposes core runtime helpers
	prometheusClient promeV1.API      // Prometheus client
}

// New creates the plugin.
func New(object runtime.Object, f framework.Handle) (framework.Plugin, error) {
	// Initialize the Prometheus client.
	client, err := api.NewClient(api.Config{
		Address: prometheusURL,
	})
	if err != nil {
		return nil, fmt.Errorf("creating prometheus client failed: %v", err)
	}
	// Create the Prometheus API client.
	promeClient := promeV1.NewAPI(client)
	// Return the plugin instance to the scheduling framework.
	return &Sample{
		handle:           f,
		prometheusClient: promeClient,
	}, nil
}

// FilterPlugin requires the Filter() and Name() methods, which we implement here.
func (s *Sample) Name() string {
	return Name
}

func (s *Sample) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
	// Query Prometheus for the node's CPU usage.
	cpuUsage, err := s.queryNodeCPUUsage(ctx, nodeInfo.Node().Name)
	if err != nil {
		return framework.NewStatus(framework.Error, err.Error())
	}
	// Threshold for acceptable CPU usage.
	const maxAllowedCPUUsage = 0.7 // 70%
	// If the node's CPU usage exceeds the threshold, do not schedule the Pod onto it.
	if cpuUsage > maxAllowedCPUUsage {
		return framework.NewStatus(framework.Unschedulable, fmt.Sprintf("node's CPU usage is too high: %.2f", cpuUsage))
	}
	klog.V(3).Infof("filter pod: %v, node: %v", pod.Name, nodeInfo.Node().Name)
	// On success, return Success (status code 0) for the filter stage.
	// Nodes that pass move on to the Score stage; for finer-grained control
	// there, additionally implement ScorePlugin.
	return framework.NewStatus(framework.Success)
}

func (s *Sample) queryNodeCPUUsage(ctx context.Context, nodeName string) (float64, error) {
	// Build the Prometheus query.
	query := fmt.Sprintf("rate(node_cpu_seconds_total{mode='idle',node='%s'}[1m])", nodeName)
	// Log the PromQL for troubleshooting.
	klog.V(3).Infof("Query promQL: %s", query)
	// Run the query.
	val, _, err := s.prometheusClient.Query(ctx, query, time.Now())
	if err != nil {
		return 0, fmt.Errorf("querying Prometheus failed: %v", err)
	}
	// Parse the query result.
	vec, ok := val.(model.Vector)
	if !ok || len(vec) == 0 {
		return 0, fmt.Errorf("invalid Prometheus response")
	}
	// Take the CPU idle rate from the first sample.
	idleCPU := float64(vec[0].Value)
	// CPU usage is 1 minus the CPU idle rate.
	cpuUsage := 1 - idleCPU
	return cpuUsage, nil
}
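The scheduler configuration deployed in section 3 also enables "sample-plugin" at the preFilter extension point, and the logs in section 4 show a "prefilter pod" message, so the full code in the repository evidently implements framework.PreFilterPlugin as well. A minimal sketch of that interface in the v0.28 API (the method bodies here are illustrative, not the repository's exact code):
go
// Assert that Sample also implements the PreFilterPlugin interface.
var _ framework.PreFilterPlugin = &Sample{}

// PreFilter runs once per Pod before the per-node Filter calls.
func (s *Sample) PreFilter(ctx context.Context, state *framework.CycleState, pod *v1.Pod) (*framework.PreFilterResult, *framework.Status) {
	klog.V(3).Infof("prefilter pod: %v", pod.Name)
	return nil, framework.NewStatus(framework.Success)
}

// PreFilterExtensions may return nil when the plugin does not need to react
// to pods being added or removed while preemption is evaluated.
func (s *Sample) PreFilterExtensions() framework.PreFilterExtensions {
	return nil
}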
2.1 Packaging the scheduler as a container
The Dockerfile is as follows:
dockerfile
# Build the scheduler binary
FROM golang:1.22 as builder
#RUN apt-get -y update && apt-get -y install upx
WORKDIR /workspace
# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# Copy the go source
COPY main.go main.go
COPY pkg/ pkg/
# Build
ENV CGO_ENABLED=0
ENV GOOS=linux
ENV GOARCH=arm64
ENV GO111MODULE=on
ENV GOPROXY="https://goproxy.cn"
# cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer
#RUN go mod download && \
# go build -a -o sample-scheduler main.go && \
# upx sample-scheduler \
RUN go mod download && \
go build -a -o sample-scheduler main.go
FROM ubuntu
COPY --from=builder /workspace/sample-scheduler /usr/local/bin/sample-scheduler
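To build the image, run docker build from the repository root so that go.mod, main.go, and pkg/ are in the build context. The tag below matches the image referenced by the Deployment in section 3.1 (pushing to that name assumes you control the registry; also adjust GOARCH in the Dockerfile to your nodes' architecture):
shell
docker build -t kubernetes/samp-scheduler:v1.0.1 -f docker/Dockerfile .
docker push kubernetes/samp-scheduler:v1.0.1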
3. Deployment
Running the plugin relies on the --write-config-to flag, which writes the scheduler's effective configuration to a specified file. Use it to inspect the default scheduler's configuration, then add our plugin on top of that configuration.
Edit the /etc/kubernetes/manifests/kube-scheduler.yaml file; the specific changes are as follows:
yaml
apiVersion: v1
kind: Pod
metadata:
  creationTimestamp: null
  labels:
    component: kube-scheduler
    tier: control-plane
  name: kube-scheduler
  namespace: kube-system
spec:
  containers:
  - command:
    - kube-scheduler
    - --authentication-kubeconfig=/etc/kubernetes/scheduler.conf
    - --authorization-kubeconfig=/etc/kubernetes/scheduler.conf
    - --bind-address=0.0.0.0
    - --kubeconfig=/etc/kubernetes/scheduler.conf
    # New flag:
    # write the effective configuration to scheduler-config.yaml
    - --write-config-to=/etc/kubernetes/scheduler-config.yaml
    - --leader-elect=true
    - --port=0
    image: registry.aliyuncs.com/k8sxio/kube-scheduler:v1.19.9
    imagePullPolicy: IfNotPresent
    livenessProbe:
      failureThreshold: 8
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10259
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    name: kube-scheduler
    resources:
      requests:
        cpu: 100m
    startupProbe:
      failureThreshold: 24
      httpGet:
        host: 127.0.0.1
        path: /healthz
        port: 10259
        scheme: HTTPS
      initialDelaySeconds: 10
      periodSeconds: 10
      timeoutSeconds: 15
    volumeMounts:
    - mountPath: /etc/kubernetes/scheduler.conf
      name: kubeconfig
      readOnly: true
    # New mount for the generated config file
    - mountPath: /etc/kubernetes/scheduler-config.yaml
      name: scheduler-config
  hostNetwork: true
  priorityClassName: system-node-critical
  volumes:
  - hostPath:
      path: /etc/kubernetes/scheduler.conf
      type: FileOrCreate
    name: kubeconfig
  # New volume: surface the config file on the host
  - hostPath:
      path: /etc/kubernetes/scheduler-config.yaml
      type: FileOrCreate
    name: scheduler-config
After saving, the kube-scheduler Pod will briefly report errors; revert the changes right away and it will recover. You are left with the generated /etc/kubernetes/scheduler-config.yaml configuration file:
yaml
apiVersion: kubescheduler.config.k8s.io/v1
clientConnection:
  acceptContentTypes: ""
  burst: 100
  contentType: application/vnd.kubernetes.protobuf
  kubeconfig: /etc/kubernetes/scheduler.conf
  qps: 50
enableContentionProfiling: true
enableProfiling: true
kind: KubeSchedulerConfiguration
leaderElection:
  leaderElect: true
  leaseDuration: 15s
  renewDeadline: 10s
  resourceLock: leases
  resourceName: sample-scheduler
  resourceNamespace: kube-system
  retryPeriod: 2s
parallelism: 16
percentageOfNodesToScore: 0
podInitialBackoffSeconds: 1
podMaxBackoffSeconds: 10
profiles:
- pluginConfig:
  - args:
      favorColor: "#326CE5"
      favorNumber: 7
      thanksTo: "Kubernetes"
    name: sample-plugin
  - args:
      apiVersion: kubescheduler.config.k8s.io/v1
      kind: DefaultPreemptionArgs
      minCandidateNodesAbsolute: 100
      minCandidateNodesPercentage: 10
    name: DefaultPreemption
  - args:
      apiVersion: kubescheduler.config.k8s.io/v1
      hardPodAffinityWeight: 1
      ignorePreferredTermsOfExistingPods: false
      kind: InterPodAffinityArgs
    name: InterPodAffinity
  - args:
      apiVersion: kubescheduler.config.k8s.io/v1
      kind: NodeAffinityArgs
    name: NodeAffinity
  - args:
      apiVersion: kubescheduler.config.k8s.io/v1
      kind: NodeResourcesBalancedAllocationArgs
      resources:
      - name: cpu
        weight: 1
      - name: memory
        weight: 1
    name: NodeResourcesBalancedAllocation
  - args:
      apiVersion: kubescheduler.config.k8s.io/v1
      kind: NodeResourcesFitArgs
      scoringStrategy:
        resources:
        - name: cpu
          weight: 1
        - name: memory
          weight: 1
        type: LeastAllocated
    name: NodeResourcesFit
  - args:
      apiVersion: kubescheduler.config.k8s.io/v1
      defaultingType: System
      kind: PodTopologySpreadArgs
    name: PodTopologySpread
  - args:
      apiVersion: kubescheduler.config.k8s.io/v1
      bindTimeoutSeconds: 600
      kind: VolumeBindingArgs
    name: VolumeBinding
  plugins:
    bind: {}
    filter:
      enabled:
      - name: "sample-plugin"
    multiPoint:
      enabled:
      - name: PrioritySort
        weight: 0
      - name: NodeUnschedulable
        weight: 0
      - name: NodeName
        weight: 0
      - name: TaintToleration
        weight: 3
      - name: NodeAffinity
        weight: 2
      - name: NodePorts
        weight: 0
      - name: NodeResourcesFit
        weight: 1
      - name: VolumeRestrictions
        weight: 0
      - name: EBSLimits
        weight: 0
      - name: GCEPDLimits
        weight: 0
      - name: NodeVolumeLimits
        weight: 0
      - name: AzureDiskLimits
        weight: 0
      - name: VolumeBinding
        weight: 0
      - name: VolumeZone
        weight: 0
      - name: PodTopologySpread
        weight: 2
      - name: InterPodAffinity
        weight: 2
      - name: DefaultPreemption
        weight: 0
      - name: NodeResourcesBalancedAllocation
        weight: 1
      - name: ImageLocality
        weight: 1
      - name: DefaultBinder
        weight: 0
      - name: SchedulingGates
        weight: 0
    permit: {}
    postBind: {}
    postFilter: {}
    preBind: {}
    preEnqueue: {}
    preFilter:
      enabled:
      - name: "sample-plugin"
    preScore: {}
    queueSort: {}
    reserve: {}
    score: {}
  schedulerName: sample-scheduler
3.1 Running the custom scheduler
Run the custom scheduler as a Deployment and mount the scheduler-config.yaml configuration above into the custom scheduler's container. The full YAML is below (the apply commands follow the manifest):
yaml
apiVersion: v1
kind: ServiceAccount
metadata:
  name: sample-scheduler-sa
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: sample-scheduler-admin-binding
subjects:
- kind: ServiceAccount
  name: sample-scheduler-sa
  namespace: kube-system
roleRef:
  kind: ClusterRole
  name: cluster-admin
  apiGroup: rbac.authorization.k8s.io
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: scheduler-config
  namespace: kube-system
data:
  ## The scheduler-config.yaml file to mount into the scheduler container.
  ## Note: this is not the complete scheduler-config.yaml from above; only the needed parts are kept.
  scheduler-config.yaml: |
    apiVersion: kubescheduler.config.k8s.io/v1
    kind: KubeSchedulerConfiguration
    leaderElection:
      leaderElect: true
      leaseDuration: 15s
      renewDeadline: 10s
      resourceLock: leases
      resourceName: sample-scheduler # set the custom scheduler name here
      resourceNamespace: kube-system
      retryPeriod: 2s
    parallelism: 16
    percentageOfNodesToScore: 0
    podInitialBackoffSeconds: 1
    podMaxBackoffSeconds: 10
    profiles:
    - pluginConfig:
      - args:
          apiVersion: kubescheduler.config.k8s.io/v1
          kind: DefaultPreemptionArgs
          minCandidateNodesAbsolute: 100
          minCandidateNodesPercentage: 10
        name: DefaultPreemption
      - args:
          apiVersion: kubescheduler.config.k8s.io/v1
          hardPodAffinityWeight: 1
          ignorePreferredTermsOfExistingPods: false
          kind: InterPodAffinityArgs
        name: InterPodAffinity
      - args:
          apiVersion: kubescheduler.config.k8s.io/v1
          kind: NodeAffinityArgs
        name: NodeAffinity
      - args:
          apiVersion: kubescheduler.config.k8s.io/v1
          kind: NodeResourcesBalancedAllocationArgs
          resources:
          - name: cpu
            weight: 1
          - name: memory
            weight: 1
        name: NodeResourcesBalancedAllocation
      - args:
          apiVersion: kubescheduler.config.k8s.io/v1
          kind: NodeResourcesFitArgs
          scoringStrategy:
            resources:
            - name: cpu
              weight: 1
            - name: memory
              weight: 1
            type: LeastAllocated
        name: NodeResourcesFit
      - args:
          apiVersion: kubescheduler.config.k8s.io/v1
          defaultingType: System
          kind: PodTopologySpreadArgs
        name: PodTopologySpread
      - args:
          apiVersion: kubescheduler.config.k8s.io/v1
          bindTimeoutSeconds: 600
          kind: VolumeBindingArgs
        name: VolumeBinding
      plugins:
        bind: {}
        filter:
          enabled:
          - name: "sample-plugin" # enable sample-plugin at the filter stage (the defaults come in via multiPoint)
        multiPoint:
          enabled:
          - name: PrioritySort
            weight: 0
          - name: NodeUnschedulable
            weight: 0
          - name: NodeName
            weight: 0
          - name: TaintToleration
            weight: 3
          - name: NodeAffinity
            weight: 2
          - name: NodePorts
            weight: 0
          - name: NodeResourcesFit
            weight: 1
          - name: VolumeRestrictions
            weight: 0
          - name: EBSLimits
            weight: 0
          - name: GCEPDLimits
            weight: 0
          - name: NodeVolumeLimits
            weight: 0
          - name: AzureDiskLimits
            weight: 0
          - name: VolumeBinding
            weight: 0
          - name: VolumeZone
            weight: 0
          - name: PodTopologySpread
            weight: 2
          - name: InterPodAffinity
            weight: 2
          - name: DefaultPreemption
            weight: 0
          - name: NodeResourcesBalancedAllocation
            weight: 1
          - name: ImageLocality
            weight: 1
          - name: DefaultBinder
            weight: 0
          - name: SchedulingGates
            weight: 0
        permit: {}
        postBind: {}
        postFilter: {}
        preBind: {}
        preEnqueue: {}
        preFilter:
          enabled:
          - name: "sample-plugin"
        preScore: {}
        queueSort: {}
        reserve: {}
        score: {}
      schedulerName: sample-scheduler # the scheduler name Pods reference via spec.schedulerName
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: sample-scheduler
  namespace: kube-system
  labels:
    component: sample-scheduler
spec:
  selector:
    matchLabels:
      component: sample-scheduler
  template:
    metadata:
      labels:
        component: sample-scheduler
    spec:
      serviceAccountName: sample-scheduler-sa
      priorityClassName: system-cluster-critical
      volumes:
      - name: scheduler-config
        configMap:
          name: scheduler-config
      containers:
      - name: scheduler
        image: kubernetes/samp-scheduler:v1.0.1 # the scheduler image built in section 2.1
        imagePullPolicy: IfNotPresent
        command:
        - sample-scheduler
        - --config=/etc/kubernetes/scheduler-config.yaml # specify the config file
        - --v=3
        volumeMounts:
        - name: scheduler-config
          mountPath: /etc/kubernetes
#        livenessProbe:
#          httpGet:
#            path: /healthz
#            port: 10251
#          initialDelaySeconds: 15
#        readinessProbe:
#          httpGet:
#            path: /healthz
#            port: 10251
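Apply the manifest and confirm the custom scheduler comes up and acquires its leader lease (the file name here is illustrative):
shell
kubectl apply -f sample-scheduler.yaml
kubectl -n kube-system get pods -l component=sample-scheduler
kubectl -n kube-system logs deploy/sample-scheduler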
4. Testing
The cluster currently has two worker nodes, node02 and node03 (node01 is the master).
Saturate node03's CPU with the following script:
shell
#!/bin/bash
# This script spawns enough background jobs to drive CPU usage to 100%.
# Get the number of CPU cores.
CORES=$(nproc)
# Start one background job per core.
for i in $(seq 1 $CORES); do
    # A busy loop doing compute-bound work.
    while : ; do : ; done &
done
# Wait for all background jobs to finish (in practice they never do).
wait
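Before deploying the test workload, you can sanity-check that Prometheus returns the metric the plugin queries. This assumes the same Prometheus address (http://localhost:8080) and node label on node_cpu_seconds_total that sample.go assumes; adjust both for your environment:
shell
curl -s 'http://localhost:8080/api/v1/query' \
  --data-urlencode "query=rate(node_cpu_seconds_total{mode='idle',node='node03'}[1m])"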
Deploy an nginx that specifies the custom scheduler to test scheduling:
yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: test-scheduler
spec:
  selector:
    matchLabels:
      app: test-scheduler
  template:
    metadata:
      labels:
        app: test-scheduler
    spec:
      schedulerName: sample-scheduler # use the custom scheduler
      containers:
      - image: nginx
        imagePullPolicy: IfNotPresent
        name: nginx
        ports:
        - containerPort: 80
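Since node03's CPU usage now exceeds the 70% threshold, the Pods should all land on node02 (as the logs below confirm). Verify the placement with:
shell
kubectl get pods -l app=test-scheduler -o wide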
The custom scheduler's logs look like this:
shell
I0303 14:07:59.523243 1 eventhandlers.go:197] "Add event for scheduled pod" pod="default/test-scheduler-668f68df99-82tjw"
I0303 14:07:59.523235 1 eventhandlers.go:171] "Delete event for unscheduled pod" pod="default/test-scheduler-668f68df99-82tjw"
I0303 14:07:59.718833 1 eventhandlers.go:244] "Delete event for scheduled pod" pod="default/test-scheduler-668f68df99-7psgg"
I0303 14:08:03.121363 1 eventhandlers.go:126] "Add event for unscheduled pod" pod="default/test-scheduler-668f68df99-btsmt"
I0303 14:08:03.121403 1 schedule_one.go:93] "Attempting to schedule pod" pod="default/test-scheduler-668f68df99-btsmt"
I0303 14:08:03.121476 1 sample.go:117] prefilter pod: test-scheduler-668f68df99-btsmt
I0303 14:08:03.121518 1 log.go:245] Query promQL rate(node_cpu_seconds_total{mode='idle',node='node02'}[1m])
I0303 14:08:03.121685 1 log.go:245] Query promQL rate(node_cpu_seconds_total{mode='idle',node='node03'}[1m])
I0303 14:08:03.124109 1 sample.go:150] filter pod : test-scheduler-668f68df99-btsmt, node: node02, pre state: &{{0 0 0 0 map[]}}
I0303 14:08:03.124193 1 default_binder.go:53] "Attempting to bind pod to node" pod="default/test-scheduler-668f68df99-btsmt" node="node02"
I0303 14:08:03.127611 1 eventhandlers.go:197] "Add event for scheduled pod" pod="default/test-scheduler-668f68df99-btsmt"
I0303 14:08:03.127612 1 eventhandlers.go:171] "Delete event for unscheduled pod" pod="default/test-scheduler-668f68df99-btsmt"
I0303 14:08:03.129072 1 schedule_one.go:286] "Successfully bound pod to node" pod="default/test-scheduler-668f68df99-btsmt" node="node02" evaluatedNodes=3 feasibleNodes=1
5. Pitfalls
Because of Kubernetes' historical package layout, a direct go get k8s.io/kubernetes@v1.28.5
fails: the k8s.io/* staging modules it depends on are pinned to v0.0.0. Upstream provides a script that fetches the go.mod module pins matching a given Kubernetes version.
Script address: github.com/huangjc7/sc...
Usage: bash hack/go-get-kubernetes.sh v1.28.5
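For reference, such scripts work by parsing the replace directives in the upstream kubernetes go.mod and pinning each staging module before fetching k8s.io/kubernetes itself. This sketch follows the widely shared community workaround, not necessarily the exact contents of hack/go-get-kubernetes.sh:
shell
#!/usr/bin/env bash
set -euo pipefail

VERSION=${1#"v"} # e.g. "v1.28.5" -> "1.28.5"
# List the staging modules (k8s.io/api, k8s.io/apimachinery, ...) from the upstream go.mod.
MODS=($(
  curl -sS "https://raw.githubusercontent.com/kubernetes/kubernetes/v${VERSION}/go.mod" |
  sed -n 's|.*k8s.io/\(.*\) => ./staging/src/k8s.io/.*|k8s.io/\1|p'
))
for MOD in "${MODS[@]}"; do
  # Resolve the published tag for this module (e.g. v0.28.5) and add a replace directive.
  V=$(go mod download -json "${MOD}@kubernetes-${VERSION}" |
      sed -n 's|.*"Version": "\(.*\)".*|\1|p')
  go mod edit "-replace=${MOD}=${MOD}@${V}"
done
go get "k8s.io/kubernetes@v${VERSION}"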