kubectl top node报错处理
解决步骤
因项目要求,需要在 k8s 集群中使用 kubectl top node 命令,但该命令一直报 error: metrics not available yet 错误。为便于日后复现和排查同类问题,现将本次解决问题的步骤整理如下。
环境说明
k8s版本:v1.23.5
k8s部署方式:kubeadm
powershell
##版本说明,当前的k8s版本为v1.23.5
[root@XXXX ~]# kubectl get node
NAME STATUS ROLES AGE VERSION
test-worker1 Ready <none> 160d v1.23.5
问题现象
powershell
#1 查看node出错
[root@XXXX ~]# kubectl top node
error: metrics not available yet
#2 查看pod没问题
[root@XXXXX ~]# kubectl top pod
NAME CPU(cores) MEMORY(bytes)
XXX-48jgq 16m 75Mi
XXX-55g2z 12m 82Mi
XXX-6tj6c 8m 143Mi
初次排查
powershell
#1 查看metrics.k8s.io的api情况
kubectl get --raw /apis/metrics.k8s.io/v1beta1 | python -m json.tool
{
"apiVersion": "v1",
"groupVersion": "metrics.k8s.io/v1beta1",
"kind": "APIResourceList",
"resources": [
{
"kind": "NodeMetrics",
"name": "nodes",
"namespaced": false,
"singularName": "",
"verbs": [
"get",
"list"
]
},
{
"kind": "PodMetrics",
"name": "pods",
"namespaced": true,
"singularName": "",
"verbs": [
"get",
"list"
]
}
]
}
#2 查看apiservices 服务
kubectl describe apiservices v1beta1.metrics.k8s.io
Status:
Conditions:
Last Transition Time: 2023-11-07T16:16:24Z
Message: all checks passed
Reason: Passed
Status: True
Type: Available
问题解决
部署 metrics-server,使用 yaml 方式进行部署。因为无法直接在线下载(git 下载不下来),我提前把部署文件下载到本地,然后把镜像地址换成了阿里的镜像源。另外要注意 metrics-server 和 k8s 版本的兼容性:
版本兼容性
根据官方的版本兼容性图表(metrics-server 0.6.x 支持 Kubernetes 1.19 及以上版本),我们的 k8s 版本是 1.23,因此我们使用 metrics-server:v0.6.4
metrics-server.yaml
yaml
# Identity that the metrics-server Deployment runs as; the RBAC bindings in
# this manifest grant their permissions to this ServiceAccount.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
# Read-only access to the metrics.k8s.io API (pod and node metrics).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    # The aggregate-to-* labels merge these rules into the built-in
    # admin / edit / view ClusterRoles via RBAC role aggregation.
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
# Permissions used by metrics-server itself (bound to its ServiceAccount by
# the ClusterRoleBinding of the same name).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
# Core API group (""): read the nodes/metrics subresource.
- apiGroups:
  - ""
  resources:
  - nodes/metrics
  verbs:
  - get
# Core API group (""): discover and watch pods and nodes.
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
# Binds the kube-system Role "extension-apiserver-authentication-reader" to the
# metrics-server ServiceAccount. That Role is not defined in this manifest; it
# is expected to already exist in the cluster.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
# Binds the ClusterRole "system:auth-delegator" to the metrics-server
# ServiceAccount. That ClusterRole is not defined in this manifest; it is
# expected to already exist in the cluster.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
# Grants the "system:metrics-server" ClusterRole (nodes/metrics, pods, nodes)
# to the metrics-server ServiceAccount in kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
# Service in front of the metrics-server pods; the APIService
# v1beta1.metrics.k8s.io in this manifest points at it.
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    # Named port; resolves to the container port called "https"
    # (4443 in the metrics-server Deployment).
    targetPort: https
  selector:
    k8s-app: metrics-server
---
# metrics-server workload (v0.6.4), running in kube-system under the
# metrics-server ServiceAccount.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      # Never take the running pod down before its replacement is ready.
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        # /tmp is the only writable path (readOnlyRootFilesystem is true below);
        # it is backed by the tmp-dir emptyDir volume.
        - --cert-dir=/tmp
        # Must match the containerPort named "https" below.
        - --secure-port=4443
        - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
        - --kubelet-use-node-status-port
        - --metric-resolution=15s
        # Skips verification of the kubelet serving certificates.
        # NOTE(review): convenient for test clusters, but it disables TLS
        # verification towards kubelets; prefer properly signed kubelet
        # serving certificates in production.
        - --kubelet-insecure-tls
        # Aliyun-hosted mirror used in place of the upstream image registry.
        image: registry.cn-hangzhou.aliyuncs.com/rainux/metrics-server:v0.6.4
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        - containerPort: 4443
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          initialDelaySeconds: 20
          periodSeconds: 10
        resources:
          requests:
            cpu: 100m
            memory: 200Mi
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
        volumeMounts:
        - mountPath: /tmp
          name: tmp-dir
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      volumes:
      - emptyDir: {}
        name: tmp-dir
---
# Registers the metrics.k8s.io/v1beta1 API group/version with the API
# aggregation layer and routes it to the metrics-server Service in kube-system.
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  # The API server does not verify the serving certificate of the backing
  # service when proxying requests to it.
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100
问题验证
powershell
#1 运行yaml文件
[root@k8s-master][~]
$kubectl create -f metrics-server.yaml
#2 问题验证
[root@XXXX][~]
$kubectl top node
NAME CPU(cores) CPU% MEMORY(bytes) MEMORY%
k8s-master 194m 9% 1689Mi 35%
[root@XXXX][~]
$kubectl top pod
NAME CPU(cores) MEMORY(bytes)
cm-deploy-XXXX 0m 3Mi
nfs-XXXXX 0m 13Mi