概述
GitHub地址:https://github.com/prometheus-operator/prometheus-operator
Helm仓库:https://prometheus-community.github.io/helm-charts
安装prometheus-operator前置条件:
确保你有一个k8s集群
安装k8s集群可参考:使用sealos快速搭建kubernetes集群!!!
确保你安装了Helm
安装Helm可参考:安装Helm
实操
创建SC
我这里以NFS作为存储
配置NFS参考这篇文章:https://www.cnblogs.com/huangSir-devops/p/18802386
创建SC参考:https://www.cnblogs.com/huangSir-devops/p/18871414#_label4_2
创建grafana的存储
# 配置NFS
[root@master01 ~/volumes]# mkdir -p /data/nfs/grafana
[root@master01 ~/volumes]# echo '/data/nfs/grafana 10.0.0.0/24(rw,sync,no_root_squash,no_subtree_check)' >> /etc/exports
[root@master ~/volumes]# exportfs -ra
# 定义资源清单
[root@master01 ~/volumes]# cat grafana-sc.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: grafana-sc # StorageClass名称,PVC通过该名称引用此存储类
provisioner: nfs.csi.k8s.io # 指定使用NFS CSI驱动作为存储供给器
parameters: # 传递给NFS CSI驱动的参数
  server: 10.0.0.30 # NFS服务器的IP地址
  share: /data/nfs/grafana # NFS服务器上的共享目录路径
# csi.storage.k8s.io/provisioner-secret is only needed for providing mountOptions in DeleteVolume
# csi.storage.k8s.io/provisioner-secret-name: "mount-options"
# csi.storage.k8s.io/provisioner-secret-namespace: "default"
reclaimPolicy: Retain # 回收策略:当PVC被删除时,PV保留不删除
volumeBindingMode: Immediate # 卷绑定模式:立即绑定,不需要等待Pod调度
allowVolumeExpansion: true # 允许卷扩容:支持通过修改PVC请求更大容量
[root@master ~/volumes]# kubectl apply -f grafana-sc.yaml
[root@master ~]# kubectl get sc
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
grafana-sc nfs.csi.k8s.io Retain Immediate true 3m44s
创建Prometheus的存储
# 配置NFS
[root@master01 ~/volumes]# mkdir -p /data/nfs/prometheus
[root@master01 ~/volumes]# echo '/data/nfs/prometheus 10.0.0.0/24(rw,sync,no_root_squash,no_subtree_check)' >> /etc/exports
[root@master ~/volumes]# exportfs -ra
[root@master01 ~/volumes]# cat prometheus-sc.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: prometheus-sc # StorageClass名称,PVC通过该名称引用此存储类
provisioner: nfs.csi.k8s.io # 指定使用NFS CSI驱动作为存储供给器
parameters: # 传递给NFS CSI驱动的参数
  server: 10.0.0.30 # NFS服务器的IP地址
  share: /data/nfs/prometheus # NFS服务器上的共享目录路径
# csi.storage.k8s.io/provisioner-secret is only needed for providing mountOptions in DeleteVolume
# csi.storage.k8s.io/provisioner-secret-name: "mount-options"
# csi.storage.k8s.io/provisioner-secret-namespace: "default"
reclaimPolicy: Retain # 回收策略:当PVC被删除时,PV保留不删除
volumeBindingMode: Immediate # 卷绑定模式:立即绑定,不需要等待Pod调度
allowVolumeExpansion: true # 允许卷扩容:支持通过修改PVC请求更大容量
[root@master ~/volumes]# kubectl apply -f prometheus-sc.yaml
storageclass.storage.k8s.io/prometheus-sc created
[root@master ~/volumes]# kubectl get sc
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
grafana-sc nfs.csi.k8s.io Retain Immediate true 3h40m
nfs-csi nfs.csi.k8s.io Delete Immediate true 175m
prometheus-sc nfs.csi.k8s.io Retain Immediate true 4s
配置operator
添加仓库
# 添加社区版仓库
[root@master ~]# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
[root@master ~]# helm repo update
[root@master ~]# helm pull prometheus-community/kube-prometheus-stack --version 73.2.0
# 如果上面因为网络报错,可使用下面的
[root@master ~]# wget https://github.com/prometheus-community/helm-charts/releases/download/kube-prometheus-stack-73.2.0/kube-prometheus-stack-73.2.0.tgz
解压
[root@master ~]# tar -xvf kube-prometheus-stack-73.2.0.tgz
[root@master ~]# ll kube-prometheus-stack
total 228
drwxr-xr-x 4 root root 4096 Jun 7 15:27 ./
drwx------ 31 root root 4096 Jun 7 15:27 ../
-rw-r--r-- 1 root root 458 Jun 6 01:22 .helmignore
-rw-r--r-- 1 root root 615 Jun 6 01:22 Chart.lock
-rw-r--r-- 1 root root 2571 Jun 6 01:22 Chart.yaml
-rw-r--r-- 1 root root 19869 Jun 6 01:22 README.md
drwxr-xr-x 7 root root 4096 Jun 7 15:27 charts/
drwxr-xr-x 8 root root 4096 Jun 7 15:27 templates/
-rw-r--r-- 1 root root 182959 Jun 6 01:22 values.yaml
修改镜像源,防止镜像拉取失败
[root@master ~/kube-prometheus-stack]# sed -i s#registry.k8s.io#k8s.m.daocloud.io#g values.yaml
[root@master ~/kube-prometheus-stack]# sed -i s#quay.io#quay.m.daocloud.io#g values.yaml
[root@master ~/kube-prometheus-stack]# sed -i s#docker.io#docker.m.daocloud.io#g values.yaml
[root@master ~/kube-prometheus-stack]# sed -i s#registry.k8s.io#k8s.m.daocloud.io#g charts/kube-state-metrics/values.yaml
配置Grafana
[root@master ~]# vim ~/kube-prometheus-stack/charts/grafana/values.yaml
# 修改service,改成NodePort访问
231 service:
232 enabled: true
233 type: NodePort
234 nodePort: 30030
# 配置存储
417 persistence:
418 type: pvc
419 enabled: true
# sc的名称,和上面保持一致
420 storageClassName: grafana-sc
421 ## (Optional) Use this to bind the claim to an existing PersistentVolume (PV) by name.
422 volumeName: ""
# 修改用户名密码 (可选)
[root@master ~]# vim ~/kube-prometheus-stack/values.yaml
1287 adminUser: admin
1288 adminPassword: prom-operator
配置Prometheus
[root@master ~]# vim ~/kube-prometheus-stack/values.yaml
3371 prometheus:
3372 enabled: true
# 修改service类型NodePort
3564 type: NodePort
# 端口可选配置
3551 nodePort: 30090
# 存储配置
4299 storageSpec:
4300 ## Using PersistentVolumeClaim
4301 ##
4302 volumeClaimTemplate:
4303 spec:
4304 storageClassName: prometheus-sc
4305 accessModes: ["ReadWriteOnce"]
4306 resources:
4307 requests:
4308 storage: 10Gi
创建Release
[root@master ~]# helm install prometheus kube-prometheus-stack --debug
# 出现以下内容即代表成功
NOTES:
kube-prometheus-stack has been installed. Check its status by running:
kubectl --namespace default get pods -l "release=prometheus"
Get Grafana 'admin' user password by running:
kubectl --namespace default get secrets prometheus-grafana -o jsonpath="{.data.admin-password}" | base64 -d ; echo
Access Grafana local instance:
export POD_NAME=$(kubectl --namespace default get pod -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=prometheus" -oname)
kubectl --namespace default port-forward $POD_NAME 3000
Visit https://github.com/prometheus-operator/kube-prometheus for instructions on how to create & configure Alertmanager and Prometheus instances using the Operator.
查看创建的资源
# 查看Pod
[root@master ~]# kubectl get po
NAME READY STATUS RESTARTS AGE
alertmanager-prometheus-kube-prometheus-alertmanager-0 2/2 Running 0 70s
prometheus-grafana-55cbbf54b7-lmhnd 3/3 Running 0 70s
prometheus-kube-prometheus-operator-847fd659bc-scp4w 1/1 Running 0 70s
prometheus-kube-state-metrics-5fb66759db-nb242 1/1 Running 0 70s
prometheus-prometheus-kube-prometheus-prometheus-0 2/2 Running 0 70s
prometheus-prometheus-node-exporter-89xt7 1/1 Running 0 70s
prometheus-prometheus-node-exporter-cn8s4 1/1 Running 0 70s
prometheus-prometheus-node-exporter-llqgx 1/1 Running 0 70s
# 查看svc
[root@master ~]# kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
alertmanager-operated ClusterIP None <none> 9093/TCP,9094/TCP,9094/UDP 5m
kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 5m
prometheus-grafana NodePort 10.96.1.173 <none> 80:30030/TCP 5m
prometheus-kube-prometheus-alertmanager ClusterIP 10.96.3.151 <none> 9093/TCP,8080/TCP 5m
prometheus-kube-prometheus-operator ClusterIP 10.96.2.62 <none> 443/TCP 5m
prometheus-kube-prometheus-prometheus NodePort 10.96.0.114 <none> 9090:30090/TCP,8080:31659/TCP 5m
prometheus-kube-state-metrics ClusterIP 10.96.1.88 <none> 8080/TCP 5m
prometheus-operated ClusterIP None <none> 9090/TCP 5m
prometheus-prometheus-node-exporter ClusterIP 10.96.0.191 <none> 9100/TCP 5m
访问测试
访问Grafana
http://10.0.0.30:30030
- 用户名:admin
- 密码:prom-operator
用户名和密码是上面配置的


访问Prometheus
http://10.0.0.30:30090/targets
kube-prometheus-stack监控应用程序
kube-prometheus-stack通过PodMonitor和ServiceMonitor对象来监视示例应用程序的目标
架构图
通过ServiceMonitor监控应用程序
示例:
省略,这块有时间补齐,可以先看官方文档
通过PodMonitor监控应用程序
示例:
省略,这块有时间补齐,可以先看官方文档