基于Helm部署kube-prometheus-stack

概述

GitHub地址:https://github.com/prometheus-operator/prometheus-operator

Helm仓库:https://prometheus-community.github.io/helm-charts

安装prometheus-operator前置条件:

确保你有一个k8s集群

安装k8s集群可参考:使用sealos快速搭建kubernetes集群!!!

确保你安装了Helm

安装Helm可参考:安装Helm

实操

创建SC

我这里以NFS作为存储

配置NFS参考这篇文章:https://www.cnblogs.com/huangSir-devops/p/18802386

创建SC参考:https://www.cnblogs.com/huangSir-devops/p/18871414#_label4_2

创建grafana的存储

复制代码
# 配置NFS
[root@master01 ~/volumes]# mkdir -p /data/nfs/grafana
[root@master01 ~/volumes]# echo '/data/nfs/grafana 10.0.0.0/24(rw,sync,no_root_squash,no_subtree_check)' >> /etc/exports
[root@master ~/volumes]# exportfs -ra

# 定义资源清单
[root@master01 ~/volumes]# cat grafana-sc.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: grafana-sc  # StorageClass名称,PVC通过该名称引用此存储类
provisioner: nfs.csi.k8s.io  # 指定使用NFS CSI驱动作为存储供给器
parameters:  # 传递给NFS CSI驱动的参数
  server: 10.0.0.30  # NFS服务器的IP地址
  share: /data/nfs/grafana  # NFS服务器上的共享目录路径
  # csi.storage.k8s.io/provisioner-secret is only needed for providing mountOptions in DeleteVolume
  # csi.storage.k8s.io/provisioner-secret-name: "mount-options"
  # csi.storage.k8s.io/provisioner-secret-namespace: "default"
reclaimPolicy: Retain  # 回收策略:当PVC被删除时,PV保留不删除
volumeBindingMode: Immediate  # 卷绑定模式:立即绑定,不需要等待Pod调度
allowVolumeExpansion: true  # 允许卷扩容:支持通过修改PVC请求更大容量

[root@master ~]# kubectl apply -f grafana-sc.yaml

[root@master ~]# kubectl get sc
NAME         PROVISIONER      RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
grafana-sc   nfs.csi.k8s.io   Retain          Immediate           true                   3m44s

创建Prometheus的存储

复制代码
# 配置NFS
[root@master01 ~/volumes]# mkdir -p /data/nfs/prometheus
[root@master01 ~/volumes]# echo '/data/nfs/prometheus 10.0.0.0/24(rw,sync,no_root_squash,no_subtree_check)' >> /etc/exports
[root@master ~/volumes]# exportfs -ra

[root@master01 ~/volumes]# cat prometheus-sc.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: prometheus-sc  # StorageClass名称,PVC通过该名称引用此存储类
provisioner: nfs.csi.k8s.io  # 指定使用NFS CSI驱动作为存储供给器
parameters:  # 传递给NFS CSI驱动的参数
  server: 10.0.0.30  # NFS服务器的IP地址
  share: /data/nfs/prometheus  # NFS服务器上的共享目录路径
  # csi.storage.k8s.io/provisioner-secret is only needed for providing mountOptions in DeleteVolume
  # csi.storage.k8s.io/provisioner-secret-name: "mount-options"
  # csi.storage.k8s.io/provisioner-secret-namespace: "default"
reclaimPolicy: Retain  # 回收策略:当PVC被删除时,PV保留不删除
volumeBindingMode: Immediate  # 卷绑定模式:立即绑定,不需要等待Pod调度
allowVolumeExpansion: true  # 允许卷扩容:支持通过修改PVC请求更大容量

[root@master ~/volumes]# kubectl apply -f prometheus-sc.yaml
storageclass.storage.k8s.io/prometheus-sc created
[root@master ~/volumes]# kubectl get sc
NAME            PROVISIONER      RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
grafana-sc      nfs.csi.k8s.io   Retain          Immediate           true                   3h40m
nfs-csi         nfs.csi.k8s.io   Delete          Immediate           true                   175m
prometheus-sc   nfs.csi.k8s.io   Retain          Immediate           true                   4s

配置operator

添加仓库

复制代码
# 添加社区版仓库
[root@master ~]# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
[root@master ~]# helm repo update
[root@master ~]# helm pull prometheus-community/kube-prometheus-stack --version 73.2.0


# 如果上面因为网络报错,可使用下面的
[root@master ~]# wget https://github.com/prometheus-community/helm-charts/releases/download/kube-prometheus-stack-73.2.0/kube-prometheus-stack-73.2.0.tgz

解压

复制代码
[root@master ~]# tar -xvf kube-prometheus-stack-73.2.0.tgz
[root@master ~]# ll kube-prometheus-stack
total 228
drwxr-xr-x  4 root root   4096 Jun  7 15:27 ./
drwx------ 31 root root   4096 Jun  7 15:27 ../
-rw-r--r--  1 root root    458 Jun  6 01:22 .helmignore
-rw-r--r--  1 root root    615 Jun  6 01:22 Chart.lock
-rw-r--r--  1 root root   2571 Jun  6 01:22 Chart.yaml
-rw-r--r--  1 root root  19869 Jun  6 01:22 README.md
drwxr-xr-x  7 root root   4096 Jun  7 15:27 charts/
drwxr-xr-x  8 root root   4096 Jun  7 15:27 templates/
-rw-r--r--  1 root root 182959 Jun  6 01:22 values.yaml

修改镜像源,防止镜像拉取失败

复制代码
[root@master ~/kube-prometheus-stack]# sed -i s#registry.k8s.io#k8s.m.daocloud.io#g values.yaml
[root@master ~/kube-prometheus-stack]# sed -i s#quay.io#quay.m.daocloud.io#g values.yaml
[root@master ~/kube-prometheus-stack]# sed -i s#docker.io#docker.m.daocloud.io#g values.yaml
[root@master ~/kube-prometheus-stack]# sed -i s#registry.k8s.io#k8s.m.daocloud.io#g charts/kube-state-metrics/values.yaml

配置Grafana

复制代码
[root@master ~]# vim ~/kube-prometheus-stack/charts/grafana/values.yaml
# 修改service,改成NodePort访问
 231 service:
 232   enabled: true
 233   type: NodePort
 234   nodePort: 30030

# 配置存储
 417 persistence:
 418   type: pvc
 419   enabled: true
       # sc的名称,和上面保持一致
 420   storageClassName: grafana-sc
 421   ## (Optional) Use this to bind the claim to an existing PersistentVolume (PV) by name.
 422   volumeName: ""

# 修改用户名密码 (可选)
[root@master ~]# vim ~/kube-prometheus-stack/values.yaml
1287   adminUser: admin
1288   adminPassword: prom-operator

配置Prometheus

复制代码
[root@master ~]# vim ~/kube-prometheus-stack/values.yaml
3371 prometheus:
3372   enabled: true
# 修改service类型NodePort
3564    type: NodePort
# 端口可选配置
3551     nodePort: 30090
# 存储配置
4299     storageSpec:
4300     ## Using PersistentVolumeClaim
4301     ##
4302       volumeClaimTemplate:
4303         spec:
4304           storageClassName: prometheus-sc
4305           accessModes: ["ReadWriteOnce"]
4306           resources:
4307             requests:
4308               storage: 10Gi

创建Release

复制代码
[root@master ~]# helm install prometheus kube-prometheus-stack --debug
# 出现以下内容即代表成功
NOTES:
kube-prometheus-stack has been installed. Check its status by running:
  kubectl --namespace default get pods -l "release=prometheus"

Get Grafana 'admin' user password by running:

  kubectl --namespace default get secrets prometheus-grafana -o jsonpath="{.data.admin-password}" | base64 -d ; echo

Access Grafana local instance:

  export POD_NAME=$(kubectl --namespace default get pod -l "app.kubernetes.io/name=grafana,app.kubernetes.io/instance=prometheus" -oname)
  kubectl --namespace default port-forward $POD_NAME 3000

Visit https://github.com/prometheus-operator/kube-prometheus for instructions on how to create & configure Alertmanager and Prometheus instances using the Operator.

查看创建的资源

复制代码
# 查看Pod
[root@master ~]# kubectl get po
NAME                                                     READY   STATUS    RESTARTS   AGE
alertmanager-prometheus-kube-prometheus-alertmanager-0   2/2     Running   0          70s
prometheus-grafana-55cbbf54b7-lmhnd                      3/3     Running   0          70s
prometheus-kube-prometheus-operator-847fd659bc-scp4w     1/1     Running   0          70s
prometheus-kube-state-metrics-5fb66759db-nb242           1/1     Running   0          70s
prometheus-prometheus-kube-prometheus-prometheus-0       2/2     Running   0          70s
prometheus-prometheus-node-exporter-89xt7                1/1     Running   0          70s
prometheus-prometheus-node-exporter-cn8s4                1/1     Running   0          70s
prometheus-prometheus-node-exporter-llqgx                1/1     Running   0          70s

# 查看svc
[root@master ~]# kubectl get svc
NAME                                      TYPE        CLUSTER-IP    EXTERNAL-IP   PORT(S)                         AGE
alertmanager-operated                     ClusterIP   None          <none>        9093/TCP,9094/TCP,9094/UDP      5m
kubernetes                                ClusterIP   10.96.0.1     <none>        443/TCP                         5m
prometheus-grafana                        NodePort    10.96.1.173   <none>        80:30030/TCP                    5m
prometheus-kube-prometheus-alertmanager   ClusterIP   10.96.3.151   <none>        9093/TCP,8080/TCP               5m
prometheus-kube-prometheus-operator       ClusterIP   10.96.2.62    <none>        443/TCP                         5m
prometheus-kube-prometheus-prometheus     NodePort    10.96.0.114   <none>        9090:30090/TCP,8080:31659/TCP   5m
prometheus-kube-state-metrics             ClusterIP   10.96.1.88    <none>        8080/TCP                        5m
prometheus-operated                       ClusterIP   None          <none>        9090/TCP                        5m
prometheus-prometheus-node-exporter       ClusterIP   10.96.0.191   <none>        9100/TCP                        5m

访问测试

访问Grafana

http://10.0.0.30:30030/

  • 用户名:admin
  • 密码:prom-operator

用户名和密码是上面配置的

访问Prometheus

http://10.0.0.30:30090/targets

kube-prometheus-stack监控应用程序

kube-prometheus-stack通过PodMonitor和ServiceMonitor对象来监视示例应用程序的目标

参考这篇文章:https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/developer/getting-started.md

架构图

通过ServiceMonitor监控应用程序

示例:

复制代码
省略,这块有时间补齐,可以先看官方文档

通过PodMonitor监控应用程序

示例:

复制代码
省略,这块有时间补齐,可以先看官方文档