Prometheus 添加 Elasticsearch（ES）监控模块

yunson_Liu2026-04-04 8:59

1、helm部署es-exporter

shell 复制代码

# Install the "es-exporter" release, or upgrade it if it already exists.
# "xxx" in the URI is a placeholder for the elastic user's password.
# es.sslSkipVerify=true skips TLS certificate verification towards Elasticsearch.
helm upgrade --install es-exporter \
  prometheus-community/prometheus-elasticsearch-exporter \
  --namespace monitoring \
  --set es.uri='https://elastic:xxx@172.31.107.43:9200' \
  --set es.sslSkipVerify=true

2、编写ServiceMonitor yaml文件并部署

yaml 复制代码

# ServiceMonitor that makes the Prometheus Operator scrape the Elasticsearch
# exporter Service installed as Helm release "es-exporter".
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: elasticsearch-exporter-sm
  # Keep in the Prometheus namespace, or wherever the Operator is configured to look.
  namespace: monitoring
  labels:
    # Must match the serviceMonitorSelector of the Prometheus instance.
    release: release-prometheus
spec:
  jobLabel: app.kubernetes.io/name
  selector:
    matchLabels:
      # Labels expected on the Service created by the prometheus-elasticsearch-exporter
      # chart for release "es-exporter". Confirm on your cluster with:
      #   kubectl -n monitoring get svc --show-labels
      app.kubernetes.io/name: prometheus-elasticsearch-exporter
      app.kubernetes.io/instance: es-exporter
  namespaceSelector:
    # The exporter is installed in "monitoring"; limit discovery to that namespace
    # so that same-labelled Services elsewhere are not scraped by accident.
    matchNames:
    - monitoring
  endpoints:
  # Must equal the port *name* on the Service; the chart's default is "http".
  - port: http
    interval: 30s
    scrapeTimeout: 10s
    path: /metrics

3、编写 PrometheusRule 告警规则 yaml 文件并部署

yaml 复制代码

# Alerting rules for Elasticsearch, evaluated on metrics from the Elasticsearch exporter.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: elasticsearch-alerts
  namespace: monitoring
  labels:
    role: alert-rules
    release: release-prometheus
spec:
  groups:
  - name: elasticsearch-health
    rules:
    # Fires immediately (for: 0m) when the cluster status is red.
    - alert: ElasticsearchClusterRed
      expr: elasticsearch_cluster_status{color="red"} == 1
      for: 0m
      labels:
        severity: critical
      annotations:
        summary: "ES 集群状态为 RED"
        description: "集群 {{ $labels.cluster }} 处于红色状态，部分分片不可用。"

    # Fires after the cluster status has been yellow for 5 minutes.
    - alert: ElasticsearchClusterYellow
      expr: elasticsearch_cluster_status{color="yellow"} == 1
      for: 5m
      labels:
        severity: warning
      annotations:
        summary: "ES 集群状态为 YELLOW"
        description: "集群 {{ $labels.cluster }} 处于黄色状态，副本分片未完全分配。"

    # Fires when the free/size ratio of a data filesystem stays below 10% for 5 minutes.
    - alert: ElasticsearchNodeDiskFull
      expr: elasticsearch_filesystem_data_free_bytes / elasticsearch_filesystem_data_size_bytes < 0.1
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "ES 节点磁盘空间不足"
        # "instance" is the exporter's scrape target; the Elasticsearch node is
        # identified by the "name" label on the filesystem metrics.
        description: "节点 {{ $labels.name }} 剩余空间低于 10%。"

4、在 Grafana 中导入以下 Dashboard ID

4377

13072

14191

3236