1、使用 Helm 部署 es-exporter
shell
# Install (or upgrade, if it already exists) the Helm release "es-exporter"
# from the prometheus-community elasticsearch-exporter chart into the
# "monitoring" namespace.
# NOTE(review): "xxx" in the URI looks like a password placeholder - confirm
# and substitute the real credential before running.
helm upgrade --install es-exporter \
  prometheus-community/prometheus-elasticsearch-exporter \
  --namespace monitoring \
  --set 'es.uri=https://elastic:xxx@172.31.107.43:9200' \
  --set es.sslSkipVerify=true
2、编写ServiceMonitor yaml文件并部署
yaml
# ServiceMonitor instructing the Prometheus Operator to scrape the
# Elasticsearch exporter Service created by the Helm release "es-exporter".
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: elasticsearch-exporter-sm
  # Preferably the same namespace as Prometheus; otherwise whatever the
  # Operator configuration allows.
  namespace: monitoring
  labels:
    # Key point: must match the serviceMonitorSelector of the Prometheus
    # instance, or this ServiceMonitor is ignored.
    release: release-prometheus
spec:
  jobLabel: app.kubernetes.io/name
  selector:
    matchLabels:
      # Must equal the labels on the exporter Service. The values below are
      # what the chart is expected to render for release "es-exporter"
      # (name = chart name, instance = release name) - verify with:
      #   kubectl -n monitoring get svc --show-labels
      app.kubernetes.io/name: prometheus-elasticsearch-exporter
      app.kubernetes.io/instance: es-exporter
  namespaceSelector:
    any: true  # allow discovery across namespaces
  endpoints:
    # Must match the port *name* defined in the Service. "http" is presumably
    # the chart default (service.metricsPort.name) - confirm with:
    #   kubectl -n monitoring get svc es-exporter-prometheus-elasticsearch-exporter -o yaml
    - port: http
      interval: 30s
      scrapeTimeout: 10s
      path: /metrics
3、编写rules yaml文件
yaml
# Alerting rules for Elasticsearch, evaluated against the metrics exposed by
# the Elasticsearch exporter.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: elasticsearch-alerts
  namespace: monitoring
  labels:
    # NOTE(review): these labels presumably have to match the Prometheus
    # instance's ruleSelector - confirm against the Prometheus CR.
    role: alert-rules
    release: release-prometheus
spec:
  groups:
    - name: elasticsearch-health
      rules:
        # The exporter publishes cluster health as
        # elasticsearch_cluster_health_status{color=...}, with value 1 on the
        # series whose color is the current status.
        - alert: ElasticsearchClusterRed
          expr: elasticsearch_cluster_health_status{color="red"} == 1
          for: 0m  # fire immediately, no grace period
          labels:
            severity: critical
          annotations:
            summary: "ES 集群状态为 RED"
            description: "集群 {{ $labels.cluster }} 处于红色状态,部分分片不可用。"
        - alert: ElasticsearchClusterYellow
          expr: elasticsearch_cluster_health_status{color="yellow"} == 1
          for: 5m
          labels:
            severity: warning
          annotations:
            summary: "ES 集群状态为 YELLOW"
            description: "集群 {{ $labels.cluster }} 处于黄色状态,副本分片未完全分配。"
        # Uses available_bytes (space usable by the Elasticsearch process)
        # rather than free_bytes, so the ratio reflects what ES can actually
        # write. The "name" label carries the ES node name; "instance" would
        # only identify the exporter endpoint that was scraped.
        - alert: ElasticsearchNodeDiskFull
          expr: elasticsearch_filesystem_data_available_bytes / elasticsearch_filesystem_data_size_bytes < 0.1
          for: 5m
          labels:
            severity: critical
          annotations:
            summary: "ES 节点磁盘空间不足"
            description: "节点 {{ $labels.name }} 剩余空间低于 10%。"
4、在 Grafana 中导入以下仪表盘(Dashboard)ID
4377
13072
14191
3236