Docker-compose部署Alertmanager+Dingtalk+Prometheus+Grafana实现钉钉报警

部署监控

docker-compose.yml(YAML)
# Monitoring stack: DingTalk webhook bridge, Alertmanager, Prometheus,
# Grafana and node-exporter. Configuration files are bind-mounted from
# /data/monitor on the host.
version: '3.7'

services:
  # dingtalk: prometheus-webhook-dingtalk, published on host port 8060.
  dingtalk:
    image: timonwong/prometheus-webhook-dingtalk:latest
    container_name: dingtalk
    restart: always
    command:
      - '--config.file=/etc/prometheus-webhook-dingtalk/config.yml'
    volumes:
      - /data/monitor/dingtalk/config.yml:/etc/prometheus-webhook-dingtalk/config.yml
      # Mount the host's /etc/localtime read-only.
      - /etc/localtime:/etc/localtime:ro
    ports:
      - "8060:8060"

  # alertmanager: published on host port 9093.
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    restart: always
    volumes:
      - /data/monitor/alertmanager/config/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"

  # prometheus: published on host port 9090.
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    restart: always
    ports:
      - "9090:9090"
    volumes:
      - /data/monitor/promethues/prometheus.yml:/etc/prometheus/prometheus.yml
      # The host file alert.yml appears inside the container as rule.yml,
      # which is the name prometheus.yml lists under rule_files.
      - /data/monitor/promethues/alert.yml:/etc/prometheus/rule.yml
      - /etc/localtime:/etc/localtime:ro

  # grafana: published on host port 3000; data kept under /data/monitor/grafana.
  grafana:
    image: grafana/grafana
    container_name: grafana
    restart: always
    ports:
      - "3000:3000"
    volumes:
      - /data/monitor/grafana:/var/lib/grafana

  # node-exporter: host metrics, published on host port 9100.
  node-exporter:
    image: prom/node-exporter
    container_name: node-exporter
    restart: always
    # Point the collectors at the host mounts below. Without these flags the
    # bind mounts are never read and the exporter reports the container's own
    # /proc, /sys and root filesystem instead of the host's.
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro

Dingtalk配置文件

/data/monitor/dingtalk/config.yml

config.yml(YAML)
# Message template files for prometheus-webhook-dingtalk.
# NOTE(review): the compose file in this document mounts only config.yml into
# /etc/prometheus-webhook-dingtalk; confirm this template file exists in the
# container (or mount it) before relying on it.
templates:
  - '/etc/prometheus-webhook-dingtalk/templates/templates.tmpl'

# Receivers: several targets can be configured side by side. The key
# (here `webhook2`) is the name used in the webhook URL path.
targets:
  webhook2:
    # Replace the placeholder after access_token= with the robot's token.
    url: 'https://oapi.dingtalk.com/robot/send?access_token=钉钉token'
    # Placeholder for the robot's signing secret.
    secret: '钉钉加签'

Alertmanager配置文件

/data/monitor/alertmanager/config/alertmanager.yml

alertmanager.yml(YAML)
# Alertmanager configuration: a single default route whose alerts are
# delivered to the DingTalk webhook bridge.
global:
  resolve_timeout: 5m
  # SMTP relay; SSL submission normally uses port 465.
  smtp_smarthost: 'smtp.qiye.163.com:465'
  smtp_from: 'user@163.com'             # sender address
  smtp_auth_username: 'user@163.com'    # mailbox account
  smtp_auth_password: 'password'        # mailbox password or authorization code
  smtp_require_tls: false

route:
  receiver: 'default'
  group_wait: 10s
  group_interval: 1m
  repeat_interval: 1h
  group_by: ['alertname']

# While a critical alert is firing, mute warning alerts that carry the same
# alertname and instance. Written with source_matchers/target_matchers, which
# replace the deprecated source_match/target_match keys.
# NOTE(review): the rule file in this document sets severity to 严重 / 非常严重,
# so this rule matches nothing until the label values are aligned.
inhibit_rules:
  - source_matchers:
      - 'severity = "critical"'
    target_matchers:
      - 'severity = "warning"'
    equal: ['alertname', 'instance']

receivers:
  - name: 'default'
    webhook_configs:
      # `webhook2` in the path must match a key under `targets` in the
      # dingtalk config. Replace dingtalk-IP with the bridge's address.
      - url: 'http://dingtalk-IP:8060/dingtalk/webhook2/send'
        send_resolved: true

Prometheus主配置文件(prometheus.yml)

/data/monitor/promethues/prometheus.yml

prometheus.yml(YAML)
# Prometheus main configuration. Every `IP` below is a placeholder for the
# address of the corresponding host.
global:
  scrape_interval: 60s
  evaluation_interval: 60s

# Alertmanager instance(s) that receive firing alerts.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['IP:9093']

rule_files:
  - "/etc/prometheus/rule.yml"
  - "rules/*.yml"

scrape_configs:
  # Prometheus scraping itself.
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: prometheus

  # Port 9100 is the port the compose file publishes for node-exporter.
  - job_name: lite
    static_configs:
      - targets: ['IP:9100']
        labels:
          env: dev

  - job_name: redis_exporter
    static_configs:
      - targets: ['IP:9121']
        labels:
          env: dev
          ident: redis

  - job_name: mysql_exporter
    static_configs:
      - targets: ['IP:9104']
        labels:
          env: dev
          ident: mysql

  # Non-default metrics path; scraped every 5s instead of the global 60s.
  - job_name: emqx_exporter
    metrics_path: /api/v5/prometheus/stats
    scrape_interval: 5s
    honor_labels: true
    static_configs:
      - targets: ['IP:18083']

  # Alertmanager is published on 9093 in the compose file. The previous
  # target, port 9100, is node-exporter and duplicated job `lite`.
  - job_name: 'alertmanager'
    scrape_interval: 15s
    static_configs:
      - targets: ['IP:9093']

Prometheus告警规则文件(alert.yml)

/data/monitor/promethues/alert.yml

alert.yml(YAML)
# Host-level alerting rules. The thresholds below are 95, matching the 95%
# figure stated in each rule's annotations.
groups:
  - name: 服务器主机信息监控告警
    rules:
      # Target down. The job matcher must name a job from scrape_configs;
      # `lite` is the job that scrapes node-exporter on port 9100.
      - alert: 公司内部服务器监控
        expr: up{job="lite"} == 0
        for: 0m
        labels:
          severity: 非常严重
        annotations:
          description: "监控的目标已丢失,请检查服务器自身或node_exporter服务"

      # Memory usage in percent, derived from MemAvailable / MemTotal.
      - alert: "内存报警"
        expr: 100 - ((node_memory_MemAvailable_bytes * 100) / node_memory_MemTotal_bytes) > 95
        # The condition must hold this long before the alert is sent to
        # Alertmanager.
        for: 1m
        labels:
          severity: 严重
        annotations:
          summary: "{{ $labels.instance }} 内存使用率过高,请尽快处理!"
          description: "{{ $labels.instance }}内存使用率超过95%,当前使用率{{ $value }}%."

      # Filesystem usage in percent, restricted to ext4 and xfs.
      - alert: "磁盘空间报警"
        expr: (1 - node_filesystem_avail_bytes{fstype=~"ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext4|xfs"}) * 100 > 95
        for: 60s
        labels:
          severity: 严重
        annotations:
          summary: "{{ $labels.instance }}磁盘空间使用超过95%了"
          description: "{{ $labels.instance }}磁盘使用率超过95%,当前使用率{{ $value }}%."

      # CPU usage in percent: 100 minus the idle share averaged per instance.
      # The aggregation keeps the `instance` label, so it is not set again
      # under `labels`.
      - alert: "CPU报警"
        expr: 100-(avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by(instance)* 100) > 95
        for: 120s
        labels:
          severity: 严重
        annotations:
          summary: "{{$labels.instance}}CPU使用率超过95%了"
          description: "{{ $labels.instance }}CPU使用率超过95%,当前使用率{{ $value }}%."

      # Disk busy time in percent, from node_disk_io_time_seconds_total.
      - alert: "磁盘IO性能报警"
        expr: ((irate(node_disk_io_time_seconds_total[30m]))* 100) > 95
        for: 3m
        labels:
          severity: 严重
        annotations:
          summary: "{{$labels.instance}} 流入磁盘IO使用率过高,请尽快处理!"
          description: "{{$labels.instance}} 流入磁盘IO大于95%,当前使用率{{ $value }}%."
相关推荐
Elastic 中国社区官方博客2 小时前
在 Elasticsearch 中使用原生 PromQL 支持查询 Prometheus 指标
大数据·elasticsearch·搜索引擎·信息可视化·全文检索·prometheus
Elastic 中国社区官方博客5 小时前
Elastic 9.4:Workflows 正式发布、Agent Builder 更新,以及 Prometheus / PromQL 支持
运维·数据库·人工智能·elasticsearch·搜索引擎·信息可视化·prometheus
qq_364371727 小时前
基于 Docker 容器化环境配置
运维·docker·容器
GentleDevin8 小时前
Docker 运维常用命令大全
docker·容器·运维命令
运维全栈笔记8 小时前
基于Docker的MinIO单机部署与功能测试指南
运维·docker·容器
心机之蛙qee9 小时前
docker的安装(RHEL9)
运维·docker·容器
炸炸鱼.9 小时前
Docker 高级管理 —— 容器通信技术与数据持久化
docker
乐hh10 小时前
DM8配置SSL
数据库·docker·ssl
极客先躯10 小时前
高级java每日一道面试题-2025年12月05日-实战篇[Dockerj]-Docker 安装后的默认存储路径是什么?如何修改?
java·docker·默认存储路径在不同系统上的区别·linux overlay2·修改存储路径的理论方法·修改流程中的关键理论点
凤舞飘伶10 小时前
windows安装docker-desk
windows·docker·容器