Docker-compose部署Alertmanager+Dingtalk+Prometheus+Grafana实现钉钉报警

部署监控

bash 复制代码
version: '3.7'

services:
#dingtalk
  dingtalk:
    image: timonwong/prometheus-webhook-dingtalk:latest
    container_name: dingtalk
    restart: always
    command:
      - '--config.file=/etc/prometheus-webhook-dingtalk/config.yml'
    volumes:
      - /data/monitor/dingtalk/config.yml:/etc/prometheus-webhook-dingtalk/config.yml
      - /etc/localtime:/etc/localtime:ro
    ports:
      - "8060:8060"
#alertmanager
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    restart: always
    volumes:
      - /data/monitor/alertmanager/config/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"
#prometheus
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    restart: always
    ports:
      - "9090:9090"
    volumes:
      - /data/monitor/promethues/prometheus.yml:/etc/prometheus/prometheus.yml
      - /data/monitor/promethues/alert.yml:/etc/prometheus/rule.yml
      - /etc/localtime:/etc/localtime:ro
#grafana
  grafana:
    image: grafana/grafana
    container_name: grafana
    restart: always
    ports:
      - "3000:3000"
    volumes:
      - /data/monitor/grafana:/var/lib/grafana
#node-exporter
  node-exporter:
    image: prom/node-exporter
    container_name: node-exporter
    restart: always
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro    

Dingtalk配置文件

/data/monitor/dingtalk/config.yml

bash 复制代码
templates:
  - /etc/prometheus-webhook-dingtalk/templates/templates.tmpl

targets: #配置多个接收方
  webhook2:
    url: https://oapi.dingtalk.com/robot/send?access_token=钉钉token
    secret: 钉钉加签

Alertmanager配置文件

/data/monitor/alertmanager/config/alertmanager.yml

bash 复制代码
global:
  resolve_timeout: 5m
  smtp_smarthost: 'smtp.qiye.163.com:465'             #邮箱smtp服务器代理,启用SSL发信, 端口一般是465
  smtp_from: 'user@163.com'              #发送邮箱名称
  smtp_auth_username: 'user@163.com'              #邮箱名称
  smtp_auth_password: 'password'                #邮箱密码或授权码
  smtp_require_tls: false
 
route:
  receiver: 'default'
  group_wait: 10s
  group_interval: 1m
  repeat_interval: 1h
  group_by: ['alertname']
 
inhibit_rules:
- source_match:
    severity: 'critical'
  target_match:
    severity: 'warning'
  equal: ['alertname', 'instance']
  
receivers:
- name: 'default'
  webhook_configs:
  - url: 'http://dingtalk-IP:8060/dingtalk/webhook2/send'   #webhoo2匹配dingtalk targets
    send_resolved: true

Prometheus配置prometheus文件

/data/monitor/promethues/prometheus.yml

bash 复制代码
global:
  scrape_interval: 60s
  evaluation_interval: 60s
alerting:
  alertmanagers:
  - static_configs:
    - targets: ['IP:9093']
rule_files:
  - "/etc/prometheus/rule.yml"
  - "rules/*.yml"
 
scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: prometheus
  - job_name: lite
    static_configs:
      - targets: ['IP:9100']
        labels:
          env: dev
  - job_name: redis_exporter
    static_configs:
      - targets: ['IP:9121']
        labels:
          env: dev
          ident: redis
  - job_name: mysql_exporter
    static_configs:
      - targets: ['IP:9104']
        labels:
          env: dev
          ident: mysql
  - job_name: emqx_exporter
    metrics_path: /api/v5/prometheus/stats
    scrape_interval: 5s
    honor_labels: true
    static_configs:
      - targets: ['IP:18083']
  - job_name: 'alertmanager'
    scrape_interval: 15s
    static_configs:
      - targets: ['IP:9100']

Prometheus配置alert文件

/data/monitor/promethues/alert.yml

bash 复制代码
groups:
- name: 服务器主机信息监控告警
  rules:
  - alert: 公司内部服务器监控
    expr: up {job="公司内部服务器"} == 0
    for: 0m
    labels:
      severity: 非常严重
    annotations:
      description: "监控的目标已丢失,请检查服务器自身或node_exporter服务"


  - alert: "内存报警"
    expr: 100 - ((node_memory_MemAvailable_bytes * 100) / node_memory_MemTotal_bytes) > 10
    for: 1m  # 告警持续时间,超过这个时间才会发送给alertmanager
    labels:
      severity: 严重
    annotations:
      summary: "{{ $labels.instance }} 内存使用率过高,请尽快处理!"
      description: "{{ $labels.instance }}内存使用率超过95%,当前使用率{{ $value }}%."


  - alert: "磁盘空间报警"
    expr: (1 - node_filesystem_avail_bytes{fstype=~"ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext4|xfs"}) * 100 > 20
    for: 60s
    labels:
      severity: 严重
    annotations:
      summary: "{{ $labels.instance }}磁盘空间使用超过95%了"
      description: "{{ $labels.instance }}磁盘使用率超过95%,当前使用率{{ $value }}%."

  - alert: "CPU报警"
    expr: 100-(avg(irate(node_cpu_seconds_total{mode="idle"}[5m])) by(instance)* 100) > 5
    for: 120s
    labels:
      severity: 严重
      instance: "{{ $labels.instance }}"
    annotations:
      summary: "{{$labels.instance}}CPU使用率超过95%了"
      description: "{{ $labels.instance }}CPU使用率超过95%,当前使用率{{ $value }}%."

  - alert: "磁盘IO性能报警"
    expr: ((irate(node_disk_io_time_seconds_total[30m]))* 100) > 95
    for: 3m
    labels:
      severity: 严重
    annotations:
      summary: "{{$labels.instance}} 流入磁盘IO使用率过高,请尽快处理!"
      description: "{{$labels.instance}} 流入磁盘IO大于95%,当前使用率{{ $value }}%."
相关推荐
zfoo-framework3 小时前
docker compose安装gitea实现公司内部开发git私有仓库
docker·容器·gitea
无巧不成书02183 小时前
基于WSL 2的Docker远程开发全栈实战指南
运维·docker·容器·docker desktop·wsl 2·vs code远程开发·容器化开发
Renhao-Wan3 小时前
Docker 核心原理详解:镜像、容器、Namespace、Cgroups 与 UnionFS
java·后端·docker·容器
赵庆明老师3 小时前
Linux Docker打包
linux·运维·docker
Eloudy4 小时前
docker pull ubuntu:22.04 失败的解决记录
运维·docker·容器
taWSw5OjU5 小时前
Docker] Docker中`overlay2`磁盘占用爆满的清理方案
docker·容器·eureka
wwj888wwj20 小时前
Docker基础(复习)
java·linux·运维·docker
DONG99920 小时前
配置docker代理
docker·容器
怎么就重名了21 小时前
docker可以动态修改端口映射吗
运维·docker·容器
JEECG低代码平台21 小时前
敲敲云零代码平台一键部署实战:命令安装 vs Docker 安装
运维·docker·容器