Docker-compose部署Alertmanager+Dingtalk+Prometheus+Grafana实现钉钉报警

部署监控

docker-compose.yml 内容（YAML）：
# Monitoring stack: DingTalk webhook bridge, Alertmanager, Prometheus,
# Grafana and node-exporter. Host-side config and data live under
# /data/monitor.
version: '3.7'

services:
  # DingTalk webhook bridge; Alertmanager posts to it on port 8060.
  dingtalk:
    image: timonwong/prometheus-webhook-dingtalk:latest
    container_name: dingtalk
    restart: always
    command:
      - '--config.file=/etc/prometheus-webhook-dingtalk/config.yml'
    volumes:
      - /data/monitor/dingtalk/config.yml:/etc/prometheus-webhook-dingtalk/config.yml
      # Share the host timezone with the container (read-only).
      - /etc/localtime:/etc/localtime:ro
    ports:
      - "8060:8060"

  # Alertmanager
  alertmanager:
    image: prom/alertmanager:latest
    container_name: alertmanager
    restart: always
    volumes:
      - /data/monitor/alertmanager/config/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"

  # Prometheus
  # NOTE(review): the host directory is spelled "promethues" everywhere in
  # this guide; keep the spelling consistent with the directory on disk.
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    restart: always
    ports:
      - "9090:9090"
    volumes:
      - /data/monitor/promethues/prometheus.yml:/etc/prometheus/prometheus.yml
      # alert.yml on the host is exposed as rule.yml, the name listed under
      # rule_files in prometheus.yml.
      - /data/monitor/promethues/alert.yml:/etc/prometheus/rule.yml
      - /etc/localtime:/etc/localtime:ro

  # Grafana
  grafana:
    image: grafana/grafana
    container_name: grafana
    restart: always
    ports:
      - "3000:3000"
    volumes:
      - /data/monitor/grafana:/var/lib/grafana

  # node-exporter
  node-exporter:
    image: prom/node-exporter
    container_name: node-exporter
    restart: always
    ports:
      - "9100:9100"
    # Point the exporter at the host paths mounted below; without these flags
    # the mounts are never read and the container's own view is reported.
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      - '--path.rootfs=/rootfs'
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro

Dingtalk配置文件

/data/monitor/dingtalk/config.yml

文件内容（YAML）：
# Message templates loaded by the webhook bridge.
# NOTE(review): the compose file in this guide mounts only config.yml into
# the container; confirm this template file exists in the image or mount it.
templates:
  - '/etc/prometheus-webhook-dingtalk/templates/templates.tmpl'

# Receivers; add more entries here to configure multiple targets.
targets:
  webhook2:
    url: 'https://oapi.dingtalk.com/robot/send?access_token=钉钉token'
    secret: '钉钉加签'

Alertmanager配置文件

/data/monitor/alertmanager/config/alertmanager.yml

文件内容（YAML）：
# Alertmanager configuration: every alert is routed to the single 'default'
# receiver, which forwards it to the DingTalk webhook bridge.
global:
  resolve_timeout: 5m
  # SMTP relay; SSL submission normally uses port 465.
  # NOTE(review): no receiver below defines email_configs, so these SMTP
  # settings are currently unused.
  smtp_smarthost: 'smtp.qiye.163.com:465'
  # Sender address.
  smtp_from: 'user@163.com'
  # Mailbox account name.
  smtp_auth_username: 'user@163.com'
  # Mailbox password or authorization code.
  smtp_auth_password: 'password'
  smtp_require_tls: false

route:
  receiver: 'default'
  group_wait: 10s
  group_interval: 1m
  repeat_interval: 1h
  group_by: ['alertname']

# Mute 'warning' alerts while a 'critical' alert with the same alertname and
# instance is firing. Uses the matcher syntax; source_match/target_match are
# deprecated.
inhibit_rules:
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ['alertname', 'instance']

receivers:
  - name: 'default'
    webhook_configs:
      # "webhook2" must match a key under `targets` in the DingTalk config.
      - url: 'http://dingtalk-IP:8060/dingtalk/webhook2/send'
        send_resolved: true

Prometheus主配置文件（prometheus.yml）

/data/monitor/promethues/prometheus.yml

文件内容（YAML）：
# Prometheus server configuration. "IP" is a placeholder for the address of
# the host running the corresponding service.
global:
  scrape_interval: 60s
  evaluation_interval: 60s

# Where firing alerts are sent.
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['IP:9093']

rule_files:
  - "/etc/prometheus/rule.yml"
  # NOTE(review): the compose file in this guide mounts no rules/ directory;
  # this glob matches nothing unless one is added.
  - "rules/*.yml"

scrape_configs:
  # Prometheus scraping itself.
  - job_name: prometheus
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: prometheus

  # Scrapes port 9100, the port node-exporter publishes in the compose file.
  - job_name: lite
    static_configs:
      - targets: ['IP:9100']
        labels:
          env: dev

  - job_name: redis_exporter
    static_configs:
      - targets: ['IP:9121']
        labels:
          env: dev
          ident: redis

  - job_name: mysql_exporter
    static_configs:
      - targets: ['IP:9104']
        labels:
          env: dev
          ident: mysql

  - job_name: emqx_exporter
    metrics_path: /api/v5/prometheus/stats
    scrape_interval: 5s
    honor_labels: true
    static_configs:
      - targets: ['IP:18083']

  # Alertmanager's own metrics. Port 9093 is the port Alertmanager publishes;
  # 9100 is node-exporter and is already covered by the "lite" job.
  - job_name: 'alertmanager'
    scrape_interval: 15s
    static_configs:
      - targets: ['IP:9093']

Prometheus告警规则文件（alert.yml）

/data/monitor/promethues/alert.yml

文件内容（YAML）：
# Host-level alerting rules.
# NOTE(review): the Alertmanager inhibit rule in this guide matches severity
# 'critical' / 'warning'; the Chinese severity values used here are not
# matched by it. Align the two if inhibition is wanted.
groups:
  - name: 服务器主机信息监控告警
    rules:
      # Fires as soon as a scrape target of the node-exporter job is down.
      # The job matcher must equal a job_name in prometheus.yml; "lite" is
      # the job that scrapes port 9100 in this guide.
      - alert: 公司内部服务器监控
        expr: 'up{job="lite"} == 0'
        for: 0m
        labels:
          severity: 非常严重
        annotations:
          description: "监控的目标已丢失,请检查服务器自身或node_exporter服务"

      # Memory usage above 95% (threshold matches the annotation text).
      - alert: "内存报警"
        expr: '100 - ((node_memory_MemAvailable_bytes * 100) / node_memory_MemTotal_bytes) > 95'
        # The condition must hold this long before the alert is sent to
        # Alertmanager.
        for: 1m
        labels:
          severity: 严重
        annotations:
          summary: "{{ $labels.instance }} 内存使用率过高,请尽快处理!"
          description: "{{ $labels.instance }}内存使用率超过95%,当前使用率{{ $value }}%."

      # Disk space usage above 95% on ext4/xfs filesystems.
      - alert: "磁盘空间报警"
        expr: '(1 - node_filesystem_avail_bytes{fstype=~"ext4|xfs"} / node_filesystem_size_bytes{fstype=~"ext4|xfs"}) * 100 > 95'
        for: 60s
        labels:
          severity: 严重
        annotations:
          summary: "{{ $labels.instance }}磁盘空间使用超过95%了"
          description: "{{ $labels.instance }}磁盘使用率超过95%,当前使用率{{ $value }}%."

      # Per-instance CPU usage above 95%. rate() is used instead of irate()
      # so a single short dip between two samples does not reset `for`.
      # The aggregation keeps the instance label, so no extra label is set.
      - alert: "CPU报警"
        expr: '100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 95'
        for: 120s
        labels:
          severity: 严重
        annotations:
          summary: "{{$labels.instance}}CPU使用率超过95%了"
          description: "{{ $labels.instance }}CPU使用率超过95%,当前使用率{{ $value }}%."

      # Disk busy time above 95%, averaged over 5 minutes per device.
      - alert: "磁盘IO性能报警"
        expr: 'rate(node_disk_io_time_seconds_total[5m]) * 100 > 95'
        for: 3m
        labels:
          severity: 严重
        annotations:
          summary: "{{$labels.instance}} 流入磁盘IO使用率过高,请尽快处理!"
          description: "{{$labels.instance}} 流入磁盘IO大于95%,当前使用率{{ $value }}%."
相关推荐
小小的木头人13 小时前
Docker Compose 镜像检测脚本(支持自动扫描 + 手动输入 YAML)
运维·docker·容器
狐狐生风13 小时前
LangGraph 生产级部署全解:FastAPI + Docker
python·docker·langchain·prompt·fastapi·langgraph·agentai
码农阿豪14 小时前
Docker 部署 XiuXianGame 文字修仙游戏:极空间 NAS 上随时挂机刷资源
游戏·docker·容器
小坏讲微服务15 小时前
小白搭建K8S集群0基础教程实战
docker·云原生·容器·kubernetes
xingfujie16 小时前
Ubuntu K8s 1.28 kubeadm 高可用集群部署实战
linux·运维·服务器·docker·kubernetes
AI视觉网奇16 小时前
docker vllm 开机启动
docker·容器·vllm
px不是xp16 小时前
Docker部署Qdrant向量数据库,初始化向量数据库,重构RAG逻辑
数据库·docker·微信小程序·重构·qdrant
KK溜了溜了16 小时前
Prometheus配置监控项和告警规则
linux·grafana·prometheus
小夏子_riotous17 小时前
Kubernetes学习路径——3. Kubernetes 1.25 高可用集群部署实战:从 Docker 到 Calico 全链路详解
linux·运维·学习·docker·容器·kubernetes·centos
bukeyiwanshui17 小时前
20260512 docker笔记
linux·运维·笔记·docker·容器