Prometheus

bash 复制代码
root@consumer:/apps/prometheus# vim rules/record-rule-mysql.yaml
groups:
- name: mysqld_rules
  rules:

  # Record slave lag seconds for pre-computed timeseries that takes
  # `mysql_slave_status_sql_delay` into account
  - record: instance:mysql_slave_lag_seconds
    expr: mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay
root@consumer:/apps/prometheus/rules# cat record-node-rules.yaml 
groups:
- name: custom_rules
  interval: 5s
  rules:
  - record: instance:node_cpu:avg_rate5m
    expr: (1 - avg(irate(node_cpu_seconds_total{job="node", mode="idle"}[5m])) by (instance)) * 100

  - record: instance:node_memory_MemFree_percent
    expr: 100 * (node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes

  - record: instance:root:node_filesystem_free_percent
    expr: 100 * node_filesystem_free_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}

  # Record slave lag via heartbeat method
  - record: instance:mysql_heartbeat_lag_seconds
    expr: mysql_heartbeat_now_timestamp_seconds - mysql_heartbeat_stored_timestamp_seconds

  - record: job:mysql_transactions:rate5m
    expr: sum without (command) (rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))
bash 复制代码
root@consumer:/apps/prometheus/rules# cat record-node-rules.yaml 
groups:
- name: custom_rules
  interval: 5s
  rules:
  - record: instance:node_cpu:avg_rate5m
    expr: (1 - avg(irate(node_cpu_seconds_total{job="node", mode="idle"}[5m])) by (instance)) * 100

  - record: instance:node_memory_MemFree_percent
    expr: 100 * (node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes

  - record: instance:root:node_filesystem_free_percent
    expr: 100 * node_filesystem_free_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}

在Prometheus配置文件(rule_files)中引用上面的两个规则文件

安装grafana

wget https://mirrors.tuna.tsinghua.edu.cn/grafana/apt/pool/main/g/grafana/grafana_10.0.1_amd64.deb

dpkg 安装

修改配置为中文

添加源

添加模版

查看

部署

wget https://github.com/prometheus/alertmanager/releases/download/v0.26.0/alertmanager-0.26.0.linux-amd64.tar.gz

root@consumer:~# tar xf alertmanager-0.26.0.linux-amd64.tar.gz
root@consumer:~# mv alertmanager-0.26.0.linux-amd64 /usr/local/alertmanager

bash 复制代码
vi /lib/systemd/system/alertmanager.service
[Unit]
Description=Prometheus Alertmanager
After=network.target

[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file="/usr/local/alertmanager/alertmanager.yml"

[Install]
WantedBy=multi-user.target

systemctl daemon-reload

systemctl start alertmanager.service

systemctl status alertmanager.service

systemctl enable alertmanager.service

访问alertmanager界面

http://192.168.1.73:9093/#/status

alertmanager配置发件和收件信息

bash 复制代码
cat  alertmanager.yml 
global:
  #若持续5m未再收到Prometheus发来的同一告警,则认为该告警已经恢复
  resolve_timeout: 5m
  #SMTP邮件服务器配置
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '2368756722@qq.com'
  smtp_auth_username: '2368756722@qq.com'
  smtp_auth_password: '<QQ邮箱SMTP授权码>'  #此处填写自己的授权码,请勿在文档中公开真实授权码
  smtp_require_tls: false       #访问smtp服务器是否需要tls
  smtp_hello: "qq.com"  #向SMTP服务器发送HELO/EHLO时用于标识自身的主机名
route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  receiver: 'email'
receivers:
- name: 'web.hook'
  webhook_configs:
  - url: 'http://127.0.0.1:5001/'
- name: 'email'
  email_configs:
    - to: '15115440657@163.com'
      send_resolved: true
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

Prometheus添加rule文件

root@consumer:/apps/prometheus# ls rules2

Prometheus查看

配置文件添加

bash 复制代码
cat alertmanager.yml 
global:
  #若持续5m未再收到Prometheus发来的同一告警,则认为该告警已经恢复
  resolve_timeout: 5m
  #SMTP邮件服务器配置
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_from: '2368756722@qq.com'
  smtp_auth_username: '2368756722@qq.com'
  smtp_auth_password: '<QQ邮箱SMTP授权码>'  #此处填写自己的授权码,请勿在文档中公开真实授权码
  smtp_require_tls: false       #访问smtp服务器是否需要tls
  smtp_hello: "qq.com"  #向SMTP服务器发送HELO/EHLO时用于标识自身的主机名
route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 1h
  receiver: 'email'
templates:
  - '/usr/local/alertmanager/email_template.tmpl'  #启用告警的模板
receivers:
- name: 'web.hook'
  webhook_configs:
  - url: 'http://127.0.0.1:5001/'
- name: 'email'
  email_configs:
    - to: '15115440657@163.com'
      headers:
        subject: "{{ .Status | toUpper }} {{ .CommonLabels.env }}:{{ .CommonLabels.cluster }} {{ .CommonLabels.alertname }}"
      html: '{{ template "email.to.html" . }}'
      send_resolved: true
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

邮件告警模版

bash 复制代码
root@consumer:/usr/local/alertmanager# cat email_template.tmpl 
{{ define "email.to.html" }}
{{- if gt (len .Alerts.Firing) 0 -}}
{{ range .Alerts.Firing }}
=========start==========<br>
告警程序: prometheus_alert <br>
告警级别: {{ .Labels.severity }} <br>
告警类型: {{ .Labels.alertname }} <br>
告警主机: {{ .Labels.instance }} <br>
告警主题: {{ .Annotations.summary }}  <br>
告警详情: {{ .Annotations.description }} <br>
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }} <br>
=========end==========<br>
{{ end }}{{ end -}}
 
{{- if gt (len .Alerts.Resolved) 0 -}}
{{ range .Alerts.Resolved }}
=========start==========<br>
告警程序: prometheus_alert <br>
告警级别: {{ .Labels.severity }} <br>
告警类型: {{ .Labels.alertname }} <br>
告警主机: {{ .Labels.instance }} <br>
告警主题: {{ .Annotations.summary }} <br>
告警详情: {{ .Annotations.description }} <br>
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }} <br>
恢复时间: {{ .EndsAt.Format "2006-01-02 15:04:05" }} <br>
=========end==========<br>
{{ end }}{{ end -}}
{{- end }}
相关推荐
H1006 分钟前
重构(二)
android·重构
拓端研究室16 分钟前
R基于贝叶斯加法回归树BART、MCMC的DLNM分布滞后非线性模型分析母婴PM2.5暴露与出生体重数据及GAM模型对比、关键窗口识别
android·开发语言·kotlin
zhangphil1 小时前
Android简洁缩放Matrix实现图像马赛克,Kotlin
android·kotlin
m0_512744641 小时前
极客大挑战2024-web-wp(详细)
android·前端
lw向北.1 小时前
Qt For Android之环境搭建(Qt 5.12.11 Qt下载SDK的处理方案)
android·开发语言·qt
不爱学习的啊Biao2 小时前
【13】MySQL如何选择合适的索引?
android·数据库·mysql
Clockwiseee2 小时前
PHP伪协议总结
android·开发语言·php
mmsx9 小时前
android sqlite 数据库简单封装示例(java)
android·java·数据库
众拾达人12 小时前
Android自动化测试实战 Java篇 主流工具 框架 脚本
android·java·开发语言
吃着火锅x唱着歌12 小时前
PHP7内核剖析 学习笔记 第四章 内存管理(1)
android·笔记·学习