bash
root@consumer:/apps/prometheus# vim rules/record-rule-mysql.yaml
groups:
- name: mysqld_rules
rules:
# Record slave lag seconds for pre-computed timeseries that takes
# `mysql_slave_status_sql_delay` into account
- record: instance:mysql_slave_lag_seconds
expr: mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay
root@consumer:/apps/prometheus/rules# cat record-node-rules.yaml
groups:
- name: custom_rules
interval: 5s
rules:
- record: instance:node_cpu:avg_rate5m
expr: (1 - avg(irate(node_cpu_seconds_total{job="node", mode="idle"}[5m])) by (instance)) * 100
- record: instance:node_memory_MemFree_percent
expr: 100 * (node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes
- record: instance:root:node_filesystem_free_percent
expr: 100 * node_filesystem_free_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}
# Record slave lag via heartbeat method
- record: instance:mysql_heartbeat_lag_seconds
expr: mysql_heartbeat_now_timestamp_seconds - mysql_heartbeat_stored_timestamp_seconds
- record: job:mysql_transactions:rate5m
expr: sum without (command) (rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))
bash
root@consumer:/apps/prometheus/rules# cat record-node-rules.yaml
groups:
- name: custom_rules
interval: 5s
rules:
- record: instance:node_cpu:avg_rate5m
expr: (1 - avg(irate(node_cpu_seconds_total{job="node", mode="idle"}[5m])) by (instance)) * 100
- record: instance:node_memory_MemFree_percent
expr: 100 * (node_memory_Buffers_bytes + node_memory_Cached_bytes + node_memory_MemFree_bytes) / node_memory_MemTotal_bytes
- record: instance:root:node_filesystem_free_percent
expr: 100 * node_filesystem_free_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}
在Prometheus配置文件的rule_files中引用上面的两个规则文件
安装grafana
wget https://mirrors.tuna.tsinghua.edu.cn/grafana/apt/pool/main/g/grafana/grafana_10.0.1_amd64.deb
dpkg 安装
修改配置为中文
添加源
添加模版
查看
部署
root@consumer:~# mv alertmanager-0.22.2.linux-amd64 /usr/local/alertmanager
bash
vi /lib/systemd/system/alertmanager.service
[Unit]
Description=Prometheus Alertmanager
After=network.target
[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file="/usr/local/alertmanager/alertmanager.yml"
[Install]
WantedBy=multi-user.target
systemctl daemon-reload
systemctl start alertmanager.service
systemctl status alertmanager.service
systemctl enable alertmanager.service
访问alertmanager界面
http://192.168.1.73:9093/#/status
alertmanager配置发件和收件信息
bash
cat alertmanager.yml
global:
#超过5m未再收到Prometheus发来的同一告警，则认为该告警已经恢复
resolve_timeout: 5m
#SMTP邮件服务器配置
smtp_smarthost: 'smtp.qq.com:465'
smtp_from: '2368756722@qq.com'
smtp_auth_username: '2368756722@qq.com'
smtp_auth_password: '<QQ邮箱SMTP授权码>' #此处填写邮箱授权码，请勿在文档中明文保存真实授权码
smtp_require_tls: false #是否强制使用STARTTLS；465端口走隐式TLS，因此这里设为false
smtp_hello: "qq.com" #向SMTP服务器发送HELO/EHLO时声明的主机名
route:
group_by: ['alertname']
group_wait: 30s
group_interval: 5m
repeat_interval: 1h
receiver: 'email'
receivers:
- name: 'web.hook'
webhook_configs:
- url: 'http://127.0.0.1:5001/'
- name: 'email'
email_configs:
- to: '15115440657@163.com'
send_resolved: true
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
Prometheus添加rule文件
root@consumer:/apps/prometheus# ls rules2
Prometheus查看
配置文件添加
bash
cat alertmanager.yml
global:
#超过5m未再收到Prometheus发来的同一告警，则认为该告警已经恢复
resolve_timeout: 5m
#SMTP邮件服务器配置
smtp_smarthost: 'smtp.qq.com:465'
smtp_from: '2368756722@qq.com'
smtp_auth_username: '2368756722@qq.com'
smtp_auth_password: '<QQ邮箱SMTP授权码>' #此处填写邮箱授权码，请勿在文档中明文保存真实授权码
smtp_require_tls: false #是否强制使用STARTTLS；465端口走隐式TLS，因此这里设为false
smtp_hello: "qq.com" #向SMTP服务器发送HELO/EHLO时声明的主机名
route:
group_by: ['alertname']
group_wait: 30s
group_interval: 5m
repeat_interval: 1h
receiver: 'email'
templates:
- '/usr/local/alertmanager/email_template.tmpl' #启用告警的模板
receivers:
- name: 'web.hook'
webhook_configs:
- url: 'http://127.0.0.1:5001/'
- name: 'email'
email_configs:
- to: '15115440657@163.com'
headers:
subject: "{{ .Status | toUpper }} {{ .CommonLabels.env }}:{{ .CommonLabels.cluster }} {{ .CommonLabels.alertname }}"
html: '{{ template "email.to.html" . }}'
send_resolved: true
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
邮件告警模版
bash
root@consumer:/usr/local/alertmanager# cat email_template.tmpl
{{ define "email.to.html" }}
{{- if gt (len .Alerts.Firing) 0 -}}
{{ range .Alerts.Firing }}
=========start==========<br>
告警程序: prometheus_alert <br>
告警级别: {{ .Labels.severity }} <br>
告警类型: {{ .Labels.alertname }} <br>
告警主机: {{ .Labels.instance }} <br>
告警主题: {{ .Annotations.summary }} <br>
告警详情: {{ .Annotations.description }} <br>
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }} <br>
=========end==========<br>
{{ end }}{{ end -}}
{{- if gt (len .Alerts.Resolved) 0 -}}
{{ range .Alerts.Resolved }}
=========start==========<br>
告警程序: prometheus_alert <br>
告警级别: {{ .Labels.severity }} <br>
告警类型: {{ .Labels.alertname }} <br>
告警主机: {{ .Labels.instance }} <br>
告警主题: {{ .Annotations.summary }} <br>
告警详情: {{ .Annotations.description }} <br>
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }} <br>
恢复时间: {{ .EndsAt.Format "2006-01-02 15:04:05" }} <br>
=========end==========<br>
{{ end }}{{ end -}}
{{- end }}