阿里云starrocks监控告警发送至钉钉群

背景:新入职一家公司,现场没有对sr进行监控,于是根据开发的需求编写了一个python脚本。

脚本逻辑:抓取sr的be/fe/routine load状态信息,判断是否触发告警;若满足告警条件,则发送告警信息到钉钉群,并艾特对应的责任人。

python 复制代码
# -*- coding: utf-8 -*-
"""
author: zjh
date: 2023-09-28
description: StarRocks cluster monitoring, including routine load, frontend and backend status.
             Warning messages are then sent to DingTalk.
"""
import base64
import urllib
import requests
import json
import time
import hmac
import hashlib
import pymysql


def msg(warntype, content, token):
    """Send a signed plain-text alert to a DingTalk group robot.

    Args:
        warntype: Alert category. 'routineloadwarn', 'fewarn' and 'bewarn'
            select which DingTalk user id gets @-mentioned; any other value
            mentions nobody by id instead of failing.
        content: Text body of the alert.
        token: access_token of the robot webhook.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
    """
    # Imported here so the submodule is guaranteed to be loaded; a bare
    # `import urllib` does not by itself make `urllib.parse` available.
    import urllib.parse

    # Adjust to your environment (1): DingTalk robot webhook address.
    dingding_url = 'https://oapi.dingtalk.com/robot/send?access_token=' + token
    # Adjust to your environment (2): the robot's signing secret.
    secret = 'SECd0c0f14733789a******************645a4d1ae76ea3481b1384f5ef'

    # Signature: HMAC-SHA256 over "<timestamp>\n<secret>", keyed with the
    # secret, then base64-encoded and URL-quoted.
    timestamp = str(round(time.time() * 1000))
    secret_enc = secret.encode('utf-8')
    string_to_sign_enc = '{}\n{}'.format(timestamp, secret).encode('utf-8')
    hmac_code = hmac.new(secret_enc, string_to_sign_enc, digestmod=hashlib.sha256).digest()
    sign = urllib.parse.quote_plus(base64.b64encode(hmac_code))

    headers = {'Content-Type': 'application/json;charset=utf-8'}
    api_url = dingding_url + "&timestamp={}&sign={}".format(timestamp, sign)

    # Person responsible for each alert category.
    owners_by_type = {
        'routineloadwarn': ["dfdfs134"],
        'fewarn': ["faasdfaAQE"],
        'bewarn': ["faasdfaAQE"],
    }
    owner = owners_by_type.get(warntype, [])

    json_text = {
        "at": {
            "atMobiles": [
                "180xxxxxx"
            ],
            "atDingtalkIds": owner,
            # Must be a real boolean; the string "False" is not a JSON false.
            "isAtAll": False
        },
        "msgtype": "text",
        "text": {
            "content": content
        }
    }

    # A timeout keeps an unresponsive endpoint from hanging the monitor forever.
    requests.post(api_url, data=json.dumps(json_text), headers=headers, timeout=10)

def srmsg(ip,port,username,passwd,db,exe_sql):
    """Run one SQL statement over a fresh connection and return all rows.

    Args:
        ip: Host to connect to.
        port: Port to connect to.
        username: Login user.
        passwd: Login password.
        db: Database selected for the session.
        exe_sql: Statement to execute.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the statement.

    The cursor and the connection are closed even when the statement fails,
    so a failing query no longer leaks the connection.
    """
    conn = pymysql.connect(host=ip,port=port,user=username,password=passwd,database=db)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(exe_sql)
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

def fewarn(clustername,base_tuple_fe,token):
    """Alert on every frontend (FE) row whose alive column equals "false".

    Args:
        clustername: Cluster label shown in the alert; it also selects which
            column layout is used to read each row.
        base_tuple_fe: Iterable of rows, each indexable by column position.
        token: DingTalk robot access token, passed through to ``msg``.

    One alert is sent per matching row. Values are rendered with ``format``
    so a NULL or non-string column cannot raise TypeError and suppress the
    alert.
    """
    warntype = 'fewarn'
    # (node column, alive column, last-start-time column)
    # NOTE(review): the original comments labelled both layouts "2.4.1", and
    # 'SR' matches none of the envname values passed by the script's main
    # section, so only the second layout is exercised there -- confirm.
    if clustername == 'SR':
        node_idx, alive_idx, start_idx = 1, 10, 15
    else:
        node_idx, alive_idx, start_idx = 1, 9, 14

    for feinfo in base_tuple_fe:
        if feinfo[alive_idx] != "false":
            continue
        fewanrmsg = "集群: {}\n异常FE: {}\n状态: {}\n上次启动时间: {}".format(
            clustername, feinfo[node_idx], feinfo[alive_idx], feinfo[start_idx])
        msg(warntype, fewanrmsg, token)

def bewarn(clustername,base_tuple_be,token):
    """Alert on every backend (BE) row whose alive column equals "false".

    Args:
        clustername: Cluster label shown in the alert; it also selects which
            column layout is used to read each row.
        base_tuple_be: Iterable of rows, each indexable by column position.
        token: DingTalk robot access token, passed through to ``msg``.

    One alert is sent per matching row. Values are rendered with ``format``
    so a NULL or non-string column cannot raise TypeError and suppress the
    alert.
    """
    warntype = 'bewarn'
    # (node column, alive column, last-start-time column)
    # The original comments label the first layout as version 2.4.1 and the
    # fallback as version 2.3.1.
    # NOTE(review): 'SatrRocks' matches none of the envname values passed by
    # the script's main section (they only contain it as a substring), so
    # only the fallback layout is exercised there -- confirm.
    if clustername == 'SatrRocks':
        node_idx, alive_idx, start_idx = 2, 9, 7
    else:
        node_idx, alive_idx, start_idx = 1, 8, 6

    for beinfo in base_tuple_be:
        if beinfo[alive_idx] != "false":
            continue
        bewanrmsg = "集群: {}\n异常BE: {}\n状态: {}\n上次启动时间: {}".format(
            clustername, beinfo[node_idx], beinfo[alive_idx], beinfo[start_idx])
        msg(warntype, bewanrmsg, token)

def routineloadwarn(clustername,load_tuple,token):
    """Send one alert per routine load row in ``load_tuple``.

    Args:
        clustername: Cluster label shown in the alert.
        load_tuple: Iterable of rows, each indexable by column position.
            Every row is reported; filtering is left to the caller's query.
        token: DingTalk robot access token, passed through to ``msg``.

    Values are rendered with ``format`` so a NULL or non-string column
    cannot raise TypeError and suppress the alert.
    """
    warntype = 'routineloadwarn'
    # NOTE(review): the "挂起时间" (pause time) field is read from column 2;
    # confirm that column 2 really is the pause time on the deployed version.
    template = ("集群: {}\n"
                "任务id: {}\n"
                "任务名: {}\n"
                "库名: {}\n"
                "表名: {}\n"
                "状态: {}\n"
                "挂起时间: {}\n"
                "日志链接:{}\n"
                "其他错误:{}")
    for info in load_tuple:
        input_message = template.format(
            clustername, info[0], info[1], info[5], info[6],
            info[7], info[2], info[16], info[17])
        msg(warntype, input_message, token)



if __name__ == '__main__':
    import traceback

    # Connection settings and check statements, one dict per monitored cluster.
    dict_srprod = {
        "envname": "生产SatrRocks", "ip": "192.168.10.10", "port": 29030,
        "username": "root", "passwd": "********", "dbname": ["test1", "test2"],
        "check_fe_status": "show frontends;",
        "check_be_status": "show backends;",
        "check_rtload": "show routine load where State='PAUSED' and TableName in ('t1','t2');",
    }
    dict_qwprod = {
        "envname": "生产(1)SatrRocks", "ip": "192.168.10.10", "port": 39030,
        "username": "root", "passwd": "********", "dbname": ["test0", "test"],
        "check_fe_status": "show frontends;",
        "check_be_status": "show backends;",
        "check_rtload": "show routine load where State='PAUSED';",
    }
    dict_srsit = {
        "envname": "测试SatrRocks", "ip": "192.168.10.11", "port": 19030,
        "username": "root", "passwd": "********", "dbname": ["test1", "test2", "test3"],
        "check_fe_status": "show frontends;",
        "check_be_status": "show backends;",
        "check_rtload": "show routine load where State='PAUSED' and TableName='t0';",
    }

    token = "xxxxx"

    # Each check pairs the config key holding its SQL with the function that
    # turns the result rows into alerts. Order matches the original script:
    # routine load first, then FE, then BE.
    checks = (
        ("check_rtload", routineloadwarn),
        ("check_fe_status", fewarn),
        ("check_be_status", bewarn),
    )

    failed = False
    for env in (dict_srprod, dict_qwprod, dict_srsit):
        for sql_key, notify in checks:
            # Isolate every check: one unreachable cluster or failing query
            # must not prevent the remaining checks from running.
            try:
                rows = srmsg(env["ip"], env["port"],
                             env["username"], env["passwd"],
                             env["dbname"][0], env[sql_key])
                notify(env["envname"], rows, token)
            except Exception:
                traceback.print_exc()
                failed = True

    # Still exit non-zero so a scheduler notices that something went wrong.
    if failed:
        raise SystemExit(1)

另外,钉钉的一些开发说明,请参阅钉钉官网

注册机器人链接:https://open.dingtalk.com/document/connector/alarm-subscription

相关推荐
AOwhisky15 分钟前
Redis 学习笔记(第一期):概述、安装配置与核心理论
运维·数据库·redis·笔记·学习·云计算
ytttr87323 分钟前
C# 定时数据库备份工具
开发语言·数据库·c#
睡不醒男孩03082334 分钟前
自建 Prometheus+Grafana 与 CLUP 深度监控 PG 集群有什么区别?
数据库·oracle
AOwhisky43 分钟前
Redis 学习笔记(第四期):高可用与集群(哨兵 + Cluster + 容器化)
linux·运维·数据库·redis·笔记·学习·缓存
猫猫聚会Ing1 小时前
数据库设计 Prompt 提示词 - 构建与迭代
数据库
上海云盾-小余1 小时前
源站隐藏实战:规避裸 IP 被直接攻击的完整方案
数据库·网络协议·tcp/ip
微学AI2 小时前
时序大模型 TimechoAI 赋能工业时序数据底层技术优势与实操
数据库·大模型·时序大模型
北顾笙9802 小时前
MYSQL-day03
数据库·sql·mysql
MXsoft6182 小时前
**混合云统一监控实践:私有云+公有云的一体化运维方案**
运维·网络·数据库
2601_961875243 小时前
法考资料全套2026|客观题|主观题|资料已整理
阿里云·云计算·腾讯云·azure·七牛云存储·csdn开发云·火山引擎