阿里云StarRocks监控告警发送至钉钉群

背景:新入职一家公司,现场没有对SR进行监控,于是根据开发的需求编写了一个python脚本。

脚本逻辑:抓取SR的be/fe/routine load状态信息,判断是否触发告警;若满足告警条件,则发送告警信息到钉钉群,并艾特对应的责任人。

python 复制代码
# -*- coding: utf-8 -*-
"""
author: zjh
date: 2023-09-28
description:StarRocks cluster monitoring, include routine load,frontend,backend status.
             And then send warn message to dingtalk.
"""
import base64
import urllib
import requests
import json
import time
import hmac
import hashlib
import pymysql


def msg(warntype, content, token):
    """Send a signed text alert to a DingTalk group robot.

    Args:
        warntype: Alert category. 'routineloadwarn', 'fewarn' and 'bewarn'
            select the DingTalk user ids that get @-mentioned; any other
            value sends the message without @-mentioning a DingTalk id.
        content: Text body of the alert.
        token: ``access_token`` of the robot webhook.

    Raises:
        Whatever ``requests.post`` raises, including a timeout error when
        DingTalk does not answer within the timeout below.
    """
    # Imported locally: a bare ``import urllib`` does not guarantee that the
    # ``urllib.parse`` submodule is loaded.
    import urllib.parse

    # Adjust per deployment (1): DingTalk robot webhook address.
    dingding_url = 'https://oapi.dingtalk.com/robot/send?access_token=' + token
    # Adjust per deployment (2): the robot's signing secret.
    secret = 'SECd0c0f14733789a******************645a4d1ae76ea3481b1384f5ef'

    # Signing algorithm taken from the DingTalk documentation:
    # HMAC-SHA256 over "<timestamp>\n<secret>", base64-encoded, URL-quoted.
    timestamp = str(round(time.time() * 1000))
    secret_enc = secret.encode('utf-8')
    string_to_sign = '{}\n{}'.format(timestamp, secret)
    string_to_sign_enc = string_to_sign.encode('utf-8')
    hmac_code = hmac.new(secret_enc, string_to_sign_enc, digestmod=hashlib.sha256).digest()
    sign = urllib.parse.quote_plus(base64.b64encode(hmac_code))

    headers = {'Content-Type': 'application/json;charset=utf-8'}
    # The timestamp and signature are appended to the webhook URL.
    api_url = dingding_url + "&timestamp={}&sign={}".format(timestamp, sign)

    # DingTalk ids to @-mention per alert type. An unknown type falls back to
    # an empty list instead of leaving ``owner`` undefined.
    owners_by_type = {
        'routineloadwarn': ["dfdfs134"],
        'fewarn': ["faasdfaAQE"],
        'bewarn': ["faasdfaAQE"],
    }
    owner = owners_by_type.get(warntype, [])

    json_text = {
        "at": {
            "atMobiles": [
                "180xxxxxx"
            ],
            "atDingtalkIds": owner,
            # Must be a real boolean; the string "False" is not a JSON false.
            "isAtAll": False
        },
        "msgtype": "text",
        "text": {
            "content": content
        }
    }

    # A timeout keeps the monitoring run from hanging on a stalled connection.
    requests.post(api_url, data=json.dumps(json_text), headers=headers, timeout=10)

def srmsg(ip,port,username,passwd,db,exe_sql):
    """Run one SQL statement over a fresh pymysql connection and return all rows.

    Args:
        ip: Host to connect to.
        port: Port to connect to.
        username: Login user.
        passwd: Login password.
        db: Database selected for the connection.
        exe_sql: SQL statement to execute.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.

    Raises:
        Any error raised by pymysql while connecting or executing. The cursor
        and the connection are closed in that case as well.
    """
    conn = pymysql.connect(host=ip,port=port,user=username,password=passwd,database=db)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(exe_sql)
            return cursor.fetchall()
        finally:
            # Release the cursor even when execute()/fetchall() fails.
            cursor.close()
    finally:
        # Always give the connection back, on success and on error.
        conn.close()

def fewarn(clustername,base_tuple_fe,token):
    """Send one 'fewarn' alert for every frontend row whose status is "false".

    Args:
        clustername: Cluster name shown in the alert; the value 'SR' selects
            the alternative column layout.
        base_tuple_fe: Iterable of frontend rows, indexed by position.
        token: DingTalk robot token forwarded to ``msg``.
    """
    if clustername == 'SR':
        # Layout the source comment attributes to StarRocks 2.4.1.
        status_col, start_col = 10, 15
    else:
        # NOTE(review): the source labelled this branch 2.4.1 as well;
        # presumably it targets a different version - confirm.
        status_col, start_col = 9, 14

    for row in base_tuple_fe:
        if row[status_col] != "false":
            continue
        lines = (
            "集群: " + clustername,
            "异常FE: " + row[1],
            "状态: " + row[status_col],
            "上次启动时间: " + row[start_col],
        )
        msg('fewarn', "\n".join(lines), token)

def bewarn(clustername,base_tuple_be,token):
    """Send one 'bewarn' alert for every backend row whose status is "false".

    Args:
        clustername: Cluster name shown in the alert; the value 'SatrRocks'
            selects the alternative column layout.
        base_tuple_be: Iterable of backend rows, indexed by position.
        token: DingTalk robot token forwarded to ``msg``.
    """
    if clustername == 'SatrRocks':
        # Layout the source comment attributes to StarRocks 2.4.1.
        status_col, name_col, start_col = 9, 2, 7
    else:
        # Layout the source comment attributes to StarRocks 2.3.1.
        status_col, name_col, start_col = 8, 1, 6

    for row in base_tuple_be:
        if row[status_col] != "false":
            continue
        lines = (
            "集群: " + clustername,
            "异常BE: " + row[name_col],
            "状态: " + row[status_col],
            "上次启动时间: " + row[start_col],
        )
        msg('bewarn', "\n".join(lines), token)

def routineloadwarn(clustername,load_tuple,token):
    """Send one 'routineloadwarn' alert for every row in ``load_tuple``.

    Args:
        clustername: Cluster name shown in the alert.
        load_tuple: Iterable of routine-load rows, indexed by position.
        token: DingTalk robot token forwarded to ``msg``.
    """
    # (label, column index) pairs in the order they appear in the alert text.
    labelled_columns = (
        ("任务id: ", 0),
        ("任务名: ", 1),
        ("库名: ", 5),
        ("表名: ", 6),
        ("状态: ", 7),
        ("挂起时间: ", 2),
        ("日志链接:", 16),
        ("其他错误:", 17),
    )
    for row in load_tuple:
        parts = ["集群: " + clustername]
        for label, column in labelled_columns:
            parts.append(label + row[column])
        msg('routineloadwarn', "\n".join(parts), token)



if __name__ == '__main__':
    # Only the script entry point needs these, so they are imported here.
    import sys
    import traceback

    # One dict per monitored cluster: connection settings plus the three
    # statements used for the frontend, backend and routine-load checks.
    dict_srprod = {
        "envname": "生产SatrRocks",
        "ip": "192.168.10.10",
        "port": 29030,
        "username": "root",
        "passwd": "********",
        "dbname": ["test1", "test2"],
        "check_fe_status": "show frontends;",
        "check_be_status": "show backends;",
        "check_rtload": "show routine load where State='PAUSED' and TableName in ('t1','t2');",
    }
    dict_qwprod = {
        "envname": "生产(1)SatrRocks",
        "ip": "192.168.10.10",
        "port": 39030,
        "username": "root",
        "passwd": "********",
        "dbname": ["test0", "test"],
        "check_fe_status": "show frontends;",
        "check_be_status": "show backends;",
        "check_rtload": "show routine load where State='PAUSED';",
    }
    dict_srsit = {
        "envname": "测试SatrRocks",
        "ip": "192.168.10.11",
        "port": 19030,
        "username": "root",
        "passwd": "********",
        "dbname": ["test1", "test2", "test3"],
        "check_fe_status": "show frontends;",
        "check_be_status": "show backends;",
        "check_rtload": "show routine load where State='PAUSED' and TableName='t0';",
    }

    token = "xxxxx"

    # (statement key, alert function) pairs, run in this order per cluster.
    checks = (
        ("check_rtload", routineloadwarn),
        ("check_fe_status", fewarn),
        ("check_be_status", bewarn),
    )

    any_failed = False
    for env in (dict_srprod, dict_qwprod, dict_srsit):
        for sql_key, warn_func in checks:
            try:
                # Only the first configured database is used for the connection.
                rows = srmsg(env["ip"], env["port"],
                             env["username"], env["passwd"],
                             env["dbname"][0], env[sql_key])
                warn_func(env["envname"], rows, token)
            except Exception:
                # Top-level boundary: report the failure and keep going so one
                # unreachable cluster does not stop the remaining checks.
                any_failed = True
                print("{} failed for {}".format(sql_key, env["envname"]),
                      file=sys.stderr)
                traceback.print_exc()

    # Keep a non-zero exit status so schedulers still see the failure.
    if any_failed:
        sys.exit(1)

另外,钉钉的一些开发说明,请参阅钉钉官网

注册机器人链接:https://open.dingtalk.com/document/connector/alarm-subscription

相关推荐
程序员的世界你不懂28 分钟前
【Flask】测试平台开发,新增说明书编写和展示功能 第二十三篇
java·前端·数据库
自学也学好编程34 分钟前
【数据库】Redis详解:内存数据库与缓存之王
数据库·redis
鳄鱼杆1 小时前
服务器 | Docker应用开发与部署的实践以及阿里云镜像加速配置
服务器·阿里云·docker
JAVA不会写1 小时前
在Mybatis plus中如何使用自定义Sql
数据库·sql
IT 小阿姨(数据库)1 小时前
PgSQL监控死元组和自动清理状态的SQL语句执行报错ERROR: division by zero原因分析和解决方法
linux·运维·数据库·sql·postgresql·centos
ChinaRainbowSea2 小时前
7. LangChain4j + 记忆缓存详细说明
java·数据库·redis·后端·缓存·langchain·ai编程
小马学嵌入式~3 小时前
嵌入式 SQLite 数据库开发笔记
linux·c语言·数据库·笔记·sql·学习·sqlite
Java小白程序员3 小时前
MyBatis基础到高级实践:全方位指南(中)
数据库·mybatis
Monly213 小时前
人大金仓:merge sql error, dbType null, druid-1.2.20
数据库·sql
不宕机的小马达3 小时前
【Mysql|第一篇】Mysql的安装与卸载、Navicat工具的使用
数据库·mysql