网络安全-用脚本调用系统snmpwalk命令查询并邮件报警

SNMP.py

运行环境:Linux 系统,需预先安装 snmpwalk 命令和 Python 3。

python 复制代码
#!/usr/bin/python3
#_*_ coding:utf8 _*_

import os,time,re,subprocess

import smtplib
from smtplib import SMTP
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header

from concurrent.futures import ThreadPoolExecutor
from queue import Queue

## snmp OID
# snmpwalk -v 2c -c snmp读密码 SNMP服务器IP .1.3.6.1.2.1.25.3.3.1.2              # CPU负载
# snmpwalk -v 2c -c snmp读密码 SNMP服务器IP HOST-RESOURCES-MIB::hrProcessorLoad  # CPU负载
# snmpwalk -v 2c -c snmp读密码 SNMP服务器IP 1.3.6.1.2.1.25.2                     # 内存、硬盘使用率
# snmpwalk -v 2c -c snmp读密码 SNMP服务器IP 1.3.6.1.2.1.25                       # 进程使用cpu信息、进程使用内存信息,及内存、硬盘使用率

## 发邮件通知,设置邮件内容

def 纯文本邮件内容对象(邮件主题, 邮件正文, 发件人, 收件人):
    """Build a multipart mail object carrying one UTF-8 plain-text body.

    Args:
        邮件主题: Subject line.
        邮件正文: Plain-text body.
        发件人: Value for the ``From`` header.
        收件人: Value for the ``To`` header.

    Returns:
        The ``MIMEMultipart`` message with From/To/Subject set (each wrapped
        in a UTF-8 ``Header``) and the body attached as its only part.
    """
    mail = MIMEMultipart()
    # Headers are set in the order From, To, Subject.
    for field, value in (('From', 发件人), ('To', 收件人), ('Subject', 邮件主题)):
        mail[field] = Header(value, 'utf-8')

    # The body is attached as a text/plain part.
    mail.attach(MIMEText(邮件正文, 'plain', 'utf-8'))
    return mail

def 批量发邮件(SMTP_SERVER, 发件人, 邮箱密码, L_邮件任务):
    """Send a batch of plain-text mails over a single SMTP session.

    Args:
        SMTP_SERVER: Host name of the SMTP server; port 25 is used.
        发件人: Login name, also used as sender address.
        邮箱密码: Password for the login.
        L_邮件任务: Iterable of ``(recipient, subject, body)`` tuples.

    Connection/login failures and per-message send failures are printed,
    not raised. The connection is always released, including when login
    fails or the send loop aborts unexpectedly.
    """
    client = None
    try:
        client = smtplib.SMTP()
        # For a server that requires SSL, smtplib.SMTP_SSL() can be used instead.
        client.connect(SMTP_SERVER, 25)
        # client.set_debuglevel(1) turns on protocol debugging if needed.
        client.login(发件人, 邮箱密码)
    except Exception as e:
        print(f"邮件发送异常 {e}")
        if client is not None:
            client.close()      # connect() may have succeeded before login() failed
        return

    try:
        for 收件人, 邮件主题, 邮件正文 in L_邮件任务:
            # Pause before every message (presumably to stay under server rate limits).
            time.sleep(2)
            message = 纯文本邮件内容对象(邮件主题, 邮件正文, 发件人, 收件人)
            try:
                client.sendmail(发件人, 收件人, message.as_string())
            except Exception as e:
                print(e)
            else:
                print(f"{time.strftime('%Y%m%d')} 发送成功: {收件人:32s} {邮件主题}")
    finally:
        try:
            client.quit()
        except smtplib.SMTPException:
            # Server already dropped the session; just free the socket.
            client.close()
    print("发送完成")



## Report the N processes with the highest CPU counter
def SHOW_PID_CPU_TOP_N(N, D_PID_NAME, D_PID_CPU):
    """Return text lines for the N processes with the largest CPU value.

    Args:
        N: Maximum number of processes to report.
        D_PID_NAME: Mapping of process index -> process name.
        D_PID_CPU: Mapping of process index -> CPU value (number).

    Returns:
        A list of at most N strings ``"<name> <cpu>"`` ordered from the
        largest CPU value down. Processes with equal values keep the order
        they have in ``D_PID_CPU`` and each appears once. A process that
        has no entry in ``D_PID_NAME`` is shown as ``<pid INDEX>``.
    """
    # Sort (index, value) pairs directly, so ties cannot duplicate rows.
    TOP = sorted(D_PID_CPU.items(), key=lambda kv: kv[1], reverse=True)[0:N]
    return [
        str(D_PID_NAME.get(PID, f'<pid {PID}>')) + ' ' + str(PerfCPU)
        for PID, PerfCPU in TOP
    ]


## Report the N processes with the highest memory usage
def SHOW_PID_MEM_TOP_N(N, D_PID_NAME, D_PID_MEM):
    """Return text lines for the N processes using the most memory.

    Args:
        N: Maximum number of processes to report.
        D_PID_NAME: Mapping of process index -> process name.
        D_PID_MEM: Mapping of process index -> memory usage in KBytes.

    Returns:
        A list of at most N strings ``"<name> <mem> KBytes"`` ordered from
        the largest value down. Processes with equal values keep the order
        they have in ``D_PID_MEM`` and each appears once. A process that
        has no entry in ``D_PID_NAME`` is shown as ``<pid INDEX>``.
    """
    # Sort (index, value) pairs directly, so ties cannot duplicate rows.
    TOP = sorted(D_PID_MEM.items(), key=lambda kv: kv[1], reverse=True)[0:N]
    return [
        str(D_PID_NAME.get(PID, f'<pid {PID}>')) + ' ' + str(PerfMem) + ' KBytes'
        for PID, PerfMem in TOP
    ]


## 处理SNMP查询结果,查CPU及内存,生成字典,方便使用
'''
# 进程名
HOST-RESOURCES-MIB::hrSWRunName.1 = STRING: "System Idle Process"
HOST-RESOURCES-MIB::hrSWRunName.4 = STRING: "System"
# 进程占用CPU
HOST-RESOURCES-MIB::hrSWRunPerfCPU.1 = INTEGER: 88064
HOST-RESOURCES-MIB::hrSWRunPerfCPU.4 = INTEGER: 725
# 进程使用内存
HOST-RESOURCES-MIB::hrSWRunPerfMem.1 = INTEGER: 4 KBytes
HOST-RESOURCES-MIB::hrSWRunPerfMem.4 = INTEGER: 140 KBytes
'''
def PID_NAME_CPU_MEM(output):
    """Parse snmpwalk text into per-process name / CPU / memory dictionaries.

    Only ``hrSWRunName``, ``hrSWRunPerfCPU`` and ``hrSWRunPerfMem`` rows of
    the form ``HOST-RESOURCES-MIB::<column>.<index> = <TYPE>: <value>`` are
    used; every other line is ignored.

    Args:
        output: Text printed by snmpwalk.

    Returns:
        ``(D_PID_NAME, D_PID_CPU, D_PID_MEM)``, all keyed by the process
        index as a string:
        - D_PID_NAME: name exactly as printed (surrounding quotes kept).
        - D_PID_CPU: CPU value as int.
        - D_PID_MEM: memory value as int (the number before ``KBytes``).
        Rows whose CPU/memory value is not a number are skipped.
    """
    D_PID_NAME = {}     # index -> process name
    D_PID_CPU = {}      # index -> CPU value
    D_PID_MEM = {}      # index -> memory value (KBytes)
    RE_PID = r"HOST-RESOURCES-MIB::(hrSWRunName|hrSWRunPerfCPU|hrSWRunPerfMem)\.(\d+) = (.*)"
    for M in re.finditer(RE_PID, output):
        COLUMN, INDEX, V = M.groups()
        # Drop only the leading "<TYPE>: " tag, so a value that itself
        # contains ': ' (e.g. a process name) is kept whole.
        _, SEP, VV = V.partition(': ')
        if not SEP:
            VV = V
        if COLUMN == 'hrSWRunName':
            D_PID_NAME[INDEX] = VV
            continue
        try:
            NUMBER = int(VV.split()[0])     # '88064' or '4 KBytes'
        except (IndexError, ValueError):
            continue                        # e.g. "No Such Instance ..." answers
        if COLUMN == 'hrSWRunPerfCPU':
            D_PID_CPU[INDEX] = NUMBER
        else:
            D_PID_MEM[INDEX] = NUMBER
    return (D_PID_NAME, D_PID_CPU, D_PID_MEM)


def 单位转换(INT, UNIT):
    """Format a size for display.

    A size given in ``'Bytes'`` is divided by 1024 three times, rounded to a
    whole number and suffixed with ``GB``; for any other unit the number and
    the unit are simply joined.
    """
    if UNIT != 'Bytes':
        return f"{INT}{UNIT}"
    gigabytes = round(INT / 1024 / 1024 / 1024)
    return f"{gigabytes}GB"
## Run snmpwalk against one host and derive CPU / memory / swap / disk usage.
def _snmp_storage_row(D_Storage, ID):
    """Return (size, used, (unit_count, unit_name)) for one hrStorage row.

    ``D_Storage`` maps raw keys such as 'HOST-RESOURCES-MIB::hrStorageSize.1 '
    to raw values such as ' INTEGER: 4096 Bytes'; ``ID`` is the row index
    exactly as it appears at the end of those keys.
    """
    SIZE = int(D_Storage['HOST-RESOURCES-MIB::hrStorageSize.' + ID].split(':')[-1])
    USED = int(D_Storage['HOST-RESOURCES-MIB::hrStorageUsed.' + ID].split(':')[-1])
    COUNT, UNIT = D_Storage['HOST-RESOURCES-MIB::hrStorageAllocationUnits.' + ID].split(':')[-1].split()
    return (SIZE, USED, (int(COUNT), UNIT))


def _snmp_used_percent(USED, SIZE):
    """Return USED/SIZE as a percentage; 0.0 when SIZE is 0 (nothing to fill)."""
    if SIZE == 0:
        return 0.0
    return (USED * 100) / SIZE


def SNMP_CPU_MEM_DISK_USED(SYSTEM, SNMP_HOST, SNMP_PWD):
    """Query CPU load and storage usage of one host with two snmpwalk calls.

    Args:
        SYSTEM: 'Linux' or 'Windows'; selects how memory rows are located.
            Any other value yields 'NOT Linux/Windows' and no disk data.
        SNMP_HOST: Target as passed to snmpwalk (callers pass 'ip:port').
        SNMP_PWD: SNMP v2c community string.

    Returns:
        ``(StatusCode, SYS_INFO, D_SYS_INFO)``
        - StatusCode: 0 both commands succeeded, 1 CPU command failed,
          2 memory/disk command failed, 3 both failed.
        - SYS_INFO: one-line human-readable summary.
        - D_SYS_INFO: ``{'CPU': pct, 'MEM': pct, 'SWAP': pct, 'DISK': {...}}``
          where a percentage of -1 means "not available" and DISK maps each
          partition description to ``(pct rounded to 0.1, size, unit)``.
          An area whose total size is 0 (e.g. no swap configured) is
          reported as 0.0 percent used.
    """
    D_SYS_INFO = {'CPU':-1, 'MEM':-1, 'SWAP':-1, 'DISK':{}}
    SYS_INFO = ''
    StatusCode = 0

    # NOTE: both commands go through the shell as plain strings, so the
    # community string and host must not contain shell metacharacters.
    CMD = 'snmpwalk -v 2c -c ' + SNMP_PWD + ' ' + SNMP_HOST + ' HOST-RESOURCES-MIB::hrProcessorLoad'
    (CPU_status, CPU_output) = subprocess.getstatusoutput(CMD)
    CMD = 'snmpwalk -v 2c -c ' + SNMP_PWD + ' ' + SNMP_HOST + ' 1.3.6.1.2.1.25.2'
    (MEM_DISK_status, ALL_output) = subprocess.getstatusoutput(CMD)

    ## Analyse only when both queries succeeded; otherwise report which failed.
    if CPU_status != 0 or MEM_DISK_status != 0:
        if CPU_status != 0:
            StatusCode += 1
            SYS_INFO += '【CPU 查询失败】'
        if MEM_DISK_status != 0:
            StatusCode += 2
            SYS_INFO += '【MEM/DISK 查询失败】'
        return (StatusCode, SYS_INFO, D_SYS_INFO)

    ## CPU load: one line per core, value after the last ':'
    L_CPU = []
    for LINE in CPU_output.split('\n'):
        try:
            L_CPU.append(int(LINE.split('=')[-1].split(':')[-1]))
        except ValueError:
            continue                                    # blank line or non-numeric answer
    if L_CPU:
        CPU_PCT = sum(L_CPU) / len(L_CPU)               # average over all cores
        D_SYS_INFO['CPU'] = CPU_PCT
        SYS_INFO += f"CPU({len(L_CPU)}):{CPU_PCT:.1f}% | "
    else:
        SYS_INFO += f"CPU({len(L_CPU)}):ERROR | "

    ## Storage table: raw 'key ' -> ' value' pairs (the spaces around '=' are
    ## kept on both sides, so row IDs taken from keys carry a trailing space
    ## and the literal values compared below carry a leading space).
    RE_hrStorage = r'HOST-RESOURCES-MIB::hrStorage(?:Type|Descr|Size|Used|AllocationUnits).*'
    D_Storage = {}
    for M in re.finditer(RE_hrStorage, ALL_output):
        K, _, V = M.group().partition('=')              # split at the first '=' only
        D_Storage[K] = V

    FIXED_DISK = ' OID: HOST-RESOURCES-TYPES::hrStorageFixedDisk'
    L_DISK_ID = []                                      # row IDs of disk partitions

    if SYSTEM == 'Linux':
        ## Linux: memory rows are found through their description text.
        D_DESCR_ROLE = {
            ' STRING: Physical memory': 'PHYSICAL',
            ' STRING: Cached memory': 'CACHED',
            ' STRING: Memory buffers': 'BUFFERS',
            ' STRING: Swap space': 'SWAP',
        }
        D_MEM_ID = {}
        for K, V in D_Storage.items():
            if V in D_DESCR_ROLE:
                D_MEM_ID[D_DESCR_ROLE[V]] = K.split('.')[-1]
            elif V == FIXED_DISK:
                L_DISK_ID.append(K.split('.')[-1])

        if len(D_MEM_ID) == len(D_DESCR_ROLE):
            PHYSICAL_MEM_Size_int, PHYSICAL_MEM_Used_int, MEM_Unit = _snmp_storage_row(D_Storage, D_MEM_ID['PHYSICAL'])
            BUFFERS_MEM_Used_int = int(D_Storage['HOST-RESOURCES-MIB::hrStorageUsed.' + D_MEM_ID['BUFFERS']].split(':')[-1])
            CACHED_MEM_Used_int = int(D_Storage['HOST-RESOURCES-MIB::hrStorageUsed.' + D_MEM_ID['CACHED']].split(':')[-1])
            SWAP_SPACE_Size_int, SWAP_SPACE_Used_int, SWAP_Unit = _snmp_storage_row(D_Storage, D_MEM_ID['SWAP'])

            PHYSICAL_MEM_GB = 单位转换(PHYSICAL_MEM_Size_int*MEM_Unit[0], MEM_Unit[1])
            SWAP_GB = 单位转换(SWAP_SPACE_Size_int*SWAP_Unit[0], SWAP_Unit[1])

            ## Memory in use follows the `free` command's "used" column:
            ##   used = physical used - buffers - cached
            ##   usage = used / physical size
            MEM_PCT = _snmp_used_percent(PHYSICAL_MEM_Used_int - BUFFERS_MEM_Used_int - CACHED_MEM_Used_int, PHYSICAL_MEM_Size_int)
            SYS_INFO += f"MEM({PHYSICAL_MEM_GB}):{MEM_PCT:.1f}% | "
            D_SYS_INFO['MEM'] = MEM_PCT

            SWAP_PCT = _snmp_used_percent(SWAP_SPACE_Used_int, SWAP_SPACE_Size_int)
            SYS_INFO += f"SWAP({SWAP_GB}):{SWAP_PCT:.1f}% | "
            D_SYS_INFO['SWAP'] = SWAP_PCT
        else:
            ## At least one of the four memory rows is missing.
            SYS_INFO += "MEM(ERROR)  "
            SYS_INFO += "SWAP(ERROR)  "
    elif SYSTEM == 'Windows':
        ## Windows: memory rows are found through their storage type.
        MEM_ID = None       # row ID of physical memory
        V_MEM_ID = None     # row ID of virtual memory
        for K, V in D_Storage.items():
            if V == ' OID: HOST-RESOURCES-TYPES::hrStorageRam':
                MEM_ID = K.split('.')[-1]
            elif V == ' OID: HOST-RESOURCES-TYPES::hrStorageVirtualMemory':
                V_MEM_ID = K.split('.')[-1]
            elif V == FIXED_DISK:
                L_DISK_ID.append(K.split('.')[-1])

        if MEM_ID is not None:
            MEM_Size_int, MEM_Used_int, MEM_Unit = _snmp_storage_row(D_Storage, MEM_ID)
            MEM_PCT = _snmp_used_percent(MEM_Used_int, MEM_Size_int)
            SYS_INFO += f"MEM({单位转换(MEM_Size_int*MEM_Unit[0], MEM_Unit[1])}):{MEM_PCT:.1f}% | "
            D_SYS_INFO['MEM'] = MEM_PCT
        else:
            SYS_INFO += "MEM(ERROR) | "

        if V_MEM_ID is not None:
            V_MEM_Size_int, V_MEM_Used_int, V_MEM_Unit = _snmp_storage_row(D_Storage, V_MEM_ID)
            V_MEM_PCT = _snmp_used_percent(V_MEM_Used_int, V_MEM_Size_int)
            SYS_INFO += f"V_MEM({单位转换(V_MEM_Size_int*V_MEM_Unit[0], V_MEM_Unit[1])}):{V_MEM_PCT:.1f}% | "
            D_SYS_INFO['SWAP'] = V_MEM_PCT      # virtual memory is recorded under the SWAP key
        else:
            SYS_INFO += "V_MEM(ERROR) | "
    else:
        ## Neither Linux nor Windows
        SYS_INFO += 'NOT Linux/Windows'

    ## Disk partitions
    if L_DISK_ID:
        L_DISK_PATH_INFO = []
        for i in L_DISK_ID:
            DISK_Descr = D_Storage['HOST-RESOURCES-MIB::hrStorageDescr.'+i].split('STRING: ')[-1]
            if SYSTEM == 'Windows':
                DISK_Descr = DISK_Descr[0:2]            # keep only the drive letter form 'C:'
            DISK_Size_int, DISK_Used_int, DISK_Unit = _snmp_storage_row(D_Storage, i)
            DISK_PCT = _snmp_used_percent(DISK_Used_int, DISK_Size_int)
            D_SYS_INFO['DISK'][DISK_Descr] = (round(DISK_PCT, 1), DISK_Size_int*DISK_Unit[0], DISK_Unit[1])
            L_DISK_PATH_INFO.append(f"{DISK_Descr}({单位转换(DISK_Size_int*DISK_Unit[0], DISK_Unit[1])}):{DISK_PCT:.1f}%")
        SYS_INFO += 'DISK_PATH: '
        SYS_INFO += ', '.join(L_DISK_PATH_INFO)
    else:
        ## No partition rows found in the SNMP answer
        SYS_INFO += "DISK_PATH: ERROR"

    return (StatusCode, SYS_INFO, D_SYS_INFO)

def 普通_执行(TASK):
    """Poll one host, compare the readings with the thresholds, build the report.

    Args:
        TASK: ``(SYSTEM, SNMP_PARAM, ALARM_PARAM, SAVE_FILE_PATH)`` where
            SNMP_PARAM is ``(ip, port, community)`` and ALARM_PARAM is
            ``(CPU_ALARM, MEM_ALARM, SWAP_ALARM, DISK_ALARM, CPU_TOP_N, MEM_TOP_N)``.

    Returns:
        ``(LOG_TEXT, ALARM_TEXT, LOG_FILE)``
        - LOG_TEXT: one status line for the host.
        - ALARM_TEXT: '' when nothing is wrong, otherwise the host followed
          by the error / alarm / top-process sections.
        - LOG_FILE: SAVE_FILE_PATH, or ``<SYSTEM>_<ip>_<port>.log`` when
          SAVE_FILE_PATH is ''.
    """
    SYSTEM, SNMP_PARAM, ALARM_PARAM, SAVE_FILE_PATH = TASK
    SNMP_IP, SNMP_PORT, SNMP_PWD = SNMP_PARAM
    CPU_ALARM, MEM_ALARM, SWAP_ALARM, DISK_ALARM, CPU_TOP_N, MEM_TOP_N = ALARM_PARAM

    L_ALARM = []        # readings above their threshold
    L_ERROR = []        # things that could not be queried
    L_CPU_TOP = []      # top CPU processes, filled only on a CPU alarm
    L_MEM_TOP = []      # top memory processes, filled only on a MEM alarm
    D_STATUS_ERROR = {1: '查CPU失败', 2: '查MEM/DISK失败', 3: '查CPU/MEM/DISK都失败'}
    SNMP_HOST = SNMP_IP + ':' + str(SNMP_PORT)
    StatusCode, SYS_INFO, D_SYS_INFO = SNMP_CPU_MEM_DISK_USED(SYSTEM, SNMP_HOST, SNMP_PWD)

    if StatusCode == 0:
        ## CPU / MEM: record the alarm first so it is kept even when the
        ## follow-up process query fails.
        CPU_HIGH = D_SYS_INFO['CPU'] > CPU_ALARM
        MEM_HIGH = D_SYS_INFO['MEM'] > MEM_ALARM
        if CPU_HIGH:
            L_ALARM.append('CPU>'+str(CPU_ALARM))
        if MEM_HIGH:
            L_ALARM.append('MEM>'+str(MEM_ALARM))
        if CPU_HIGH or MEM_HIGH:
            ## Second walk over the whole host-resources subtree for process data
            CMD = 'snmpwalk -v 2c -c ' + SNMP_PWD + ' ' + SNMP_HOST + ' 1.3.6.1.2.1.25'
            (status,output) = subprocess.getstatusoutput(CMD)
            if status == 0:
                D_PID_NAME, D_PID_CPU, D_PID_MEM = PID_NAME_CPU_MEM(output)
                if CPU_HIGH:
                    L_CPU_TOP = SHOW_PID_CPU_TOP_N(CPU_TOP_N, D_PID_NAME, D_PID_CPU)
                if MEM_HIGH:
                    L_MEM_TOP = SHOW_PID_MEM_TOP_N(MEM_TOP_N, D_PID_NAME, D_PID_MEM)
            else:
                L_ERROR.append('SNMP 查 CPU/MEM Uesd Top 失败')

        ## SWAP: a string or a negative number means the value is unavailable
        ## (the query function leaves -1 when it could not compute it).
        SWAP_PCT = D_SYS_INFO['SWAP']
        if isinstance(SWAP_PCT, str) or SWAP_PCT < 0:
            L_ERROR.append('查SWAP失败')
        elif SWAP_PCT > SWAP_ALARM:
            L_ALARM.append('SWAP>'+str(SWAP_ALARM))

        ## Disk partitions: value[0] is the usage percentage
        if D_SYS_INFO['DISK'] == {}:
            L_ERROR.append('查DISK失败')
        else:
            for K in D_SYS_INFO['DISK']:
                if D_SYS_INFO['DISK'][K][0] > DISK_ALARM:
                    L_ALARM.append('DISK PATH '+K+'>'+str(DISK_ALARM))
    elif StatusCode in D_STATUS_ERROR:
        L_ERROR.append(D_STATUS_ERROR[StatusCode])

    ## Where the status line should be stored
    if SAVE_FILE_PATH == '':
        LOG_FILE = SYSTEM + '_' + SNMP_IP + '_' + str(SNMP_PORT) + '.log'
    else:
        LOG_FILE = SAVE_FILE_PATH
    TIME = time.strftime('%Y-%m-%d %H:%M:%S')
    HOST = f"{SNMP_IP}:{SNMP_PORT}"
    LOG_TEXT = f"{HOST:21s} {TIME} | {SYS_INFO}"

    ## Alarm text: one titled section per non-empty list
    ALARM_TEXT = ''
    L_SECTION = (
        ('SNMP ERROR:\n', L_ERROR),
        ('SYS ALARM:\n', L_ALARM),
        (' CPU_TOP_{0}\n'.format(CPU_TOP_N), L_CPU_TOP),
        (' MEM_TOP_{0}\n'.format(MEM_TOP_N), L_MEM_TOP),
    )
    for TITLE, L_ITEM in L_SECTION:
        if L_ITEM:
            ALARM_TEXT += TITLE + ''.join('  {0}\n'.format(i) for i in L_ITEM)

    if ALARM_TEXT != '':
        ALARM_TEXT = HOST + ' ' + ALARM_TEXT

    return((LOG_TEXT, ALARM_TEXT, LOG_FILE))

def 显示任务进度(q, 计时开始时间, 显示时间间隔, 任务数量):
    """Print progress until the queue holds one entry per task.

    Args:
        q: Queue that receives one entry for every finished task.
        计时开始时间: ``time.time()`` value the elapsed seconds are measured from.
        显示时间间隔: Seconds to sleep between two checks.
        任务数量: Number of entries to wait for; with 0 or less there is
            nothing to wait for and the function returns at once.
    """
    if 任务数量 <= 0:
        return
    while True:
        time.sleep(显示时间间隔)
        LEN_NOW = q.qsize()         # one snapshot per cycle for both the print and the check
        if LEN_NOW == 0:
            continue
        print(f"进度 {LEN_NOW/任务数量*100:.0f}% ({LEN_NOW}/{任务数量}) 已用时 {int(time.time()-计时开始时间)} 秒")
        if LEN_NOW >= 任务数量:
            print("完成")
            break                   # ends the progress thread

## Worker-thread entry: run one task and publish its result on the queue.
def 线程_执行(q, TASK):
    """Run ``普通_执行`` for one task and put its result tuple into ``q``.

    Exactly one ``(LOG_TEXT, ALARM_TEXT, LOG_FILE)`` tuple is queued per
    call. If the check itself raises, a tuple describing the exception is
    queued instead, so a consumer counting queue entries is never left
    waiting for this task.

    Args:
        q: Queue collecting the results.
        TASK: ``(SYSTEM, SNMP_PARAM, ALARM_PARAM, SAVE_FILE_PATH)``, the same
            structure ``普通_执行`` takes.
    """
    SYSTEM, SNMP_PARAM, _, SAVE_FILE_PATH = TASK
    SNMP_IP, SNMP_PORT = SNMP_PARAM[0], SNMP_PARAM[1]
    try:
        RESULT = 普通_执行(TASK)
    except Exception as e:
        HOST = f"{SNMP_IP}:{SNMP_PORT}"
        TIME = time.strftime('%Y-%m-%d %H:%M:%S')
        ERROR = f"{type(e).__name__}: {e}"
        if SAVE_FILE_PATH == '':
            LOG_FILE = SYSTEM + '_' + SNMP_IP + '_' + str(SNMP_PORT) + '.log'
        else:
            LOG_FILE = SAVE_FILE_PATH
        RESULT = (
            f"{HOST:21s} {TIME} | ERROR {ERROR}",
            f"{HOST} SNMP ERROR:\n  {ERROR}\n",
            LOG_FILE,
        )
    q.put(RESULT)

def 多线程_执行(L_TASK, 并发数量):
    """Run every task in a thread pool and return the collected results.

    Args:
        L_TASK: Tasks handed one by one to ``线程_执行``.
        并发数量: Maximum number of worker threads.

    Returns:
        List of the tuples the workers put on the queue, in completion
        order. A task whose worker raised is reported on stdout and left out
        of the list. An empty ``L_TASK`` returns ``[]`` without starting any
        thread.
    """
    任务数量 = len(L_TASK)
    if 任务数量 == 0:
        return []

    q = Queue()

    def _执行并占位(TASK):
        # Every task must leave exactly one queue entry, otherwise the
        # progress thread would wait forever; None marks a failed task.
        try:
            线程_执行(q, TASK)
        except Exception as e:
            print(f"线程执行失败 {e}")
            q.put(None)

    显示时间间隔 = 2                # seconds between two progress lines
    # Leaving the `with` waits for the workers first, then for the progress thread.
    with ThreadPoolExecutor(1) as 进度线程池, ThreadPoolExecutor(并发数量) as 工作线程池:
        for TASK in L_TASK:
            工作线程池.submit(_执行并占位, TASK)
        计时开始时间 = time.time()
        进度线程池.submit(显示任务进度, q, 计时开始时间, 显示时间间隔, 任务数量)

    L_R = []
    while not q.empty():
        R = q.get()
        if R is not None:
            L_R.append(R)
    return(L_R)


def 顺序执行(L_TASK, ALARM_SAVE_DIR, SHOW_or_SAVE, MAIL, MAIL_PARAM):
    """Check all hosts one after another, then print or store the results.

    Args:
        L_TASK: Tasks passed one by one to ``普通_执行``.
        ALARM_SAVE_DIR: Directory for the alarm file (created when missing).
        SHOW_or_SAVE: 'SHOW' prints log and alarm text; any other value
            appends each log line to the file returned for the task and
            writes all alarms to ``<ALARM_SAVE_DIR>/<timestamp>.ALARM``.
        MAIL: 1 to also mail the collected alarm text (save mode only).
        MAIL_PARAM: ``(smtp_server, sender_account, sender_password, recipient)``;
            only read when a mail is sent.

    Files are written as UTF-8, because the texts contain Chinese
    characters and the platform default encoding may not cover them.
    """
    if SHOW_or_SAVE == 'SHOW':
        for TASK in L_TASK:
            LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH = 普通_执行(TASK)
            print(LOG_TEXT)
            if ALARM_TEXT != '':
                print(ALARM_TEXT)
        return

    L_ALARM_TEXT = []
    for TASK in L_TASK:
        LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH = 普通_执行(TASK)
        LOG_DIR = os.path.dirname(SAVE_FILE_PATH)
        if LOG_DIR:
            os.makedirs(LOG_DIR, exist_ok=True)     # same courtesy as for the alarm directory
        with open(SAVE_FILE_PATH, 'a', encoding='utf-8') as f:
            f.write(LOG_TEXT+'\n')
        if ALARM_TEXT != '':
            L_ALARM_TEXT.append(ALARM_TEXT + '\n')

    ALARM_TEXT_ALL = ''.join(L_ALARM_TEXT)
    if ALARM_TEXT_ALL == '':
        return

    os.makedirs(ALARM_SAVE_DIR, exist_ok=True)
    ALARM_TIME = time.strftime('%Y%m%d_%H%M%S')
    ALARM_SAVE_PATH = os.path.join(ALARM_SAVE_DIR, f"{ALARM_TIME}.ALARM")
    with open(ALARM_SAVE_PATH, 'w', encoding='utf-8') as f:
        f.write(ALARM_TEXT_ALL)

    if MAIL == 1:
        邮件服务器,发件人邮箱账号,发件人邮箱密码,收件人邮箱地址 = MAIL_PARAM
        主题 = '服务器SNMP告警'
        L_邮件任务 = [(收件人邮箱地址, 主题, ALARM_TEXT_ALL)]
        批量发邮件(邮件服务器, 发件人邮箱账号, 发件人邮箱密码, L_邮件任务)

def 并发执行(L_TASK, ALARM_SAVE_DIR, SHOW_or_SAVE, MAIL, MAIL_PARAM=None):
    """Check all hosts concurrently, then print or store the results.

    Args:
        L_TASK: Tasks passed to ``多线程_执行``; one worker thread per task.
            With no tasks the function returns without doing anything.
        ALARM_SAVE_DIR: Directory for the alarm file (created when missing).
        SHOW_or_SAVE: 'SHOW' prints log and alarm text; any other value
            appends each log line to the file named in the result and
            writes all alarms to ``<ALARM_SAVE_DIR>/<timestamp>.ALARM``.
        MAIL: 1 to also mail the collected alarm text (save mode only).
        MAIL_PARAM: Optional ``(smtp_server, sender_account, sender_password,
            recipient)``. When omitted, the built-in placeholder values are
            used, which must be edited before mail can actually be sent.

    Files are written as UTF-8, because the texts contain Chinese
    characters and the platform default encoding may not cover them.
    """
    if not L_TASK:
        return                      # a thread pool cannot be created with 0 workers

    并发数量 = len(L_TASK)
    R = 多线程_执行(L_TASK, 并发数量)
    if SHOW_or_SAVE == 'SHOW':
        for LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH in R:
            print(LOG_TEXT)
            if ALARM_TEXT != '':
                print(ALARM_TEXT)
        return

    L_ALARM_TEXT = []
    for LOG_TEXT, ALARM_TEXT, SAVE_FILE_PATH in R:
        LOG_DIR = os.path.dirname(SAVE_FILE_PATH)
        if LOG_DIR:
            os.makedirs(LOG_DIR, exist_ok=True)
        with open(SAVE_FILE_PATH, 'a', encoding='utf-8') as f:
            f.write(LOG_TEXT+'\n')
        if ALARM_TEXT != '':
            L_ALARM_TEXT.append(ALARM_TEXT + '\n')

    ALARM_TEXT_ALL = ''.join(L_ALARM_TEXT)
    if ALARM_TEXT_ALL == '':
        return

    os.makedirs(ALARM_SAVE_DIR, exist_ok=True)
    ALARM_TIME = time.strftime('%Y%m%d_%H%M%S')
    ALARM_SAVE_PATH = os.path.join(ALARM_SAVE_DIR, f"{ALARM_TIME}.ALARM")
    with open(ALARM_SAVE_PATH, 'w', encoding='utf-8') as f:
        f.write(ALARM_TEXT_ALL)

    if MAIL == 1:
        if MAIL_PARAM is None:
            # Placeholder settings kept for callers that pass no MAIL_PARAM.
            SMTP_SERVER = "smtp.邮件服务器.com"
            发件人 = "邮箱账号"
            邮箱密码 = "邮箱密码"
            收件人 = "邮箱账号"
        else:
            SMTP_SERVER, 发件人, 邮箱密码, 收件人 = MAIL_PARAM
        主题 = '服务器SNMP告警'
        L_邮件任务 = [(收件人, 主题, ALARM_TEXT_ALL)]
        批量发邮件(SMTP_SERVER, 发件人, 邮箱密码, L_邮件任务)


if __name__ == '__main__':
    ## Settings to adjust before running

    ## Example of the per-host SNMP settings (the real targets are listed in L_TASK below)
    #SNMP_IP = '192.168.250.22'  # SNMP address of the monitored host
    #SNMP_PORT = '161'           # SNMP port of the monitored host
    #SNMP_PWD = 'public'         # SNMP community string of the monitored host

    ## Alarm thresholds
    CPU_ALARM = 80      # CPU alarm threshold (0-100), 80 recommended
    MEM_ALARM = 80      # memory alarm threshold (0-100), 80 recommended
    SWAP_ALARM = 80     # swap alarm threshold (0-100), 80 recommended
    DISK_ALARM = 80     # partition usage alarm threshold (0-100), 80 recommended
    CPU_TOP_N = 5       # number of top CPU processes listed in an alarm
    MEM_TOP_N = 5       # number of top memory processes listed in an alarm
    ALARM_PARAM = [CPU_ALARM, MEM_ALARM, SWAP_ALARM, DISK_ALARM, CPU_TOP_N, MEM_TOP_N]

    ## Mail settings
    SMTP_SERVER = 'smtp.XXX.com'    # SMTP server of the sending mailbox
    SENDER_USER = 'ABCg@XXX.com'    # sender account
    SENDER_PASS = 'pwd@000'         # sender password
    RECEIVER = 'A@X.com'            # recipient address
    MAIL_PARAM  = [SMTP_SERVER, SENDER_USER, SENDER_PASS, RECEIVER]

    ## Other settings
    ALARM_SAVE_DIR = '/home/it/ALARM/'  # local directory for alarm files
    SHOW_or_SAVE = 'SAVE'               # 'SHOW' prints the SNMP results, 'SAVE' stores them
    MAIL = 0                            # 0: no mail, 1: send mail on SNMP errors or exceeded thresholds

    ## One entry per monitored host. The first field is 'Linux' or 'Windows';
    ## it selects how CPU and memory usage are calculated.
    L_TASK = [
        ('Linux',   ['192.168.100.2', '161', 'snmp密码'], ALARM_PARAM, '/home/it/SNMP/192.168.100.2.log'),
        ('Windows', ['192.168.200.2', '161', 'snmp密码'], ALARM_PARAM, '/home/it/SNMP/192.168.200.2.log'),
    ]

    ## Run
    顺序执行(L_TASK, ALARM_SAVE_DIR, SHOW_or_SAVE, MAIL, MAIL_PARAM)
    #并发执行(L_TASK, ALARM_SAVE_DIR, SHOW_or_SAVE, MAIL) # concurrent variant; marked by the author as having unresolved problems

    ## To schedule the script, add a line like this to the system crontab:
    ## /usr/bin/python3 /home/it/PY/SNMP.py >> /home/it/PY/py.log
相关推荐
云飞云共享云桌面16 分钟前
8位机械工程师如何共享一台图形工作站算力?
linux·服务器·网络
阡之尘埃1 小时前
Python数据分析案例61——信贷风控评分卡模型(A卡)(scorecardpy 全面解析)
人工智能·python·机器学习·数据分析·智能风控·信贷风控
音徽编程2 小时前
Rust异步运行时框架tokio保姆级教程
开发语言·网络·rust
幺零九零零4 小时前
【C++】socket套接字编程
linux·服务器·网络·c++
23zhgjx-NanKon4 小时前
华为eNSP:QinQ
网络·安全·华为
23zhgjx-NanKon4 小时前
华为eNSP:mux-vlan
网络·安全·华为
点点滴滴的记录4 小时前
RPC核心实现原理
网络·网络协议·rpc
丕羽4 小时前
【Pytorch】基本语法
人工智能·pytorch·python
bryant_meng5 小时前
【python】Distribution
开发语言·python·分布函数·常用分布
Lionhacker5 小时前
网络工程师这个行业可以一直干到退休吗?
网络·数据库·网络安全·黑客·黑客技术