【HBase】HBase JMX 接口监控信息实现钉钉告警

目录

[一、JMX 简介](#一、JMX 简介)

二、JMX监控信息钉钉告警实现


一、JMX 简介


官网:Apache HBase ™ Reference Guide

JMX (Java管理扩展)提供了内置的工具,使您能够监视和管理Java VM。要启用远程系统的监视和管理,需要在启动Java VM时设置系统属性com.sun.management.jmxremote.port(希望通过该端口号启用JMX RMI连接)。

访问:

curl http://hdp-node2:16030/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server

输出的指标如下:

cs 复制代码
{
  "beans" : [ {
    "name" : "Hadoop:service=HBase,name=RegionServer,sub=Server",
    "modelerType" : "RegionServer,sub=Server",
    "tag.zookeeperQuorum" : "hdp-node2:2181,hdp-node3:2181,hdp-node1:2181",
    "tag.serverName" : "hdp-node2,16020,1738720067137",
    "tag.clusterId" : "85aa06c7-b28c-41fd-aa17-a49376641751",
    "tag.Context" : "regionserver",
    "tag.Hostname" : "hdp-node2",
    "regionCount" : 34,
    "storeCount" : 51,
    "hlogFileCount" : 1,
    "hlogFileSize" : 0,
    "storeFileCount" : 31,
    "memStoreSize" : 0,
    "storeFileSize" : 212110208,
    "maxStoreFileAge" : 23910274739,
    "minStoreFileAge" : 595075791,
    "avgStoreFileAge" : 12083745007,
    "numReferenceFiles" : 0,
    "regionServerStartTime" : 1738720067137,
    "averageRegionSize" : 6238535,
    "storeFileIndexSize" : 529736,
    "staticIndexSize" : 1348988,
    "staticBloomSize" : 2438222,
    "mutationsWithoutWALCount" : 0,
    "mutationsWithoutWALSize" : 0,
    "percentFilesLocal" : 100.0,
    "percentFilesLocalSecondaryRegions" : 0.0,
    "splitQueueLength" : 0,
    "compactionQueueLength" : 0,
    "smallCompactionQueueLength" : 0,
    "largeCompactionQueueLength" : 0,
    "flushQueueLength" : 0,
    "blockCacheFreeSize" : 1716727624,
    "blockCacheCount" : 0,
    "blockCacheSize" : 1259320,
    "blockCacheCountHitPercent" : 0.0,
    "blockCacheExpressHitPercent" : 0.0,
    "l1CacheHitCount" : 0,
    "l1CacheMissCount" : 0,
    "l1CacheHitRatio" : 0.0,
    "l1CacheMissRatio" : 0.0,
    "l2CacheHitCount" : 0,
    "l2CacheMissCount" : 0,
    "l2CacheHitRatio" : 0.0,
    "l2CacheMissRatio" : 0.0,
    "mobFileCacheCount" : 0,
    "mobFileCacheHitPercent" : 0.0,
    "totalRequestCount" : 2,
    "totalRowActionRequestCount" : 0,
    "readRequestCount" : 0,
    "filteredReadRequestCount" : 0,
    "writeRequestCount" : 0,
    "rpcGetRequestCount" : 0,
    "rpcScanRequestCount" : 0,
    "rpcMultiRequestCount" : 0,
    "rpcMutateRequestCount" : 0,
    "checkMutateFailedCount" : 0,
    "checkMutatePassedCount" : 0,
    "blockCacheHitCount" : 0,
    "blockCacheHitCountPrimary" : 0,
    "blockCacheMissCount" : 0,
    "blockCacheMissCountPrimary" : 0,
    "blockCacheEvictionCount" : 0,
    "blockCacheEvictionCountPrimary" : 0,
    "blockCacheFailedInsertionCount" : 0,
    "blockCacheDataMissCount" : 0,
    "blockCacheLeafIndexMissCount" : 0,
    "blockCacheBloomChunkMissCount" : 0,
    "blockCacheMetaMissCount" : 0,
    "blockCacheRootIndexMissCount" : 0,
    "blockCacheIntermediateIndexMissCount" : 0,
    "blockCacheFileInfoMissCount" : 0,
    "blockCacheGeneralBloomMetaMissCount" : 0,
    "blockCacheDeleteFamilyBloomMissCount" : 0,
    "blockCacheTrailerMissCount" : 0,
    "blockCacheDataHitCount" : 0,
    "blockCacheLeafIndexHitCount" : 0,
    "blockCacheBloomChunkHitCount" : 0,
    "blockCacheMetaHitCount" : 0,
    "blockCacheRootIndexHitCount" : 0,
    "blockCacheIntermediateIndexHitCount" : 0,
    "blockCacheFileInfoHitCount" : 0,
    "blockCacheGeneralBloomMetaHitCount" : 0,
    "blockCacheDeleteFamilyBloomHitCount" : 0,
    "blockCacheTrailerHitCount" : 0,
    "updatesBlockedTime" : 0,
    "flushedCellsCount" : 0,
    "compactedCellsCount" : 0,
    "majorCompactedCellsCount" : 0,
    "flushedCellsSize" : 0,
    "compactedCellsSize" : 0,
    "majorCompactedCellsSize" : 0,
    "cellsCountCompactedFromMob" : 0,
    "cellsCountCompactedToMob" : 0,
    "cellsSizeCompactedFromMob" : 0,
    "cellsSizeCompactedToMob" : 0,
    "mobFlushCount" : 0,
    "mobFlushedCellsCount" : 0,
    "mobFlushedCellsSize" : 0,
    "mobScanCellsCount" : 0,
    "mobScanCellsSize" : 0,
    "mobFileCacheAccessCount" : 0,
    "mobFileCacheMissCount" : 0,
    "mobFileCacheEvictedCount" : 0,
    "hedgedReads" : 0,
    "hedgedReadWins" : 0,
    "blockedRequestCount" : 0,
    "MajorCompactionTime_num_ops" : 2,
    "MajorCompactionTime_min" : 0,
    "MajorCompactionTime_max" : 0,
    "MajorCompactionTime_mean" : 0,
    "MajorCompactionTime_25th_percentile" : 0,
    "MajorCompactionTime_median" : 0,
    "MajorCompactionTime_75th_percentile" : 0,
    "MajorCompactionTime_90th_percentile" : 0,
    "MajorCompactionTime_95th_percentile" : 0,
    "MajorCompactionTime_98th_percentile" : 0,
    "MajorCompactionTime_99th_percentile" : 0,
    "MajorCompactionTime_99.9th_percentile" : 0,
    "MajorCompactionTime_TimeRangeCount_600000-inf" : 2,
    "PauseTimeWithGc_num_ops" : 0,
    "PauseTimeWithGc_min" : 0,
    "PauseTimeWithGc_max" : 0,
    "PauseTimeWithGc_mean" : 0,
    "PauseTimeWithGc_25th_percentile" : 0,
    "PauseTimeWithGc_median" : 0,
    "PauseTimeWithGc_75th_percentile" : 0,
    "PauseTimeWithGc_90th_percentile" : 0,
    "PauseTimeWithGc_95th_percentile" : 0,
    "PauseTimeWithGc_98th_percentile" : 0,
    "PauseTimeWithGc_99th_percentile" : 0,
    "PauseTimeWithGc_99.9th_percentile" : 0,
    "compactedOutputBytes" : 8924,
    "pauseWarnThresholdExceeded" : 0,
    "ScanTime_num_ops" : 0,
    "ScanTime_min" : 0,
    "ScanTime_max" : 0,
    "ScanTime_mean" : 0,
    "ScanTime_25th_percentile" : 0,
    "ScanTime_median" : 0,
    "ScanTime_75th_percentile" : 0,
    "ScanTime_90th_percentile" : 0,
    "ScanTime_95th_percentile" : 0,
    "ScanTime_98th_percentile" : 0,
    "ScanTime_99th_percentile" : 0,
    "ScanTime_99.9th_percentile" : 0,
    "Increment_num_ops" : 0,
    "Increment_min" : 0,
    "Increment_max" : 0,
    "Increment_mean" : 0,
    "Increment_25th_percentile" : 0,
    "Increment_median" : 0,
    "Increment_75th_percentile" : 0,
    "Increment_90th_percentile" : 0,
    "Increment_95th_percentile" : 0,
    "Increment_98th_percentile" : 0,
    "Increment_99th_percentile" : 0,
    "Increment_99.9th_percentile" : 0,
    "Delete_num_ops" : 0,
    "Delete_min" : 0,
    "Delete_max" : 0,
    "Delete_mean" : 0,
    "Delete_25th_percentile" : 0,
    "Delete_median" : 0,
    "Delete_75th_percentile" : 0,
    "Delete_90th_percentile" : 0,
    "Delete_95th_percentile" : 0,
    "Delete_98th_percentile" : 0,
    "Delete_99th_percentile" : 0,
    "Delete_99.9th_percentile" : 0,
    "Put_num_ops" : 0,
    "Put_min" : 0,
    "Put_max" : 0,
    "Put_mean" : 0,
    "Put_25th_percentile" : 0,
    "Put_median" : 0,
    "Put_75th_percentile" : 0,
    "Put_90th_percentile" : 0,
    "Put_95th_percentile" : 0,
    "Put_98th_percentile" : 0,
    "Put_99th_percentile" : 0,
    "Put_99.9th_percentile" : 0,
    "DeleteBatch_num_ops" : 0,
    "DeleteBatch_min" : 0,
    "DeleteBatch_max" : 0,
    "DeleteBatch_mean" : 0,
    "DeleteBatch_25th_percentile" : 0,
    "DeleteBatch_median" : 0,
    "DeleteBatch_75th_percentile" : 0,
    "DeleteBatch_90th_percentile" : 0,
    "DeleteBatch_95th_percentile" : 0,
    "DeleteBatch_98th_percentile" : 0,
    "DeleteBatch_99th_percentile" : 0,
    "DeleteBatch_99.9th_percentile" : 0,
    "splitRequestCount" : 0,
    "FlushMemstoreSize_num_ops" : 0,
    "FlushMemstoreSize_min" : 0,
    "FlushMemstoreSize_max" : 0,
    "FlushMemstoreSize_mean" : 0,
    "FlushMemstoreSize_25th_percentile" : 0,
    "FlushMemstoreSize_median" : 0,
    "FlushMemstoreSize_75th_percentile" : 0,
    "FlushMemstoreSize_90th_percentile" : 0,
    "FlushMemstoreSize_95th_percentile" : 0,
    "FlushMemstoreSize_98th_percentile" : 0,
    "FlushMemstoreSize_99th_percentile" : 0,
    "FlushMemstoreSize_99.9th_percentile" : 0,
    "CompactionInputFileCount_num_ops" : 2,
    "CompactionInputFileCount_min" : 0,
    "CompactionInputFileCount_max" : 0,
    "CompactionInputFileCount_mean" : 0,
    "CompactionInputFileCount_25th_percentile" : 0,
    "CompactionInputFileCount_median" : 0,
    "CompactionInputFileCount_75th_percentile" : 0,
    "CompactionInputFileCount_90th_percentile" : 0,
    "CompactionInputFileCount_95th_percentile" : 0,
    "CompactionInputFileCount_98th_percentile" : 0,
    "CompactionInputFileCount_99th_percentile" : 0,
    "CompactionInputFileCount_99.9th_percentile" : 0,
    "PutBatch_num_ops" : 0,
    "PutBatch_min" : 0,
    "PutBatch_max" : 0,
    "PutBatch_mean" : 0,
    "PutBatch_25th_percentile" : 0,
    "PutBatch_median" : 0,
    "PutBatch_75th_percentile" : 0,
    "PutBatch_90th_percentile" : 0,
    "PutBatch_95th_percentile" : 0,
    "PutBatch_98th_percentile" : 0,
    "PutBatch_99th_percentile" : 0,
    "PutBatch_99.9th_percentile" : 0,
    "CompactionTime_num_ops" : 2,
    "CompactionTime_min" : 0,
    "CompactionTime_max" : 0,
    "CompactionTime_mean" : 0,
    "CompactionTime_25th_percentile" : 0,
    "CompactionTime_median" : 0,
    "CompactionTime_75th_percentile" : 0,
    "CompactionTime_90th_percentile" : 0,
    "CompactionTime_95th_percentile" : 0,
    "CompactionTime_98th_percentile" : 0,
    "CompactionTime_99th_percentile" : 0,
    "CompactionTime_99.9th_percentile" : 0,
    "CompactionTime_TimeRangeCount_600000-inf" : 2,
    "Get_num_ops" : 0,
    "Get_min" : 0,
    "Get_max" : 0,
    "Get_mean" : 0,
    "Get_25th_percentile" : 0,
    "Get_median" : 0,
    "Get_75th_percentile" : 0,
    "Get_90th_percentile" : 0,
    "Get_95th_percentile" : 0,
    "Get_98th_percentile" : 0,
    "Get_99th_percentile" : 0,
    "Get_99.9th_percentile" : 0,
    "MajorCompactionInputFileCount_num_ops" : 2,
    "MajorCompactionInputFileCount_min" : 0,
    "MajorCompactionInputFileCount_max" : 0,
    "MajorCompactionInputFileCount_mean" : 0,
    "MajorCompactionInputFileCount_25th_percentile" : 0,
    "MajorCompactionInputFileCount_median" : 0,
    "MajorCompactionInputFileCount_75th_percentile" : 0,
    "MajorCompactionInputFileCount_90th_percentile" : 0,
    "MajorCompactionInputFileCount_95th_percentile" : 0,
    "MajorCompactionInputFileCount_98th_percentile" : 0,
    "MajorCompactionInputFileCount_99th_percentile" : 0,
    "MajorCompactionInputFileCount_99.9th_percentile" : 0,
    "CheckAndPut_num_ops" : 0,
    "CheckAndPut_min" : 0,
    "CheckAndPut_max" : 0,
    "CheckAndPut_mean" : 0,
    "CheckAndPut_25th_percentile" : 0,
    "CheckAndPut_median" : 0,
    "CheckAndPut_75th_percentile" : 0,
    "CheckAndPut_90th_percentile" : 0,
    "CheckAndPut_95th_percentile" : 0,
    "CheckAndPut_98th_percentile" : 0,
    "CheckAndPut_99th_percentile" : 0,
    "CheckAndPut_99.9th_percentile" : 0,
    "SplitTime_num_ops" : 0,
    "SplitTime_min" : 0,
    "SplitTime_max" : 0,
    "SplitTime_mean" : 0,
    "SplitTime_25th_percentile" : 0,
    "SplitTime_median" : 0,
    "SplitTime_75th_percentile" : 0,
    "SplitTime_90th_percentile" : 0,
    "SplitTime_95th_percentile" : 0,
    "SplitTime_98th_percentile" : 0,
    "SplitTime_99th_percentile" : 0,
    "SplitTime_99.9th_percentile" : 0,
    "MajorCompactionOutputSize_num_ops" : 2,
    "MajorCompactionOutputSize_min" : 0,
    "MajorCompactionOutputSize_max" : 0,
    "MajorCompactionOutputSize_mean" : 0,
    "MajorCompactionOutputSize_25th_percentile" : 0,
    "MajorCompactionOutputSize_median" : 0,
    "MajorCompactionOutputSize_75th_percentile" : 0,
    "MajorCompactionOutputSize_90th_percentile" : 0,
    "MajorCompactionOutputSize_95th_percentile" : 0,
    "MajorCompactionOutputSize_98th_percentile" : 0,
    "MajorCompactionOutputSize_99th_percentile" : 0,
    "MajorCompactionOutputSize_99.9th_percentile" : 0,
    "MajorCompactionOutputSize_SizeRangeCount_100000000-inf" : 2,
    "majorCompactedInputBytes" : 8924,
    "slowAppendCount" : 0,
    "flushedOutputBytes" : 0,
    "CompactionOutputFileCount_num_ops" : 2,
    "CompactionOutputFileCount_min" : 0,
    "CompactionOutputFileCount_max" : 0,
    "CompactionOutputFileCount_mean" : 0,
    "CompactionOutputFileCount_25th_percentile" : 0,
    "CompactionOutputFileCount_median" : 0,
    "CompactionOutputFileCount_75th_percentile" : 0,
    "CompactionOutputFileCount_90th_percentile" : 0,
    "CompactionOutputFileCount_95th_percentile" : 0,
    "CompactionOutputFileCount_98th_percentile" : 0,
    "CompactionOutputFileCount_99th_percentile" : 0,
    "CompactionOutputFileCount_99.9th_percentile" : 0,
    "slowDeleteCount" : 0,
    "Replay_num_ops" : 0,
    "Replay_min" : 0,
    "Replay_max" : 0,
    "Replay_mean" : 0,
    "Replay_25th_percentile" : 0,
    "Replay_median" : 0,
    "Replay_75th_percentile" : 0,
    "Replay_90th_percentile" : 0,
    "Replay_95th_percentile" : 0,
    "Replay_98th_percentile" : 0,
    "Replay_99th_percentile" : 0,
    "Replay_99.9th_percentile" : 0,
    "FlushTime_num_ops" : 0,
    "FlushTime_min" : 0,
    "FlushTime_max" : 0,
    "FlushTime_mean" : 0,
    "FlushTime_25th_percentile" : 0,
    "FlushTime_median" : 0,
    "FlushTime_75th_percentile" : 0,
    "FlushTime_90th_percentile" : 0,
    "FlushTime_95th_percentile" : 0,
    "FlushTime_98th_percentile" : 0,
    "FlushTime_99th_percentile" : 0,
    "FlushTime_99.9th_percentile" : 0,
    "MajorCompactionInputSize_num_ops" : 2,
    "MajorCompactionInputSize_min" : 0,
    "MajorCompactionInputSize_max" : 0,
    "MajorCompactionInputSize_mean" : 0,
    "MajorCompactionInputSize_25th_percentile" : 0,
    "MajorCompactionInputSize_median" : 0,
    "MajorCompactionInputSize_75th_percentile" : 0,
    "MajorCompactionInputSize_90th_percentile" : 0,
    "MajorCompactionInputSize_95th_percentile" : 0,
    "MajorCompactionInputSize_98th_percentile" : 0,
    "MajorCompactionInputSize_99th_percentile" : 0,
    "MajorCompactionInputSize_99.9th_percentile" : 0,
    "MajorCompactionInputSize_SizeRangeCount_100000000-inf" : 2,
    "pauseInfoThresholdExceeded" : 0,
    "splitSuccessCount" : 0,
    "CheckAndDelete_num_ops" : 0,
    "CheckAndDelete_min" : 0,
    "CheckAndDelete_max" : 0,
    "CheckAndDelete_mean" : 0,
    "CheckAndDelete_25th_percentile" : 0,
    "CheckAndDelete_median" : 0,
    "CheckAndDelete_75th_percentile" : 0,
    "CheckAndDelete_90th_percentile" : 0,
    "CheckAndDelete_95th_percentile" : 0,
    "CheckAndDelete_98th_percentile" : 0,
    "CheckAndDelete_99th_percentile" : 0,
    "CheckAndDelete_99.9th_percentile" : 0,
    "CompactionInputSize_num_ops" : 2,
    "CompactionInputSize_min" : 0,
    "CompactionInputSize_max" : 0,
    "CompactionInputSize_mean" : 0,
    "CompactionInputSize_25th_percentile" : 0,
    "CompactionInputSize_median" : 0,
    "CompactionInputSize_75th_percentile" : 0,
    "CompactionInputSize_90th_percentile" : 0,
    "CompactionInputSize_95th_percentile" : 0,
    "CompactionInputSize_98th_percentile" : 0,
    "CompactionInputSize_99th_percentile" : 0,
    "CompactionInputSize_99.9th_percentile" : 0,
    "CompactionInputSize_SizeRangeCount_100000000-inf" : 2,
    "MajorCompactionOutputFileCount_num_ops" : 2,
    "MajorCompactionOutputFileCount_min" : 0,
    "MajorCompactionOutputFileCount_max" : 0,
    "MajorCompactionOutputFileCount_mean" : 0,
    "MajorCompactionOutputFileCount_25th_percentile" : 0,
    "MajorCompactionOutputFileCount_median" : 0,
    "MajorCompactionOutputFileCount_75th_percentile" : 0,
    "MajorCompactionOutputFileCount_90th_percentile" : 0,
    "MajorCompactionOutputFileCount_95th_percentile" : 0,
    "MajorCompactionOutputFileCount_98th_percentile" : 0,
    "MajorCompactionOutputFileCount_99th_percentile" : 0,
    "MajorCompactionOutputFileCount_99.9th_percentile" : 0,
    "ScanSize_num_ops" : 0,
    "ScanSize_min" : 0,
    "ScanSize_max" : 0,
    "ScanSize_mean" : 0,
    "ScanSize_25th_percentile" : 0,
    "ScanSize_median" : 0,
    "ScanSize_75th_percentile" : 0,
    "ScanSize_90th_percentile" : 0,
    "ScanSize_95th_percentile" : 0,
    "ScanSize_98th_percentile" : 0,
    "ScanSize_99th_percentile" : 0,
    "ScanSize_99.9th_percentile" : 0,
    "slowGetCount" : 0,
    "flushedMemstoreBytes" : 0,
    "CompactionOutputSize_num_ops" : 2,
    "CompactionOutputSize_min" : 0,
    "CompactionOutputSize_max" : 0,
    "CompactionOutputSize_mean" : 0,
    "CompactionOutputSize_25th_percentile" : 0,
    "CompactionOutputSize_median" : 0,
    "CompactionOutputSize_75th_percentile" : 0,
    "CompactionOutputSize_90th_percentile" : 0,
    "CompactionOutputSize_95th_percentile" : 0,
    "CompactionOutputSize_98th_percentile" : 0,
    "CompactionOutputSize_99th_percentile" : 0,
    "CompactionOutputSize_99.9th_percentile" : 0,
    "CompactionOutputSize_SizeRangeCount_100000000-inf" : 2,
    "majorCompactedOutputBytes" : 8924,
    "PauseTimeWithoutGc_num_ops" : 0,
    "PauseTimeWithoutGc_min" : 0,
    "PauseTimeWithoutGc_max" : 0,
    "PauseTimeWithoutGc_mean" : 0,
    "PauseTimeWithoutGc_25th_percentile" : 0,
    "PauseTimeWithoutGc_median" : 0,
    "PauseTimeWithoutGc_75th_percentile" : 0,
    "PauseTimeWithoutGc_90th_percentile" : 0,
    "PauseTimeWithoutGc_95th_percentile" : 0,
    "PauseTimeWithoutGc_98th_percentile" : 0,
    "PauseTimeWithoutGc_99th_percentile" : 0,
    "PauseTimeWithoutGc_99.9th_percentile" : 0,
    "slowPutCount" : 0,
    "slowIncrementCount" : 0,
    "compactedInputBytes" : 8924,
    "Append_num_ops" : 0,
    "Append_min" : 0,
    "Append_max" : 0,
    "Append_mean" : 0,
    "Append_25th_percentile" : 0,
    "Append_median" : 0,
    "Append_75th_percentile" : 0,
    "Append_90th_percentile" : 0,
    "Append_95th_percentile" : 0,
    "Append_98th_percentile" : 0,
    "Append_99th_percentile" : 0,
    "Append_99.9th_percentile" : 0,
    "FlushOutputSize_num_ops" : 0,
    "FlushOutputSize_min" : 0,
    "FlushOutputSize_max" : 0,
    "FlushOutputSize_mean" : 0,
    "FlushOutputSize_25th_percentile" : 0,
    "FlushOutputSize_median" : 0,
    "FlushOutputSize_75th_percentile" : 0,
    "FlushOutputSize_90th_percentile" : 0,
    "FlushOutputSize_95th_percentile" : 0,
    "FlushOutputSize_98th_percentile" : 0,
    "FlushOutputSize_99th_percentile" : 0,
    "FlushOutputSize_99.9th_percentile" : 0,
    "Bulkload_count" : 0,
    "Bulkload_mean_rate" : 0.0,
    "Bulkload_1min_rate" : 0.0,
    "Bulkload_5min_rate" : 0.0,
    "Bulkload_15min_rate" : 0.0,
    "Bulkload_num_ops" : 0,
    "Bulkload_min" : 0,
    "Bulkload_max" : 0,
    "Bulkload_mean" : 0,
    "Bulkload_25th_percentile" : 0,
    "Bulkload_median" : 0,
    "Bulkload_75th_percentile" : 0,
    "Bulkload_90th_percentile" : 0,
    "Bulkload_95th_percentile" : 0,
    "Bulkload_98th_percentile" : 0,
    "Bulkload_99th_percentile" : 0,
    "Bulkload_99.9th_percentile" : 0
  } ]
}

以上指标是 HBase 中某个 RegionServer 的详细监控信息,涵盖 GC、scan、flush、block cache、compaction 等细粒度指标。


二、JMX监控信息钉钉告警实现


下面我们实现了一个RegionServer运行时长的钉钉通知消息

python 复制代码
# -*- coding: utf-8 -*-

import time
import requests
import json

import schedule as schedule

"""
~~~~~~~~~~~~
author: kangll
date: 2025/02/11 11:50
desc: reid cluster  HBase JMX  获取指标信息
-- curl 请求: curl http://hdp-node2:16030/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server
    2小时发出一次正常的RS运行状态信息,RS运行时间小于10分钟且当前小时立即发出一次告警
"""

# Author / contact metadata for this script.
__author__ = 'kanglilong  <kangll@winnerinf.com>'

# HTTP headers sent with every DingTalk webhook POST.
headers = {'Content-Type': 'application/json;charset=utf-8'}
# RegionServer hostnames to poll. This is a set (unordered); the functions
# that iterate it sort it first.
hostArr = {"hdp-node1", "hdp-node2", "hdp-node3"}
# DingTalk robot webhook that receives the notifications.
# NOTE(review): the access token is committed in source - consider loading it
# from the environment or a config file instead.
dingding_url = "https://oapi.dingtalk.com/robot/send?access_token=ba7693ae5a1a5a4cda1358f35b19785a6d8a7659da92ba3685d6532994a6d82c"

# Local-time hour in which the last "uptime below 10 minutes" alert was sent;
# None until the first alert. Used to avoid repeating that alert within an hour.
last_less_than_10mins_alert_hour = None


def jmxGetHBaseStatus(regionserver_host):
    """
    Fetch a RegionServer's uptime from its HBase JMX HTTP endpoint.

    :param regionserver_host: hostname of the RegionServer to query
    :return: a line of the form
             "hostname: <host>, RegionServer uptime: <hours/minutes/seconds>",
             or None when the request fails or the response cannot be parsed
    """
    jmx_port = 16030
    # JMX query URL for the RegionServer "Server" bean, which carries the start time.
    jmx_url = f'http://{regionserver_host}:{jmx_port}/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server'
    try:
        # The timeout keeps one unreachable host from blocking the whole
        # monitoring loop indefinitely.
        response = requests.get(jmx_url, timeout=10)
        response.raise_for_status()
        jmx_data = response.json()
        # regionServerStartTime is an epoch timestamp in milliseconds.
        region_server_start_time = jmx_data['beans'][0]['regionServerStartTime']
    except requests.exceptions.RequestException as e:
        print(f'请求出错: {e}')
        return None
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # ValueError covers json.JSONDecodeError; TypeError covers a payload
        # whose shape is not the expected {"beans": [{...}]}.
        print(f'解析 JMX 数据出错: {e}')
        return None

    # Uptime = now - start time, both in milliseconds.
    current_time = int(time.time() * 1000)
    uptime = current_time - region_server_start_time
    uptime_hms = convert_milliseconds_to_hms(uptime)
    return f"hostname: {regionserver_host}, RegionServer uptime: {uptime_hms}"


# Per-host record of the local "date hour" in which the last restart alert was
# sent, so that an alert for one host does not suppress alerts for the others.
_restart_alert_hour_by_host = {}


def jmxGetHBaseAlarmStatus(regionserver_host):
    """
    Alert through DingTalk when a RegionServer has been up for less than 10 minutes.

    At most one alert per host and per clock hour is sent.

    :param regionserver_host: hostname of the RegionServer to query
    :return: the alert text when an alert was sent; "" when the server was
             checked and no alert was needed (or one was already sent this
             hour); None when the JMX data could not be obtained or parsed
    """
    global last_less_than_10mins_alert_hour

    jmx_port = 16030
    # JMX query URL for the RegionServer "Server" bean, which carries the start time.
    jmx_url = f'http://{regionserver_host}:{jmx_port}/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server'
    try:
        now_time = time.localtime(time.time())
        formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', now_time)
        # The timeout keeps one unreachable host from blocking the monitoring loop.
        response = requests.get(jmx_url, timeout=10)
        if response.status_code != 200:
            return None

        server_bean = response.json()['beans'][0]
        # Kept from the original: beans with 400 or fewer attributes are ignored.
        # NOTE(review): presumably this skips a bean that is not fully
        # populated yet - confirm the intent of the threshold.
        if len(server_bean) <= 400:
            return None

        # regionServerStartTime is an epoch timestamp in milliseconds.
        region_server_start_time = server_bean['regionServerStartTime']
        current_time = int(time.time() * 1000)
        uptime = current_time - region_server_start_time
        if uptime >= 10 * 60 * 1000:
            return ""

        # Include the date so the same hour on a later day is not suppressed.
        hour_stamp = time.strftime('%Y-%m-%d %H', now_time)
        if _restart_alert_hour_by_host.get(regionserver_host) == hour_stamp:
            return ""

        uptime_hms = convert_milliseconds_to_hms(uptime)
        text = "告警类型: reid 集群HBase 重启告警通知 \n" + "告警信息: \n" + f"hostname: {regionserver_host} ,RegionServer uptime: {uptime_hms} " + "\n告警时间:" + formatted_time
        msg(text, dingding_url)
        # Record the alert only after msg() returned without raising, so a
        # failed send is retried on the next poll.
        _restart_alert_hour_by_host[regionserver_host] = hour_stamp
        # The legacy module-level marker is still updated for anything that reads it.
        last_less_than_10mins_alert_hour = now_time.tm_hour
        return text

    except requests.exceptions.RequestException as e:
        print(f'请求出错: {e}')
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # ValueError covers json.JSONDecodeError; TypeError covers a payload
        # whose shape is not the expected {"beans": [{...}]}.
        print(f'解析 JMX 数据出错: {e}')
    return None


def convert_milliseconds_to_hms(milliseconds):
    """
    Render a duration given in milliseconds as hours, minutes and seconds.

    Sub-second remainders are discarded (floor division by 1000).

    :param milliseconds: duration in milliseconds
    :return: string of the form "<h>小时 <m>分钟 <s>秒."
    """
    total_seconds = milliseconds // 1000
    hh, remainder = divmod(total_seconds, 3600)
    mm, ss = divmod(remainder, 60)
    return f"{hh}小时 {mm}分钟 {ss}秒."


def getAllHostsHBase(alert_message=""):
    """
    Build the uptime report for every host in hostArr and send it to DingTalk.

    Hosts are listed in sorted order, numbered from 1. A host whose status
    cannot be fetched gets an explicit failure line instead of the text "None".

    :param alert_message: optional text placed in front of the report
    :return: None
    """
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    lines = []
    for count, host in enumerate(sorted(hostArr), start=1):
        line_alarm = jmxGetHBaseStatus(host)
        if line_alarm is None:
            # jmxGetHBaseStatus returns None on request/parse failure; say so
            # explicitly so the reader of the message can tell which host failed.
            line_alarm = f"hostname: {host}, RegionServer 状态获取失败"
        lines.append("\t" + str(count) + "." + line_alarm + "\n")

    alert_message += (
        "告警类型: reid 集群HBase告警通知 \n" + "告警信息: \n"
        + "".join(lines)
        + "\n告警时间:" + formatted_time
    )
    print(alert_message)
    notify_msg(alert_message, dingding_url)


def check_and_alert():
    """
    Run the restart check for every host in hostArr and log the outcome.

    For each host (in sorted order) the result of jmxGetHBaseAlarmStatus is
    printed, followed by a line saying whether a restart alert was sent or
    the RegionServer is considered normal.
    """
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    for node in sorted(hostArr):
        result = jmxGetHBaseAlarmStatus(node)
        print(result)
        # A non-empty string means an alert was sent; "" or None means none was.
        verdict = 'RegionServer 重启告警发出!' if result else 'RegionServer 状 态 正 常!'
        print("时间: ", stamp, "主机:", node, verdict)


def msg(text, api_url):
    """
    Send a text alert to a DingTalk robot webhook.

    The response is not inspected.

    :param text: alert text
    :param api_url: DingTalk webhook URL
    :return: None
    :raises requests.exceptions.RequestException: if the request fails or times out
    """
    json_text = {
        "msgtype": "text",
        "text": {
            "content": text
        }, "at": {
            "atMobiles": [""]
        }

    }
    # The timeout keeps an unreachable webhook from hanging the monitoring loop.
    requests.post(api_url, data=json.dumps(json_text), headers=headers, timeout=10)


def notify_msg(text, api_url):
    """
    Send a text notification to a DingTalk robot webhook.

    The response is not inspected.

    :param text: notification text
    :param api_url: DingTalk webhook URL
    :return: None
    :raises requests.exceptions.RequestException: if the request fails or times out
    """
    json_text = {
        "msgtype": "text",
        "text": {
            "content": text
        }, "at": {
            "atMobiles": [""]
        }

    }
    # The timeout keeps an unreachable webhook from hanging the monitoring loop.
    requests.post(api_url, data=json.dumps(json_text), headers=headers, timeout=10)


def correct_msg(text, api_url):
    """
    Send a "component is healthy" text message to a DingTalk robot webhook.

    The response is not inspected.
    NOTE(review): atMobiles holds only an empty string, so presumably nobody
    is @-mentioned - confirm against the DingTalk robot API.

    :param text: message text
    :param api_url: DingTalk webhook URL
    :return: None
    :raises requests.exceptions.RequestException: if the request fails or times out
    """
    json_text = {
        "msgtype": "text",
        "text": {
            "content": text
        }, "at": {
            "atMobiles": [""]
        }

    }
    # The timeout keeps an unreachable webhook from hanging the caller.
    requests.post(api_url, data=json.dumps(json_text), headers=headers, timeout=10)


if __name__ == '__main__':

    # Run the regular status notification at minute :00 of every hour.
    schedule.every().hour.at(":00").do(getAllHostsHBase)

    # Poll for restarts every 10 seconds and run any due scheduled job.
    while True:
        try:
            check_and_alert()
            schedule.run_pending()
        except Exception as e:
            # Top-level boundary: a single failed iteration (e.g. the DingTalk
            # webhook being unreachable) must not stop the monitoring daemon.
            print(f'监控循环出错: {e}')
        time.sleep(10)

钉钉告警通知:

相关推荐
梦醒沉醉2 天前
HBase Shell
大数据·数据库·hbase
狮歌~资深攻城狮4 天前
HBase高级技巧:解锁更强大的数据处理能力
大数据·算法·hbase
狮歌~资深攻城狮4 天前
如何学习HBase:从入门到精通的完整指南
大数据·hbase
狮歌~资深攻城狮4 天前
HBASE面试题
大数据·hbase
黄雪超6 天前
深入HBase——引入
大数据·数据库·hbase
苍老流年6 天前
10. Hbase Compaction命令
大数据·数据库·hbase
狮歌~资深攻城狮6 天前
深入浅出理解HBase:大数据时代的“超级仓库”
大数据·hbase
四十aaa10 天前
前端监控SDK:从基础到实践 (3. 行为监控)
前端·javascript·监控
rocksun12 天前
OPENTELEMETRY:GO可观测性指南
监控