如何利用Paimon做流量定时检查? --- 试试标签表

graph TD
    A["Source: 网络流量"] --> B["Flink预聚合层"]
    B --> C["Paimon Aggregation Table"]
    C --> D["Flink流式读取"]
    D --> E["实时告警处理"]
    E --> F["Elasticsearch"]
    B --> G["Paimon Append Table"]
    G --> H["批处理分析"]
    H --> F

主表

sql 复制代码
-- 创建源表
CREATE TABLE source_table (  
    dt TIMESTAMP(3),  
    sip STRING,  
    sport INT,  
    dip STRING,  
    dport INT,  
    protocol STRING,  
    flow_size BIGINT,  
    WATERMARK FOR dt AS dt - INTERVAL '5' SECOND  
) WITH (  
    'connector' = 'kafka',  
    'topic' = 'network-flow',  
    'properties.bootstrap.servers' = 'localhost:9092',  
    'format' = 'json'  
);  
  
  1. 创建每日流量聚合表

创建一个按天聚合的表来存储每日总流量:

sql 复制代码
-- 创建每日流量聚合表
CREATE TABLE daily_flow_summary (  
    dt_date STRING,  
    total_flow_bytes BIGINT,  
    last_check_time TIMESTAMP(3),  
    PRIMARY KEY (dt_date) NOT ENFORCED  
) WITH (  
    'merge-engine' = 'aggregation',  
    'fields.total_flow_bytes.aggregate-function' = 'sum',  
    'fields.last_check_time.aggregate-function' = 'last_value',  
    'changelog-producer' = 'lookup'  
);  
  
-- 创建每日流量明细表(用于存储贡献最大的记录)
CREATE TABLE daily_flow_details (  
    dt_date STRING,  
    sip STRING,  
    sport INT,  
    dip STRING,  
    dport INT,  
    protocol STRING,  
    flow_bytes BIGINT,  
    PRIMARY KEY (dt_date, sip, sport, dip, dport, protocol) NOT ENFORCED  
) WITH (  
    'merge-engine' = 'aggregation',  
    'fields.flow_bytes.aggregate-function' = 'sum',  
    'changelog-producer' = 'lookup'  
);
  2. 数据聚合作业

从原始流量数据聚合到每日汇总:

sql 复制代码
-- 聚合每日总流量
INSERT INTO daily_flow_summary  
SELECT   
    DATE_FORMAT(dt, 'yyyy-MM-dd') as dt_date,  
    SUM(flow_size) as total_flow_bytes,  
    CURRENT_TIMESTAMP as last_check_time  
FROM source_table  
GROUP BY DATE_FORMAT(dt, 'yyyy-MM-dd');  
  
-- 聚合每日明细数据
INSERT INTO daily_flow_details  
SELECT   
    DATE_FORMAT(dt, 'yyyy-MM-dd') as dt_date,  
    sip, sport, dip, dport, protocol,  
    SUM(flow_size) as flow_bytes  
FROM source_table  
GROUP BY DATE_FORMAT(dt, 'yyyy-MM-dd'), sip, sport, dip, dport, protocol;
  3. 告警检查作业(每小时执行)

使用Paimon的定时标签功能来实现每小时检查:

sql 复制代码
-- 创建告警配置表
CREATE TABLE alert_config (  
    config_key STRING,  
    threshold_value BIGINT,  
    PRIMARY KEY (config_key) NOT ENFORCED  
);  
  
-- 插入流量阈值配置(例如:每天100GB)
INSERT INTO alert_config VALUES ('daily_flow_threshold', 107374182400);  
  
-- 创建告警结果表
CREATE TABLE flow_alert_results (  
    dt_date STRING,  
    total_flow_bytes BIGINT,  
    threshold_value BIGINT,  
    alert_triggered BOOLEAN,  
    top_contributors STRING, -- JSON格式存储贡献最大的明细
    alert_time TIMESTAMP(3),  
    PRIMARY KEY (dt_date, alert_time) NOT ENFORCED  
) WITH (  
    'connector' = 'elasticsearch-7',  
    'hosts' = 'http://localhost:9200',  
    'index' = 'daily-flow-alerts'  
);
  4. 告警检查和明细提取逻辑
sql 复制代码
-- 每小时执行的告警检查作业
INSERT INTO flow_alert_results  
SELECT   
    s.dt_date,  
    s.total_flow_bytes,  
    c.threshold_value,  
    CASE WHEN s.total_flow_bytes >= c.threshold_value THEN true ELSE false END as alert_triggered,  
    CASE   
        WHEN s.total_flow_bytes >= c.threshold_value THEN  
            -- 获取贡献最大的前10条明细,转换为JSON格式
            (SELECT LISTAGG(  
                CONCAT('{"sip":"', sip, '","sport":', sport, ',"dip":"', dip, '","dport":', dport, ',"protocol":"', protocol, '","flow_bytes":', flow_bytes, '}'),   
                ','  
            )   
            FROM (  
                SELECT sip, sport, dip, dport, protocol, flow_bytes,  
                       ROW_NUMBER() OVER (ORDER BY flow_bytes DESC) as rn  
                FROM daily_flow_details d  
                WHERE d.dt_date = s.dt_date  
            ) WHERE rn <= 10)  
        ELSE NULL   
    END as top_contributors,  
    CURRENT_TIMESTAMP as alert_time  
FROM daily_flow_summary s  
CROSS JOIN alert_config c  
WHERE c.config_key = 'daily_flow_threshold'  
  AND s.dt_date = DATE_FORMAT(CURRENT_TIMESTAMP, 'yyyy-MM-dd')  
  AND s.total_flow_bytes >= c.threshold_value; -- 只有满足阈值才插入告警
  5. 定时作业配置
sql 复制代码
-- 设置表属性以支持每小时自动检查
ALTER TABLE daily_flow_summary SET (  
    'tag.automatic-creation' = 'process-time',  
    'tag.creation-period' = 'hourly',  
    'tag.creation-delay' = '5 m'  
);  
  
-- 创建定时检查作业的完整SQL
EXECUTE STATEMENT SET  
BEGIN  
    -- 持续聚合数据
    INSERT INTO daily_flow_summary  
    SELECT   
        DATE_FORMAT(dt, 'yyyy-MM-dd') as dt_date,  
        SUM(flow_size) as total_flow_bytes,  
        CURRENT_TIMESTAMP as last_check_time  
    FROM source_table  
    GROUP BY DATE_FORMAT(dt, 'yyyy-MM-dd');  
      
    INSERT INTO daily_flow_details    
    SELECT   
        DATE_FORMAT(dt, 'yyyy-MM-dd') as dt_date,  
        sip, sport, dip, dport, protocol,  
        SUM(flow_size) as flow_bytes  
    FROM source_table  
    GROUP BY DATE_FORMAT(dt, 'yyyy-MM-dd'), sip, sport, dip, dport, protocol;  
      
    -- 告警检查(基于流式读取,每小时触发)
    INSERT INTO flow_alert_results  
    SELECT   
        s.dt_date,  
        s.total_flow_bytes,  
        c.threshold_value,  
        true as alert_triggered,  
        (SELECT CONCAT('[', LISTAGG(  
            CONCAT('{"sip":"', sip, '","sport":', sport, ',"dip":"', dip, '","dport":', dport, ',"protocol":"', protocol, '","flow_bytes":', flow_bytes, '}'),   
            ','  
        ), ']')  
        FROM (  
            SELECT sip, sport, dip, dport, protocol, flow_bytes,  
                   ROW_NUMBER() OVER (ORDER BY flow_bytes DESC) as rn  
            FROM daily_flow_details d  
            WHERE d.dt_date = s.dt_date  
        ) WHERE rn <= 10) as top_contributors,  
        CURRENT_TIMESTAMP as alert_time  
    FROM daily_flow_summary s  
    CROSS JOIN alert_config c  
    WHERE c.config_key = 'daily_flow_threshold'  
      AND s.total_flow_bytes >= c.threshold_value;  
END;
相关推荐
码事漫谈1 分钟前
C++ vector越界问题完全解决方案:从基础防护到现代C++新特性
后端
啾啾大学习1 小时前
让我们快速入门DDD
后端·领域驱动设计
老张聊数据集成1 小时前
数据分析师如何构建自己的底层逻辑?
后端·数据分析
咕噜分发企业签名APP加固彭于晏1 小时前
市面上有多少智能体平台
前端·后端
掘金一周2 小时前
我开源了一款 Canvas “瑞士军刀”,十几种“特效与工具”开箱即用 | 掘金一周 8.14
前端·人工智能·后端
村姑飞来了2 小时前
Spring 扩展:动态使某个 @Import 方式导入的 @Configuration 类失效
后端
开心就好20252 小时前
前端性能优化移动端网页滚动卡顿与掉帧问题实战
后端
paopaokaka_luck2 小时前
校园快递小程序(腾讯地图API、二维码识别、Echarts图形化分析)
vue.js·spring boot·后端·小程序·uni-app