模拟生成数据
sql
-- Mock source of base-station connection events, generated by the Flink `datagen` connector.
CREATE TABLE ods_station_log (
    base_station_id INT,              -- base station ID
    event_type      INT,              -- event type: 1 = connect, 0 = disconnect
    event_time      TIMESTAMP_LTZ(3), -- event time (rowtime attribute)
    WATERMARK FOR event_time AS event_time - INTERVAL '5' SECOND -- tolerate up to 5 s of out-of-order data
) WITH (
    'connector' = 'datagen',
    -- Base station ID: random integer in [1, 2]
    'fields.base_station_id.kind' = 'random',
    'fields.base_station_id.min' = '1',
    'fields.base_station_id.max' = '2',
    -- Event type: random integer in [0, 1] (disconnect / connect)
    'fields.event_type.kind' = 'random',
    'fields.event_type.min' = '0',
    'fields.event_type.max' = '1',
    -- Event time: random instant at most 5 s before the current machine time.
    -- The spread is kept equal to the watermark delay above; a larger max-past
    -- would make most rows arrive behind the watermark, and event-time operators
    -- such as MATCH_RECOGNIZE drop late rows.
    'fields.event_time.kind' = 'random',
    'fields.event_time.max-past' = '5s',
    -- Generation rate: one row per second
    'rows-per-second' = '1'
);
需求: 基站与服务器保持长连接,每次连接和断开都会上报一条事件;统计过去一小时内每个基站的断开次数和时长
思路: 使用 MATCH_RECOGNIZE 按基站分区、按事件时间排序,将每个 connect 事件与紧随其后的 disconnect 事件配对,并计算两者之间的秒数:
sql
-- One row per connect -> disconnect pair of a base station.
-- `duration` is the number of seconds between the two events.
CREATE TEMPORARY VIEW disconnect_records2 AS
SELECT
    sess.base_station_id,
    sess.connect_time,
    sess.disconnect_time,
    TIMESTAMPDIFF(SECOND, sess.connect_time, sess.disconnect_time) AS duration
FROM ods_station_log
    MATCH_RECOGNIZE (
        -- Match independently per base station, in event-time order.
        PARTITION BY base_station_id
        ORDER BY event_time
        MEASURES
            conn_evt.event_time AS connect_time,
            disc_evt.event_time AS disconnect_time
        ONE ROW PER MATCH
        -- Resume matching after the disconnect row, so matches never overlap.
        AFTER MATCH SKIP PAST LAST ROW
        -- A connect row immediately followed by a disconnect row.
        PATTERN (conn_evt disc_evt)
        DEFINE
            conn_evt AS conn_evt.event_type = 1,
            disc_evt AS disc_evt.event_type = 0
    ) AS sess;