一、目的
在对DWD层清洗数据进行补全后,需要生成相应的补全记录,作为数据的标记
二、实施步骤
2.1 建表
-- Marker table for DWD-layer rows that were completed (record_type 0)
-- or repaired (record_type 1).
-- MergeTree engine, one partition per day, sorted and indexed on (day, id).
CREATE TABLE IF NOT EXISTS hurys_jw.dwd_data_correction_record
(
    -- Source data kind: 1 turning ratio, 2 statistics, 3 evaluation,
    -- 4 area, 6 static queue, 7 dynamic queue.
    data_type   Int32    COMMENT '数据类型 1:转向比,2:统计,3:评价,4:区域,6:静态排队,7:动态排队',
    -- Device number.
    device_no   String   COMMENT '设备编号',
    -- Unique ID.
    id          String   COMMENT '唯一ID',
    -- Creation time.
    create_time DateTime COMMENT '创建时间',
    -- Record kind: 0 completion, 1 repair.
    record_type Int32    COMMENT '记录类型 0:补全,1:修复',
    -- Date; also the partition key.
    day         Date     COMMENT '日期'
)
ENGINE = MergeTree
PARTITION BY day
PRIMARY KEY (day, id)
ORDER BY (day, id)
SETTINGS index_granularity = 8192;
2.2 SQL语句
-- 1.2 Completion records for statistics data.
-- Emits one marker row (data_type '2' = statistics, record_type '0' = completion)
-- for each dwd_statistics row of the given day that has no ods_statistics row
-- with the same device_no, create_time, lane_no, section_no and coil_no.
SELECT
    '2'                    AS data_type,
    t2.device_no,
    t2.id,
    t2.create_time,
    '0'                    AS record_type,
    CAST(t2.day AS String) AS day
FROM hurys_jw.dwd_statistics AS t2
LEFT JOIN hurys_jw.ods_statistics AS t3
    ON  t3.device_no   = t2.device_no
    AND t3.create_time = t2.create_time
    AND t3.lane_no     = t2.lane_no
    AND t3.section_no  = t2.section_no
    AND t3.coil_no     = t2.coil_no
WHERE t2.day = '2024-12-16'
  -- Unmatched rows are detected by an empty t3.device_no rather than IS NULL.
  -- NOTE(review): assumes t3.device_no is a non-Nullable String and
  -- join_use_nulls = 0, so unmatched rows carry '' instead of NULL - confirm.
  AND length(t3.device_no) = 0
;
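注意 where 条件中的 length(t3.device_no)=0(原文标红部分):由于表的 device_no 字段没有设置为允许为空(不是 Nullable 类型),left join 未匹配到的行中 t3.device_no 会被填充为默认值空字符串而不是 NULL,因此不能使用 t3.device_no is null 作为条件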
2.3 Kettle任务
data:image/s3,"s3://crabby-images/6d36b/6d36b679f1b0f9f81bdb3d319ba9ba2fe3d148a2" alt=""
2.3.1 newtime
data:image/s3,"s3://crabby-images/90bbd/90bbd45791e05b59c771e3c7036d018b160a6dbb" alt=""
2.3.2 替换NULL值
data:image/s3,"s3://crabby-images/3f0ed/3f0edfe18ffef36f65498bbbaf7d09f65d4ad95a" alt=""
2.3.3 clickhouse输入
data:image/s3,"s3://crabby-images/58ce7/58ce75d327ce0575636567bce883cbdf972963c1" alt=""
2.3.4 字段选择
data:image/s3,"s3://crabby-images/7ee77/7ee7703acf3d87a1cef3da4dd8fe6caa42d8dd2b" alt=""
2.3.5 clickhouse输出
data:image/s3,"s3://crabby-images/ae72a/ae72ad25c19ef49afa7c8df912d299381cfb290e" alt=""
2.3.6 Kettle任务运行
data:image/s3,"s3://crabby-images/6f8de/6f8dec03e12d9edace65d4dee982f027790497d6" alt=""
搞定!