一、目的
梳理一下目前Hive数仓各层表
注意:只建立一个数据库hurys_db
二、ODS层
2.1 建表语句
create external table if not exists hurys_db.ods_queue(
device_no string comment '设备编号',
source_device_type string comment '设备类型',
sn string comment '设备序列号 ',
model string comment '设备型号',
create_time string comment '创建时间',
lane_no int comment '车道编号',
lane_type int comment '车道类型 0:渠化1:来向2:出口3:去向4:左弯待转区5:直行待行区6:右转专用道99:未定义车道',
queue_count int comment '排队车辆数',
queue_len float comment '排队长度(m)',
queue_head float comment '排队头车距停止线距离(m)',
queue_tail float comment '排队尾车距停止线距离(m)'
)
comment '静态排队数据外部表——静态分区'
partitioned by (day string)
row format delimited fields terminated by ','
stored as SequenceFile
;
2.2 SQL语句
--刷新表分区
msck repair table ods_queue;
--查看表分区
show partitions ods_queue;
--查看表数据
select * from ods_queue;
三、DWD层
3.1 脱敏清洗表
3.1.1 建表语句
create table if not exists hurys_db.dwd_queue(
id string comment '唯一ID',
device_no string comment '设备编号',
source_device_type string comment '设备类型',
sn string comment '设备序列号 ',
model string comment '设备型号',
create_time string comment '创建时间',
lane_no int comment '车道编号',
lane_type int comment '车道类型 0:渠化1:来向2:出口3:去向4:左弯待转区5:直行待行区6:右转专用道99:未定义车道',
queue_count int comment '排队车辆数',
queue_len decimal(10,2) comment '排队长度(m)',
queue_head decimal(10,2) comment '排队头车距停止线距离(m)',
queue_tail decimal(10,2) comment '排队尾车距停止线距离(m)'
)
comment '静态排队数据表——动态分区'
partitioned by (day string)
stored as orc
;
3.1.2 SQL语句
with t1 as (
select
device_no,
source_device_type,
sn,
model,
create_time,
lane_no,
lane_type,
case when queue_count is null then 0 else queue_count end as queue_count,
case when queue_len is null then 0 else cast(queue_len as decimal(10,2)) end as queue_len,
case when queue_head is null then 0 else cast(queue_head as decimal(10,2)) end as queue_head,
case when queue_tail is null then 0 else cast(queue_tail as decimal(10,2)) end as queue_tail,
substr(create_time,1,10) day
from hurys_db.ods_queue
where day = '2024-09-10'
)
insert overwrite table hurys_db.dwd_queue partition(day)
select
UUID() as id,
device_no,
source_device_type,
sn,
model,
create_time,
lane_no,
lane_type,
queue_count,
queue_len,
queue_head,
queue_tail,
day
from t1
where day = '2024-09-10' and device_no is not null and create_time is not null
and queue_len between 0 and 500 and queue_head between 0 and 500 and lane_no between 0 and 255
and queue_tail between 0 and 500 and queue_count between 0 and 100
group by device_no, source_device_type, sn, model, create_time, lane_no, lane_type, queue_count, queue_len, queue_head, queue_tail, day
;
--查看分区
show partitions hurys_db.dwd_queue;
--查看数据
select * from hurys_db.dwd_queue
where day='2024-09-10';
--删掉表分区
alter table hurys_db.dwd_queue drop partition (day='2024-09-04');
四、维度表
4.1 节假日表
4.1.1 原始表建表语句
create external table if not exists hurys_db.tb_holiday(
id int comment '主键id',
day date comment '日期',
holiday string comment '节假日',
year string comment '年份'
)
comment '节假日表'
row format delimited fields terminated by ','
stored as textfile location '/data/tb_holiday'
tblproperties("skip.header.line.count"="1") ;
4.1.2 清洗表建表语句
create table if not exists hurys_db.dwd_holiday(
day date comment '日期',
holiday string comment '节假日',
year string comment '年份'
)
comment '节假日表'
stored as orc
;
五、DWS层
5.1 统计数据流量表------动态分区------1天周期
5.1.1 建表语句
create table if not exists hurys_db.dws_statistics_volume_1day(
device_no string comment '设备编号',
scene_name string comment '场景名称',
lane_no int comment '车道编号',
lane_direction string comment '车道流向',
section_no int comment '断面编号',
device_direction string comment '雷达朝向',
sum_volume_day int comment '每天总流量',
week_day string comment '周几',
month string comment '月份'
)
comment '统计数据流量表——动态分区——1天周期'
partitioned by (day string)
stored as orc
;
5.1.2 SQL语句
insert overwrite table hurys_db.dws_statistics_volume_1day partition(day)
select
dwd_st.device_no,
dwd_sc.scene_name,
dwd_st.lane_no,
dwd_rl.lane_direction,
dwd_st.section_no,
dwd_rc.device_direction,
sum(volume_sum) sum_volume_day,
case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六'
else '周日' end as week_day,
substr(day,1,7) month,
day
from hurys_db.dwd_statistics as dwd_st
right join hurys_db.dwd_radar_lane as dwd_rl
on dwd_rl.device_no=dwd_st.device_no and dwd_rl.lane_no=dwd_st.lane_no
right join hurys_db.dwd_device_scene as dwd_ds
on dwd_ds.device_no=dwd_st.device_no
right join hurys_db.dwd_scene as dwd_sc
on dwd_sc.scene_id = dwd_ds.scene_id
right join hurys_db.dwd_radar_config as dwd_rc
on dwd_rc.device_no=dwd_st.device_no
where dwd_st.create_time is not null and dwd_st.day='2024-09-05'
group by dwd_st.device_no, dwd_sc.scene_name, dwd_st.lane_no, dwd_rl.lane_direction, dwd_st.section_no, dwd_rc.device_direction, case when pmod(datediff(create_time,'2023-11-27') + 1,7) = 1 then '周一'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 2 then '周二'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 3 then '周三'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 4 then '周四'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 5 then '周五'
when pmod(datediff(create_time,'2023-11-27') + 1,7) = 6 then '周六'
else '周日' end, day
;
这就是目前数仓各层的表样例,这边只是逻辑分层,并没有给每一层建立一个单独的数据库,那样在多层的表联合查询时速度会相比慢很多!!!