1、准备数据
在本地新建一个txt文件(例如 /root/ys/login_date.txt),每行格式为“用户id,登录时间”,内容如下:
1001,2023-01-01 08:15:22
1002,2023-01-01 09:30:45
1003,2023-01-01 10:45:12
1004,2023-01-01 11:20:33
1005,2023-01-01 12:10:55
1006,2023-01-01 13:45:28
1007,2023-01-01 14:30:17
1008,2023-01-01 15:25:42
1009,2023-01-01 16:40:33
1010,2023-01-01 17:15:58
1001,2023-01-02 08:20:15
1002,2023-01-02 09:35:30
1003,2023-01-02 10:50:45
1004,2023-01-02 11:25:18
1005,2023-01-02 12:15:40
1006,2023-01-02 13:50:22
1007,2023-01-02 14:35:37
1008,2023-01-02 15:30:28
1009,2023-01-02 16:45:15
1010,2023-01-02 17:20:50
1011,2023-01-03 08:10:33
1012,2023-01-03 09:25:47
1013,2023-01-03 10:40:22
1014,2023-01-03 11:15:55
1015,2023-01-03 12:05:38
1016,2023-01-03 13:40:15
1017,2023-01-03 14:25:50
1018,2023-01-03 15:20:35
1019,2023-01-03 16:35:20
1020,2023-01-03 17:10:45
1001,2023-01-04 08:25:18
1002,2023-01-04 09:40:33
1003,2023-01-04 10:55:47
1004,2023-01-04 11:30:22
1005,2023-01-04 12:20:55
1006,2023-01-04 13:55:38
1007,2023-01-04 14:40:15
1008,2023-01-04 15:35:50
1009,2023-01-04 16:50:35
1010,2023-01-04 17:25:20
1021,2023-01-05 08:05:45
1022,2023-01-05 09:20:30
1023,2023-01-05 10:35:15
1024,2023-01-05 11:10:48
1025,2023-01-05 12:00:33
1026,2023-01-05 13:35:10
1027,2023-01-05 14:20:45
1028,2023-01-05 15:15:30
1029,2023-01-05 16:30:15
1030,2023-01-05 17:05:50
1011,2023-01-06 08:30:22
1012,2023-01-06 09:45:37
1013,2023-01-06 11:00:15
1014,2023-01-06 11:35:50
1015,2023-01-06 12:25:35
1016,2023-01-06 14:00:20
1017,2023-01-06 14:45:55
1018,2023-01-06 15:40:33
1019,2023-01-06 16:55:18
1020,2023-01-06 17:30:45
1031,2023-01-07 08:15:30
1032,2023-01-07 09:30:15
1033,2023-01-07 10:45:50
1034,2023-01-07 11:20:35
1035,2023-01-07 12:10:20
1036,2023-01-07 13:45:55
1037,2023-01-07 14:30:40
1038,2023-01-07 15:25:25
1039,2023-01-07 16:40:10
1040,2023-01-07 17:15:45
1021,2023-01-08 08:20:33
1022,2023-01-08 09:35:18
1023,2023-01-08 10:50:53
1024,2023-01-08 11:25:38
1025,2023-01-08 12:15:23
1026,2023-01-08 13:50:58
1027,2023-01-08 14:35:43
1028,2023-01-08 15:30:28
1029,2023-01-08 16:45:13
1030,2023-01-08 17:20:48
2、在hive准备表
-- Login table with a user id and a login timestamp per row, stored as comma-delimited text.
CREATE TABLE login1 (
    user_id    INT,
    login_date TIMESTAMP
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';
3、将本地文件中的数据加载到表中
-- Append the rows of the local text file to login1 (no OVERWRITE, so existing rows are kept).
LOAD DATA LOCAL
    INPATH '/root/ys/login_date.txt'
    INTO TABLE login1;
4、计算每个用户最大连续登录天数,以及该连续登录区间的开始日期和结束日期
sql
-- Longest run of consecutive login days per user, with the first and last day of that run.
-- Output columns: user_id, max_ts (run length in days), min_date, max_date.
-- Step 1: truncate each login timestamp to a calendar day and de-duplicate, so several logins on the same day count once.
WITH a AS (
    SELECT
        user_id,
        date_format(login_date, 'yyyy-MM-dd') AS login_date
    FROM login1
    GROUP BY
        user_id,
        date_format(login_date, 'yyyy-MM-dd')
),
-- Step 2: number each user's login days in chronological order.
b AS (
    SELECT
        user_id,
        login_date,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY login_date) AS rn
    FROM a
),
-- Step 3: subtract the row number from the date. Days that belong to the same consecutive run all map to the same anchor date.
c AS (
    SELECT
        user_id,
        login_date,
        date_sub(login_date, rn) AS streak_anchor
    FROM b
),
-- Step 4: one row per user and run, with the run length and its first and last day.
-- Grouping by streak_anchor is what separates the runs. Grouping by user_id alone would merge every login day of a user into a single bogus run.
d AS (
    SELECT
        user_id,
        COUNT(*)        AS ts,
        MIN(login_date) AS min_date,
        MAX(login_date) AS max_date
    FROM c
    GROUP BY
        user_id,
        streak_anchor
),
-- Step 5: rank each user's runs, longest first. Ties are broken by the earliest start date so the result is deterministic.
e AS (
    SELECT
        user_id,
        ts,
        min_date,
        max_date,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY ts DESC, min_date) AS streak_rank
    FROM d
)
-- Keep only the longest run of every user.
SELECT
    user_id,
    ts AS max_ts,
    min_date,
    max_date
FROM e
WHERE streak_rank = 1;