-- Schema and sample data for the two daily-status tables.
-- Every statement is terminated with ';' so the script can be run as a whole.
create table if not exists Failed (fail_date date);
create table if not exists Succeeded (success_date date);

-- Reset and load the days on which the task failed.
truncate table Failed;
insert into Failed (fail_date)
values
    ('2018-12-28'),
    ('2018-12-29'),
    ('2019-01-04'),
    ('2019-01-05');

-- Reset and load the days on which the task succeeded.
truncate table Succeeded;
insert into Succeeded (success_date)
values
    ('2018-12-30'),
    ('2018-12-31'),
    ('2019-01-01'),
    ('2019-01-02'),
    ('2019-01-03'),
    ('2019-01-06');
分析:
① 首先给两张表各加一列状态列(state),再用 union all 把两张表合并:
-- Step 1: tag every row with the table it came from and stack both tables.
-- A union takes its output column names from the first branch, so both
-- branches are written with the same explicit aliases (date, state).
select
    success_date as date,
    'succeeded' as state
from Succeeded
union all
select
    fail_date as date,
    'failed' as state
from Failed;
② 筛选出 2019 年的数据,并按日期排序:
-- Step 2: keep only the rows dated within 2019 and list them chronologically.
with t1 as (
    -- Both tables stacked, each row tagged with its state.
    select
        success_date as date,
        'succeeded' as state
    from Succeeded
    union all
    select
        fail_date as date,
        'failed' as state
    from Failed
)
select
    date,
    state
from t1
where date between '2019-01-01' and '2019-12-31'
order by date;
③ 按状态分组(partition by state),组内按日期用 row_number() 编号:
-- Step 3: number the 2019 rows within each state, in date order.
with t1 as (
    -- Both tables stacked, each row tagged with its state.
    select
        success_date as date,
        'succeeded' as state
    from Succeeded
    union all
    select
        fail_date as date,
        'failed' as state
    from Failed
),
t2 as (
    -- 2019 rows only. No order by here: ordering inside a CTE is not
    -- guaranteed to reach the final result.
    select
        date,
        state
    from t1
    where date between '2019-01-01' and '2019-12-31'
)
select
    date,
    state,
    row_number() over (partition by state order by date) as r1
from t2
-- Sort in the outer query so the displayed order is explicit.
order by date;
④ 构造差值:用 date 减去 r1 天,得到辅助日期 r2;同一状态下 r2 相同的行,日期就是连续的:
-- Step 4: subtract the per-state row number (in days) from each date.
-- Consecutive dates in the same state produce the same helper date r2.
with t1 as (
    -- Both tables stacked, each row tagged with its state.
    select
        success_date as date,
        'succeeded' as state
    from Succeeded
    union all
    select
        fail_date as date,
        'failed' as state
    from Failed
),
t2 as (
    -- 2019 rows only.
    select
        date,
        state
    from t1
    where date between '2019-01-01' and '2019-12-31'
),
t3 as (
    -- Position of each row within its state, in date order.
    select
        date,
        state,
        row_number() over (partition by state order by date) as r1
    from t2
)
select
    date,
    state,
    r1,
    date_sub(date, interval r1 day) as r2
from t3
order by date;
-- Step 5: collapse each (state, r2) group into one period row.
-- The CTE chain from the earlier steps is repeated so that t4 is defined
-- and the statement can be run on its own.
with t1 as (
    -- Both tables stacked, each row tagged with its state.
    select
        success_date as date,
        'succeeded' as state
    from Succeeded
    union all
    select
        fail_date as date,
        'failed' as state
    from Failed
),
t2 as (
    -- 2019 rows only.
    select
        date,
        state
    from t1
    where date between '2019-01-01' and '2019-12-31'
),
t3 as (
    -- Position of each row within its state, in date order.
    select
        date,
        state,
        row_number() over (partition by state order by date) as r1
    from t2
),
t4 as (
    -- Helper date: identical for consecutive dates of the same state.
    select
        date,
        state,
        r1,
        date_sub(date, interval r1 day) as r2
    from t3
)
select distinct
    state as period_state,
    first_value(date) over (partition by state, r2 order by date) as start_date,
    max(date) over (partition by state, r2) as end_date
    -- Two ways to get end_date are offered. The alternative to max() is
    -- last_value() with the frame widened to the whole partition:
    --   last_value(date) over (partition by state, r2 order by date
    --       rows between unbounded preceding and unbounded following)
from t4
order by start_date;
完整代码(sql):
-- Full solution: report each contiguous run of 2019 dates that share a state
-- as one row (period_state, start_date, end_date), ordered by start_date.
with t1 as (
    -- Both tables stacked, each row tagged with its state.
    select
        success_date as date,
        'succeeded' as state
    from Succeeded
    union all
    select
        fail_date as date,
        'failed' as state
    from Failed
),
t2 as (
    -- 2019 rows only. Intermediate CTEs are left unsorted; only the final
    -- order by determines the output order.
    select
        date,
        state
    from t1
    where date between '2019-01-01' and '2019-12-31'
),
t3 as (
    -- Position of each row within its state, in date order.
    select
        date,
        state,
        row_number() over (partition by state order by date) as r1
    from t2
),
t4 as (
    -- Helper date: date minus r1 days is identical for consecutive dates
    -- of the same state, so (state, r2) identifies one contiguous period.
    select
        date,
        state,
        r1,
        date_sub(date, interval r1 day) as r2
    from t3
)
select distinct
    state as period_state,
    first_value(date) over (partition by state, r2 order by date) as start_date,
    max(date) over (partition by state, r2) as end_date
    -- Alternative for end_date: last_value() only works with the frame
    -- widened to the whole partition:
    --   last_value(date) over (partition by state, r2 order by date
    --       rows between unbounded preceding and unbounded following)
from t4
order by start_date;
总结:
① 最后求 end_date 时,直接用 last_value(不指定窗口帧)会出错:默认帧只到当前行,取到的是当前行的日期,而不是分区的最后一个日期;因此换了一种写法,用 max
②碰到日期 求最大 最小 可以优先考虑max min函数
③注意排序 不然数据多的时候 会出现错乱
④first_value 取第一个值 注意排序
⑤ last_value 取最后一个值。带 order by 时,它的默认窗口帧是
range between unbounded preceding and current row
(只到当前行及与其排序值相同的行),所以取不到分区的最后一行。
要想使用它,需要显式把帧扩展到整个分区,如下:
order by date rows between unbounded preceding and unbounded following