sql练习专场(一) (16-20)

第十六题:同时在线问题

create table sql1_16 (
    id   int,
    stt  string,
    edt string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_16.txt' into table sql1_16;

id          stt                 edt
1001,2021-06-14 12:12:12,2021-06-14 18:12:12
1003,2021-06-14 13:12:12,2021-06-14 16:12:12
1004,2021-06-14 13:15:12,2021-06-14 20:12:12
1002,2021-06-14 15:12:12,2021-06-14 16:12:12
1005,2021-06-14 15:18:12,2021-06-14 20:12:12
1001,2021-06-14 20:12:12,2021-06-14 23:12:12
1006,2021-06-14 21:12:12,2021-06-14 23:15:12
1007,2021-06-14 22:12:12,2021-06-14 23:10:12

计算出平台最高峰同时在线的主播人数。将开始时间和结束时间分开,变成两条数据,进行累加即可。

-- 计算出平台最高峰同时在线的主播人数。

with t1 as (
    select id,stt,1 num from sql1_16
    union all
    select id,edt,-1 num from sql1_16
),t2 as (
    select *,sum(num) over ( order by stt) n from t1
)
select max(n) from t2;

第十七题:车辆每天上传N条数据,要求获得每日最后一条数据,请用至少三种方式写出来,并说明三种方式的区别

create table sql1_17 (
    vin   string,
    pkgts  string,
    value string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_17.txt' into table sql1_17;


vin                pkgts             value
1111111111111111,2022-06-01 16:54:57,60.0
1111111111111111,2022-06-01 07:40:27,6.0
2222222222222222,2022-06-01 19:40:27,7.0
1111111111111111,2022-06-26 12:16:45,64.0
2222222222222222,2022-06-05 23:00:42,18.0
2222222222222222,2022-06-05 09:10:17,44.0
2222222222222222,2022-06-05 13:00:25,19.0
2222222222222222,2022-06-13 16:14:04,43.0
2222222222222222,2022-06-13 07:21:08,40.0

-- 获得每日最后一条数据,请用至少三种方式写出来,并说明三种方式的区别

方式一  group by + exists

with t1 as
    ( select max(pkgts) max from sql1_17 group by substr(pkgts,1,10) )
select  * from sql1_17 where exists (select * from t1 where pkgts = t1.max);

方式二: 窗口函数row_number

with t1 as ( select *,row_number() over (partition by substr(pkgts,1,10) order by pkgts desc  ) num from sql1_17 )
select vin, pkgts, value from t1 where num =1;

方式三: 窗口函数first_value

with t1 as
    ( select *,first_value(pkgts) over (partition by substr(pkgts,1,10) order by pkgts desc)pkgts1  from sql1_17  )
select vin, pkgts, value from t1 where pkgts = pkgts1;

第十八题:打折日期交叉问题

create table sql1_18(
 brand string,
 stt string,
 edt string
)
row format delimited
fields terminated by ",";
load data local inpath "/home/homedata/sql_1/sql1_18.txt" into table sql1_18;

brand stt        edt
oppo,2021-06-05,2021-06-09
oppo,2021-06-11,2021-06-21
vivo,2021-06-05,2021-06-15
vivo,2021-06-09,2021-06-21
redmi,2021-06-05,2021-06-21
redmi,2021-06-09,2021-06-15
redmi,2021-06-17,2021-06-26
huawei,2021-06-05,2021-06-26
huawei,2021-06-09,2021-06-15
huawei,2021-06-17,2021-06-21

这道题目主要的难点就是在于日期的交叉,现在有两个思路,第一种是(F-A+1)+(H-G+1),第二种表示是(B-A)+(C-B)+(D-C)+(E-D)+(F-E)+1+(H-G)+1

不论是哪一个思路都需要将开始日期和结束日期分开,然后进行汇总,如下展示:

with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
)select *,sum(status) over (partition by brand order by stt ) gs from t1;

第一种思路的解法:

首先需要获取出每一段的起始和结束时间,将数据中的status=gs=1 和 gs=0的数据筛选出来。同时将起始时间和结束时间相结合

with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
),t2 as (
    select *,sum(status) over (partition by brand order by stt ) gs from t1
)
select brand,stt,`if`(status = 1,lead(stt,1,null) over (partition by brand order by stt),null) edt from t2 where status = gs or gs = 0;

然后将其中有null的数据排除,进行日期的相减即可

with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
),t2 as (
    select *,sum(status) over (partition by brand order by stt ) gs from t1
),t3 as (
    select brand,stt,`if`(status = 1,lead(stt,1,null) over (partition by brand order by stt),null) edt from t2 where status = gs or gs = 0
)
select brand,sum(datediff(edt,stt)+1) day from t3 where edt is not null group by brand;

第二种思路的解法:

思路:

SQL打折日期交叉问题_sql题各品牌打折总天数-CSDN博客

with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
),t2 as (
    select *,sum(status) over (partition by brand order by stt ) gs from t1
),t3 as (
    select *,`if`(gs!=0,lead(stt,1,stt) over (partition by brand order by stt),stt )  ett from t2
),t4 as (
    select *,datediff(ett,stt) days from t3
)
select brand,sum(days)+count(`if`(gs=0,1,null)) tds from t4 group by brand;

第十九题:删除重复数据,name列相同的为重复数据

CREATE TABLE sql1_19 (
  `id`    int,
  `name` string
) ;
INSERT INTO sql1_19 VALUES ('1', 'A')
,('2', 'A')
,('3', 'A')
,('4', 'B')
,('5', 'B')
,('6', 'C')
,('7', 'B')
,('8', 'B')
,('9', 'B')
,('10', 'E')
,('11', 'E')
,('12', 'E');
select * from sql1_19;

删除name相同的数据,重复数据只保留一条id最大的

select max(id) id ,name from sql1_19 group by name;

第二十题:差值分组问题

create table sql1_20(
    id int,
    ts string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_20.txt' into table sql1_20;
select * from sql1_20;

id    ts(秒)
1001,17523641234
1001,17523641256
1002,17523641278
1001,17523641334
1002,17523641434
1001,17523641534
1001,17523641544
1002,17523641634
1001,17523641638
1001,17523641654

每两条数据间隔60秒以内则为同一组------sum + over(order)时窗口大小会逐渐变大,让每一组开始的数据为1 ,同组为0 ,用1隔开所有的组。

with t1 as (
    SELECT  id ,ts ,ts-lag(ts,1,ts) OVER (PARTITION BY id  order BY ts ) seconds FROM sql1_20
) ,t2 as (
    select id, ts,`if`(seconds <60 ,0,1) groupa from t1
)
select id,ts,sum(groupa)over (partition by id order by ts)+1 groups from t2;
相关推荐
szxinmai主板定制专家4 分钟前
【国产NI替代】基于FPGA的32通道(24bits)高精度终端采集核心板卡
大数据·人工智能·fpga开发
亽仒凣凣5 分钟前
Windows安装Redis图文教程
数据库·windows·redis
亦世凡华、14 分钟前
MySQL--》如何在MySQL中打造高效优化索引
数据库·经验分享·mysql·索引·性能分析
YashanDB16 分钟前
【YashanDB知识库】Mybatis-Plus调用YashanDB怎么设置分页
数据库·yashandb·崖山数据库
ProtonBase27 分钟前
如何从 0 到 1 ,打造全新一代分布式数据架构
java·网络·数据库·数据仓库·分布式·云原生·架构
TGB-Earnest2 小时前
【py脚本+logstash+es实现自动化检测工具】
大数据·elasticsearch·自动化
大圣数据星球4 小时前
Fluss 写入数据湖实战
大数据·设计模式·flink
suweijie7684 小时前
SpringCloudAlibaba | Sentinel从基础到进阶
java·大数据·sentinel
云和数据.ChenGuang6 小时前
Django 应用安装脚本 – 如何将应用添加到 INSTALLED_APPS 设置中 原创
数据库·django·sqlite
woshilys6 小时前
sql server 查询对象的修改时间
运维·数据库·sqlserver