sql练习专场(一) (16-20)

第十六题:同时在线问题

复制代码
create table sql1_16 (
    id   int,
    stt  string,
    edt string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_16.txt' into table sql1_16;

id          stt                 edt
1001,2021-06-14 12:12:12,2021-06-14 18:12:12
1003,2021-06-14 13:12:12,2021-06-14 16:12:12
1004,2021-06-14 13:15:12,2021-06-14 20:12:12
1002,2021-06-14 15:12:12,2021-06-14 16:12:12
1005,2021-06-14 15:18:12,2021-06-14 20:12:12
1001,2021-06-14 20:12:12,2021-06-14 23:12:12
1006,2021-06-14 21:12:12,2021-06-14 23:15:12
1007,2021-06-14 22:12:12,2021-06-14 23:10:12

计算出平台最高峰同时在线的主播人数。将开始时间和结束时间分开,变成两条数据,进行累加即可。

复制代码
-- 计算出平台最高峰同时在线的主播人数。

with t1 as (
    select id,stt,1 num from sql1_16
    union all
    select id,edt,-1 num from sql1_16
),t2 as (
    select *,sum(num) over ( order by stt) n from t1
)
select max(n) from t2;

第十七题:车辆每天上传N条数据,要求获得每日最后一条数据,请用至少三种方式写出来,并说明三种方式的区别

复制代码
create table sql1_17 (
    vin   string,
    pkgts  string,
    value string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_17.txt' into table sql1_17;


vin                pkgts             value
1111111111111111,2022-06-01 16:54:57,60.0
1111111111111111,2022-06-01 07:40:27,6.0
2222222222222222,2022-06-01 19:40:27,7.0
1111111111111111,2022-06-26 12:16:45,64.0
2222222222222222,2022-06-05 23:00:42,18.0
2222222222222222,2022-06-05 09:10:17,44.0
2222222222222222,2022-06-05 13:00:25,19.0
2222222222222222,2022-06-13 16:14:04,43.0
2222222222222222,2022-06-13 07:21:08,40.0

-- 获得每日最后一条数据,请用至少三种方式写出来,并说明三种方式的区别

方式一  group by + exists

with t1 as
    ( select max(pkgts) max from sql1_17 group by substr(pkgts,1,10) )
select  * from sql1_17 where exists (select * from t1 where pkgts = t1.max);

方式二: 窗口函数row_number

with t1 as ( select *,row_number() over (partition by substr(pkgts,1,10) order by pkgts desc  ) num from sql1_17 )
select vin, pkgts, value from t1 where num =1;

方式三: 窗口函数first_value

with t1 as
    ( select *,first_value(pkgts) over (partition by substr(pkgts,1,10) order by pkgts desc)pkgts1  from sql1_17  )
select vin, pkgts, value from t1 where pkgts = pkgts1;

第十八题:打折日期交叉问题

复制代码
create table sql1_18(
 brand string,
 stt string,
 edt string
)
row format delimited
fields terminated by ",";
load data local inpath "/home/homedata/sql_1/sql1_18.txt" into table sql1_18;

brand stt        edt
oppo,2021-06-05,2021-06-09
oppo,2021-06-11,2021-06-21
vivo,2021-06-05,2021-06-15
vivo,2021-06-09,2021-06-21
redmi,2021-06-05,2021-06-21
redmi,2021-06-09,2021-06-15
redmi,2021-06-17,2021-06-26
huawei,2021-06-05,2021-06-26
huawei,2021-06-09,2021-06-15
huawei,2021-06-17,2021-06-21

这道题目主要的难点就是在于日期的交叉,现在有两个思路,第一种是(F-A+1)+(H-G+1),第二种表示是(B-A)+(C-B)+(D-C)+(E-D)+(F-E)+1+(H-G)+1

不论是哪一个思路都需要将开始日期和结束日期分开,然后进行汇总,如下展示:

复制代码
with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
)select *,sum(status) over (partition by brand order by stt ) gs from t1;

第一种思路的解法:

首先需要获取出每一段的起始和结束时间,将数据中的status=gs=1 和 gs=0的数据筛选出来。同时将起始时间和结束时间相结合

复制代码
with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
),t2 as (
    select *,sum(status) over (partition by brand order by stt ) gs from t1
)
select brand,stt,`if`(status = 1,lead(stt,1,null) over (partition by brand order by stt),null) edt from t2 where status = gs or gs = 0;

然后将其中有null的数据排除,进行日期的相减即可

复制代码
with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
),t2 as (
    select *,sum(status) over (partition by brand order by stt ) gs from t1
),t3 as (
    select brand,stt,`if`(status = 1,lead(stt,1,null) over (partition by brand order by stt),null) edt from t2 where status = gs or gs = 0
)
select brand,sum(datediff(edt,stt)+1) day from t3 where edt is not null group by brand;

第二种思路的解法:

思路:

SQL打折日期交叉问题_sql题各品牌打折总天数-CSDN博客

复制代码
with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
),t2 as (
    select *,sum(status) over (partition by brand order by stt ) gs from t1
),t3 as (
    select *,`if`(gs!=0,lead(stt,1,stt) over (partition by brand order by stt),stt )  ett from t2
),t4 as (
    select *,datediff(ett,stt) days from t3
)
select brand,sum(days)+count(`if`(gs=0,1,null)) tds from t4 group by brand;

第十九题:删除重复数据,name列相同的为重复数据

复制代码
CREATE TABLE sql1_19 (
  `id`    int,
  `name` string
) ;
INSERT INTO sql1_19 VALUES ('1', 'A')
,('2', 'A')
,('3', 'A')
,('4', 'B')
,('5', 'B')
,('6', 'C')
,('7', 'B')
,('8', 'B')
,('9', 'B')
,('10', 'E')
,('11', 'E')
,('12', 'E');
select * from sql1_19;

删除name相同的数据,重复数据只保留一条id最大的

复制代码
select max(id) id ,name from sql1_19 group by name;

第二十题:差值分组问题

复制代码
create table sql1_20(
    id int,
    ts string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_20.txt' into table sql1_20;
select * from sql1_20;

id    ts(秒)
1001,17523641234
1001,17523641256
1002,17523641278
1001,17523641334
1002,17523641434
1001,17523641534
1001,17523641544
1002,17523641634
1001,17523641638
1001,17523641654

每两条数据间隔60秒以内则为同一组------sum + over(order)时窗口大小会逐渐变大,让每一组开始的数据为1 ,同组为0 ,用1隔开所有的组。

复制代码
with t1 as (
    SELECT  id ,ts ,ts-lag(ts,1,ts) OVER (PARTITION BY id  order BY ts ) seconds FROM sql1_20
) ,t2 as (
    select id, ts,`if`(seconds <60 ,0,1) groupa from t1
)
select id,ts,sum(groupa)over (partition by id order by ts)+1 groups from t2;
相关推荐
线条14 分钟前
Spark 单机模式安装与测试全攻略
大数据·分布式·spark
代码老y4 分钟前
穿透、误伤与回环——Redis 缓存防御体系的负向路径与治理艺术
数据库·redis·缓存
Themberfue9 分钟前
Redis ①⑥-缓存
数据库·redis·adb·缓存
Kyln.Wu9 分钟前
【python实用小脚本-139】Python 在线图片批量下载器:requests+PIL 一键保存网络图像
数据库·python·php
老周聊架构19 分钟前
大数据领域开山鼻祖组件Hadoop核心架构设计
大数据
李元豪2 小时前
grpo nl2sql qwen3 模型强化学习训练有效果的成立条件有哪些
数据库·oracle
Hello.Reader5 小时前
RedisJSON 路径语法深度解析与实战
数据库·redis·缓存
TDengine (老段)6 小时前
TDengine 使用最佳实践(2)
大数据·数据库·物联网·时序数据库·iot·tdengine·涛思数据
Deng9452013147 小时前
基于大数据的电力系统故障诊断技术研究
大数据·matplotlib·深度特征提取·随机森林分类算法·标签编码
设计师小聂!9 小时前
Linux系统中部署Redis详解
linux·运维·数据库·redis