sql练习专场(一) (16-20)

第十六题:同时在线问题

create table sql1_16 (
    id   int,
    stt  string,
    edt string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_16.txt' into table sql1_16;

id          stt                 edt
1001,2021-06-14 12:12:12,2021-06-14 18:12:12
1003,2021-06-14 13:12:12,2021-06-14 16:12:12
1004,2021-06-14 13:15:12,2021-06-14 20:12:12
1002,2021-06-14 15:12:12,2021-06-14 16:12:12
1005,2021-06-14 15:18:12,2021-06-14 20:12:12
1001,2021-06-14 20:12:12,2021-06-14 23:12:12
1006,2021-06-14 21:12:12,2021-06-14 23:15:12
1007,2021-06-14 22:12:12,2021-06-14 23:10:12

计算出平台最高峰同时在线的主播人数。将开始时间和结束时间分开,变成两条数据,进行累加即可。

-- 计算出平台最高峰同时在线的主播人数。

with t1 as (
    select id,stt,1 num from sql1_16
    union all
    select id,edt,-1 num from sql1_16
),t2 as (
    select *,sum(num) over ( order by stt) n from t1
)
select max(n) from t2;

第十七题:车辆每天上传N条数据,要求获得每日最后一条数据,请用至少三种方式写出来,并说明三种方式的区别

create table sql1_17 (
    vin   string,
    pkgts  string,
    value string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_17.txt' into table sql1_17;


vin                pkgts             value
1111111111111111,2022-06-01 16:54:57,60.0
1111111111111111,2022-06-01 07:40:27,6.0
2222222222222222,2022-06-01 19:40:27,7.0
1111111111111111,2022-06-26 12:16:45,64.0
2222222222222222,2022-06-05 23:00:42,18.0
2222222222222222,2022-06-05 09:10:17,44.0
2222222222222222,2022-06-05 13:00:25,19.0
2222222222222222,2022-06-13 16:14:04,43.0
2222222222222222,2022-06-13 07:21:08,40.0

-- 获得每日最后一条数据,请用至少三种方式写出来,并说明三种方式的区别

方式一  group by + exists

with t1 as
    ( select max(pkgts) max from sql1_17 group by substr(pkgts,1,10) )
select  * from sql1_17 where exists (select * from t1 where pkgts = t1.max);

方式二: 窗口函数row_number

with t1 as ( select *,row_number() over (partition by substr(pkgts,1,10) order by pkgts desc  ) num from sql1_17 )
select vin, pkgts, value from t1 where num =1;

方式三: 窗口函数first_value

with t1 as
    ( select *,first_value(pkgts) over (partition by substr(pkgts,1,10) order by pkgts desc)pkgts1  from sql1_17  )
select vin, pkgts, value from t1 where pkgts = pkgts1;

第十八题:打折日期交叉问题

create table sql1_18(
 brand string,
 stt string,
 edt string
)
row format delimited
fields terminated by ",";
load data local inpath "/home/homedata/sql_1/sql1_18.txt" into table sql1_18;

brand stt        edt
oppo,2021-06-05,2021-06-09
oppo,2021-06-11,2021-06-21
vivo,2021-06-05,2021-06-15
vivo,2021-06-09,2021-06-21
redmi,2021-06-05,2021-06-21
redmi,2021-06-09,2021-06-15
redmi,2021-06-17,2021-06-26
huawei,2021-06-05,2021-06-26
huawei,2021-06-09,2021-06-15
huawei,2021-06-17,2021-06-21

这道题目主要的难点就是在于日期的交叉,现在有两个思路,第一种是(F-A+1)+(H-G+1),第二种表示是(B-A)+(C-B)+(D-C)+(E-D)+(F-E)+1+(H-G)+1

不论是哪一个思路都需要将开始日期和结束日期分开,然后进行汇总,如下展示:

with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
)select *,sum(status) over (partition by brand order by stt ) gs from t1;

第一种思路的解法:

首先需要获取出每一段的起始和结束时间,将数据中的status=gs=1 和 gs=0的数据筛选出来。同时将起始时间和结束时间相结合

with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
),t2 as (
    select *,sum(status) over (partition by brand order by stt ) gs from t1
)
select brand,stt,`if`(status = 1,lead(stt,1,null) over (partition by brand order by stt),null) edt from t2 where status = gs or gs = 0;

然后将其中有null的数据排除,进行日期的相减即可

with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
),t2 as (
    select *,sum(status) over (partition by brand order by stt ) gs from t1
),t3 as (
    select brand,stt,`if`(status = 1,lead(stt,1,null) over (partition by brand order by stt),null) edt from t2 where status = gs or gs = 0
)
select brand,sum(datediff(edt,stt)+1) day from t3 where edt is not null group by brand;

第二种思路的解法:

思路:

SQL打折日期交叉问题_sql题各品牌打折总天数-CSDN博客

with t1 as (
    select brand,stt,1 status from sql1_18
    union all
    select brand,edt,-1 status from sql1_18
),t2 as (
    select *,sum(status) over (partition by brand order by stt ) gs from t1
),t3 as (
    select *,`if`(gs!=0,lead(stt,1,stt) over (partition by brand order by stt),stt )  ett from t2
),t4 as (
    select *,datediff(ett,stt) days from t3
)
select brand,sum(days)+count(`if`(gs=0,1,null)) tds from t4 group by brand;

第十九题:删除重复数据,name列相同的为重复数据

CREATE TABLE sql1_19 (
  `id`    int,
  `name` string
) ;
INSERT INTO sql1_19 VALUES ('1', 'A')
,('2', 'A')
,('3', 'A')
,('4', 'B')
,('5', 'B')
,('6', 'C')
,('7', 'B')
,('8', 'B')
,('9', 'B')
,('10', 'E')
,('11', 'E')
,('12', 'E');
select * from sql1_19;

删除name相同的数据,重复数据只保留一条id最大的

select max(id) id ,name from sql1_19 group by name;

第二十题:差值分组问题

create table sql1_20(
    id int,
    ts string
)
row format delimited
fields terminated by ',';
load data local inpath '/home/homedata/sql_1/sql1_20.txt' into table sql1_20;
select * from sql1_20;

id    ts(秒)
1001,17523641234
1001,17523641256
1002,17523641278
1001,17523641334
1002,17523641434
1001,17523641534
1001,17523641544
1002,17523641634
1001,17523641638
1001,17523641654

每两条数据间隔60秒以内则为同一组------sum + over(order)时窗口大小会逐渐变大,让每一组开始的数据为1 ,同组为0 ,用1隔开所有的组。

with t1 as (
    SELECT  id ,ts ,ts-lag(ts,1,ts) OVER (PARTITION BY id  order BY ts ) seconds FROM sql1_20
) ,t2 as (
    select id, ts,`if`(seconds <60 ,0,1) groupa from t1
)
select id,ts,sum(groupa)over (partition by id order by ts)+1 groups from t2;
相关推荐
工业互联网专业7 分钟前
Python毕业设计选题:基于大数据的旅游景区推荐系统_django
大数据·vue.js·python·django·毕业设计·源码·课程设计
floret*12 分钟前
在 hiveSQL 中判断一个字段是否包含某个值
前端·javascript·数据库
The Open Group16 分钟前
企业如何通过架构蓝图实现数字化转型
大数据·人工智能·分布式·微服务·云原生·架构·数字化转型
冰红茶兑滴水31 分钟前
MySQL 数据库之库操作
数据库·mysql
HZZD_HZZD33 分钟前
出租房管理系统有哪些?
大数据
全域观察1 小时前
两台手机如何提词呢,一台手机后置高清摄像一台手机前置提词+实时监测状态的解决方案来喽
大数据·人工智能·chatgpt·新媒体运营·程序员创富
Fireworkitte1 小时前
MongoDB
数据库·mongodb
笔墨登场说说2 小时前
git sonar maven 配置
大数据·elasticsearch·搜索引擎
VinciYan2 小时前
.NET使用TDengine时序数据库和SqlSugar操作TDengine
大数据·c#·.net·tdengine
来一杯龙舌兰2 小时前
【MongoDB】MongoDB的聚合(Aggregate、Map Reduce)与管道(Pipline) 及索引详解(附详细案例)
数据库·mongodb·mapreduce·索引·aggregate·pipline