Hive 中级练习题(40题 待更新)

前言

最近快一周没更了,主要原因是最近在忙另一件事情(关于JavaFX桌面软件开发),眼看大三上一半时间就要过去了,抓紧先学Hive,完了把 Spark 剩下的补了,还有 Kafka、Flume,任务还是不少的,最后再放假前做一个小的大数据项目,希望一切顺利。

环境准备

这部分直接上代码,反正也没人看哈哈哈。

sql 复制代码
-- SQL 中级
show databases ;

create database practice2;
use practice2;

create table user_info(
    `user_id`  string COMMENT '用户id',
    `gender`   string COMMENT '性别',
    `birthday` string COMMENT '生日'
) COMMENT '用户信息表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

insert overwrite table user_info
values ('101', '男', '1990-01-01'),
       ('102', '女', '1991-02-01'),
       ('103', '女', '1992-03-01'),
       ('104', '男', '1993-04-01'),
       ('105', '女', '1994-05-01'),
       ('106', '男', '1995-06-01'),
       ('107', '女', '1996-07-01'),
       ('108', '男', '1997-08-01'),
       ('109', '女', '1998-09-01'),
       ('1010', '男', '1999-10-01');

CREATE TABLE sku_info(
    `sku_id`      string COMMENT '商品id',
    `name`        string COMMENT '商品名称',
    `category_id` string COMMENT '所属分类id',
    `from_date`   string COMMENT '上架日期',
    `price`       double COMMENT '商品单价'
) COMMENT '商品属性表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

insert overwrite table sku_info
values ('1', 'xiaomi 10', '1', '2020-01-01', 2000),
       ('2', '手机壳', '1', '2020-02-01', 10),
       ('3', 'apple 12', '1', '2020-03-01', 5000),
       ('4', 'xiaomi 13', '1', '2020-04-01', 6000),
       ('5', '破壁机', '2', '2020-01-01', 500),
       ('6', '洗碗机', '2', '2020-02-01', 2000),
       ('7', '热水壶', '2', '2020-03-01', 100),
       ('8', '微波炉', '2', '2020-04-01', 600),
       ('9', '自行车', '3', '2020-01-01', 1000),
       ('10', '帐篷', '3', '2020-02-01', 100),
       ('11', '烧烤架', '3', '2020-02-01', 50),
       ('12', '遮阳伞', '3', '2020-03-01', 20);

create table category_info(
    `category_id`   string,
    `category_name` string
) COMMENT '品类表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

insert overwrite table category_info
values ('1','数码'),
       ('2','厨卫'),
       ('3','户外');

create table order_info(
    `order_id`     string COMMENT '订单id',
    `user_id`      string COMMENT '用户id',
    `create_date`  string COMMENT '下单日期',
    `total_amount` decimal(16, 2) COMMENT '订单总金额'
) COMMENT '订单表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

insert overwrite table order_info
values ('1', '101', '2021-09-27', 29000.00),
       ('2', '101', '2021-09-28', 70500.00),
       ('3', '101', '2021-09-29', 43300.00),
       ('4', '101', '2021-09-30', 860.00),
       ('5', '102', '2021-10-01', 46180.00),
       ('6', '102', '2021-10-01', 50000.00),
       ('7', '102', '2021-10-01', 75500.00),
       ('8', '102', '2021-10-02', 6170.00),
       ('9', '103', '2021-10-02', 18580.00),
       ('10', '103', '2021-10-02', 28000.00),
       ('11', '103', '2021-10-02', 23400.00),
       ('12', '103', '2021-10-03', 5910.00),
       ('13', '104', '2021-10-03', 13000.00),
       ('14', '104', '2021-10-03', 69500.00),
       ('15', '104', '2021-10-03', 2000.00),
       ('16', '104', '2021-10-03', 5380.00),
       ('17', '105', '2021-10-04', 6210.00),
       ('18', '105', '2021-10-04', 68000.00),
       ('19', '105', '2021-10-04', 43100.00),
       ('20', '105', '2021-10-04', 2790.00),
       ('21', '106', '2021-10-04', 9390.00),
       ('22', '106', '2021-10-05', 58000.00),
       ('23', '106', '2021-10-05', 46600.00),
       ('24', '106', '2021-10-05', 5160.00),
       ('25', '107', '2021-10-05', 55350.00),
       ('26', '107', '2021-10-05', 14500.00),
       ('27', '107', '2021-10-06', 47400.00),
       ('28', '107', '2021-10-06', 6900.00),
       ('29', '108', '2021-10-06', 56570.00),
       ('30', '108', '2021-10-06', 44500.00),
       ('31', '108', '2021-10-07', 50800.00),
       ('32', '108', '2021-10-07', 3900.00),
       ('33', '109', '2021-10-07', 41480.00),
       ('34', '109', '2021-10-07', 88000.00),
       ('35', '109', '2020-10-08', 15000.00),
       ('36', '109', '2020-10-08', 9020.00),
       ('37', '1010', '2020-10-08', 9260.00),
       ('38', '1010', '2020-10-08', 12000.00),
       ('39', '1010', '2020-10-08', 23900.00),
       ('40', '1010', '2020-10-08', 6790.00);

CREATE TABLE order_detail
(
    `order_detail_id` string COMMENT '订单明细id',
    `order_id`        string COMMENT '订单id',
    `sku_id`          string COMMENT '商品id',
    `create_date`     string COMMENT '下单日期',
    `price`           decimal(16, 2) COMMENT '下单时的商品单价',
    `sku_num`         int COMMENT '下单商品件数'
) COMMENT '订单明细表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

INSERT overwrite table order_detail
values ('1', '1', '1', '2021-09-27', 2000.00, 2),
       ('2', '1', '3', '2021-09-27', 5000.00, 5),
       ('3', '2', '4', '2021-09-28', 6000.00, 9),
       ('4', '2', '5', '2021-09-28', 500.00, 33),
       ('5', '3', '7', '2021-09-29', 100.00, 37),
       ('6', '3', '8', '2021-09-29', 600.00, 46),
       ('7', '3', '9', '2021-09-29', 1000.00, 12),
       ('8', '4', '12', '2021-09-30', 20.00, 43),
       ('9', '5', '1', '2021-10-01', 2000.00, 8),
       ('10', '5', '2', '2021-10-01', 10.00, 18),
       ('11', '5', '3', '2021-10-01', 5000.00, 6),
       ('12', '6', '4', '2021-10-01', 6000.00, 8),
       ('13', '6', '6', '2021-10-01', 2000.00, 1),
       ('14', '7', '7', '2021-10-01', 100.00, 17),
       ('15', '7', '8', '2021-10-01', 600.00, 48),
       ('16', '7', '9', '2021-10-01', 1000.00, 45),
       ('17', '8', '10', '2021-10-02', 100.00, 48),
       ('18', '8', '11', '2021-10-02', 50.00, 15),
       ('19', '8', '12', '2021-10-02', 20.00, 31),
       ('20', '9', '1', '2021-09-30', 2000.00, 9),
       ('21', '9', '2', '2021-10-02', 10.00, 5800),
       ('22', '10', '4', '2021-10-02', 6000.00, 1),
       ('23', '10', '5', '2021-10-02', 500.00, 24),
       ('24', '10', '6', '2021-10-02', 2000.00, 5),
       ('25', '11', '8', '2021-10-02', 600.00, 39),
       ('26', '12', '10', '2021-10-03', 100.00, 47),
       ('27', '12', '11', '2021-10-03', 50.00, 19),
       ('28', '12', '12', '2021-10-03', 20.00, 13000),
       ('29', '13', '1', '2021-10-03', 2000.00, 4),
       ('30', '13', '3', '2021-10-03', 5000.00, 1),
       ('31', '14', '4', '2021-10-03', 6000.00, 5),
       ('32', '14', '5', '2021-10-03', 500.00, 47),
       ('33', '14', '6', '2021-10-03', 2000.00, 8),
       ('34', '15', '7', '2021-10-03', 100.00, 20),
       ('35', '16', '10', '2021-10-03', 100.00, 22),
       ('36', '16', '11', '2021-10-03', 50.00, 42),
       ('37', '16', '12', '2021-10-03', 20.00, 7400),
       ('38', '17', '1', '2021-10-04', 2000.00, 3),
       ('39', '17', '2', '2021-10-04', 10.00, 21),
       ('40', '18', '4', '2021-10-04', 6000.00, 8),
       ('41', '18', '5', '2021-10-04', 500.00, 28),
       ('42', '18', '6', '2021-10-04', 2000.00, 3),
       ('43', '19', '7', '2021-10-04', 100.00, 55),
       ('44', '19', '8', '2021-10-04', 600.00, 11),
       ('45', '19', '9', '2021-10-04', 1000.00, 31),
       ('46', '20', '11', '2021-10-04', 50.00, 45),
       ('47', '20', '12', '2021-10-04', 20.00, 27),
       ('48', '21', '1', '2021-10-04', 2000.00, 2),
       ('49', '21', '2', '2021-10-04', 10.00, 39),
       ('50', '21', '3', '2021-10-04', 5000.00, 1),
       ('51', '22', '4', '2021-10-05', 6000.00, 8),
       ('52', '22', '5', '2021-10-05', 500.00, 20),
       ('53', '23', '7', '2021-10-05', 100.00, 58),
       ('54', '23', '8', '2021-10-05', 600.00, 18),
       ('55', '23', '9', '2021-10-05', 1000.00, 30),
       ('56', '24', '10', '2021-10-05', 100.00, 27),
       ('57', '24', '11', '2021-10-05', 50.00, 28),
       ('58', '24', '12', '2021-10-05', 20.00, 53),
       ('59', '25', '1', '2021-10-05', 2000.00, 5),
       ('60', '25', '2', '2021-10-05', 10.00, 35),
       ('61', '25', '3', '2021-10-05', 5000.00, 9),
       ('62', '26', '4', '2021-10-05', 6000.00, 1),
       ('63', '26', '5', '2021-10-05', 500.00, 13),
       ('64', '26', '6', '2021-10-05', 2000.00, 1),
       ('65', '27', '7', '2021-10-06', 100.00, 30),
       ('66', '27', '8', '2021-10-06', 600.00, 19),
       ('67', '27', '9', '2021-10-06', 1000.00, 33),
       ('68', '28', '10', '2021-10-06', 100.00, 37),
       ('69', '28', '11', '2021-10-06', 50.00, 46),
       ('70', '28', '12', '2021-10-06', 20.00, 45),
       ('71', '29', '1', '2021-10-06', 2000.00, 8),
       ('72', '29', '2', '2021-10-06', 10.00, 57),
       ('73', '29', '3', '2021-10-06', 5000.00, 8),
       ('74', '30', '4', '2021-10-06', 6000.00, 3),
       ('75', '30', '5', '2021-10-06', 500.00, 33),
       ('76', '30', '6', '2021-10-06', 2000.00, 5),
       ('77', '31', '8', '2021-10-07', 600.00, 13),
       ('78', '31', '9', '2021-10-07', 1000.00, 43),
       ('79', '32', '10', '2021-10-07', 100.00, 24),
       ('80', '32', '11', '2021-10-07', 50.00, 30),
       ('81', '33', '1', '2021-10-07', 2000.00, 8),
       ('82', '33', '2', '2021-10-07', 10.00, 48),
       ('83', '33', '3', '2021-10-07', 5000.00, 5),
       ('84', '34', '4', '2021-10-07', 6000.00, 10),
       ('85', '34', '5', '2021-10-07', 500.00, 44),
       ('86', '34', '6', '2021-10-07', 2000.00, 3),
       ('87', '35', '8', '2020-10-08', 600.00, 25),
       ('88', '36', '10', '2020-10-08', 100.00, 57),
       ('89', '36', '11', '2020-10-08', 50.00, 44),
       ('90', '36', '12', '2020-10-08', 20.00, 56),
       ('91', '37', '1', '2020-10-08', 2000.00, 2),
       ('92', '37', '2', '2020-10-08', 10.00, 26),
       ('93', '37', '3', '2020-10-08', 5000.00, 1),
       ('94', '38', '6', '2020-10-08', 2000.00, 6),
       ('95', '39', '7', '2020-10-08', 100.00, 35),
       ('96', '39', '8', '2020-10-08', 600.00, 34),
       ('97', '40', '10', '2020-10-08', 100.00, 37),
       ('98', '40', '11', '2020-10-08', 50.00, 51),
       ('99', '40', '12', '2020-10-08', 20.00, 27);

CREATE TABLE user_login_detail
(
    `user_id`    string comment '用户id',
    `ip_address` string comment 'ip地址',
    `login_ts`   string comment '登录时间',
    `logout_ts`  string comment '登出时间'
) COMMENT '用户登录明细表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

INSERT overwrite table user_login_detail
VALUES ('101', '180.149.130.161', '2021-09-21 08:00:00', '2021-09-27 08:30:00'),
       ('101', '180.149.130.161', '2021-09-27 08:00:00', '2021-09-27 08:30:00'),
       ('101', '180.149.130.161', '2021-09-28 09:00:00', '2021-09-28 09:10:00'),
       ('101', '180.149.130.161', '2021-09-29 13:30:00', '2021-09-29 13:50:00'),
       ('101', '180.149.130.161', '2021-09-30 20:00:00', '2021-09-30 20:10:00'),
       ('102', '120.245.11.2', '2021-09-22 09:00:00', '2021-09-27 09:30:00'),
       ('102', '120.245.11.2', '2021-10-01 08:00:00', '2021-10-01 08:30:00'),
       ('102', '180.149.130.174', '2021-10-01 07:50:00', '2021-10-01 08:20:00'),
       ('102', '120.245.11.2', '2021-10-02 08:00:00', '2021-10-02 08:30:00'),
       ('103', '27.184.97.3', '2021-09-23 10:00:00', '2021-09-27 10:30:00'),
       ('103', '27.184.97.3', '2021-10-03 07:50:00', '2021-10-03 09:20:00'),
       ('104', '27.184.97.34', '2021-09-24 11:00:00', '2021-09-27 11:30:00'),
       ('104', '27.184.97.34', '2021-10-03 07:50:00', '2021-10-03 08:20:00'),
       ('104', '27.184.97.34', '2021-10-03 08:50:00', '2021-10-03 10:20:00'),
       ('104', '120.245.11.89', '2021-10-03 08:40:00', '2021-10-03 10:30:00'),
       ('105', '119.180.192.212', '2021-10-04 09:10:00', '2021-10-04 09:30:00'),
       ('106', '119.180.192.66', '2021-10-04 08:40:00', '2021-10-04 10:30:00'),
       ('106', '119.180.192.66', '2021-10-05 21:50:00', '2021-10-05 22:40:00'),
       ('107', '219.134.104.7', '2021-09-25 12:00:00', '2021-09-27 12:30:00'),
       ('107', '219.134.104.7', '2021-10-05 22:00:00', '2021-10-05 23:00:00'),
       ('107', '219.134.104.7', '2021-10-06 09:10:00', '2021-10-06 10:20:00'),
       ('107', '27.184.97.46', '2021-10-06 09:00:00', '2021-10-06 10:00:00'),
       ('108', '101.227.131.22', '2021-10-06 09:00:00', '2021-10-06 10:00:00'),
       ('108', '101.227.131.22', '2021-10-06 22:00:00', '2021-10-06 23:00:00'),
       ('109', '101.227.131.29', '2021-09-26 13:00:00', '2021-09-27 13:30:00'),
       ('109', '101.227.131.29', '2021-10-06 08:50:00', '2021-10-06 10:20:00'),
       ('109', '101.227.131.29', '2021-10-08 09:00:00', '2021-10-08 09:10:00'),
       ('1010', '119.180.192.10', '2021-09-27 14:00:00', '2021-09-27 14:30:00'),
       ('1010', '119.180.192.10', '2021-10-09 08:50:00', '2021-10-09 10:20:00');

CREATE TABLE sku_price_modify_detail
(
    `sku_id`      string comment '商品id',
    `new_price`   decimal(16, 2) comment '更改后的价格',
    `change_date` string comment '变动日期'
) COMMENT '商品价格变更明细表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

insert overwrite table sku_price_modify_detail
values ('1', 1900, '2021-09-25'),
       ('1', 2000, '2021-09-26'),
       ('2', 80, '2021-09-29'),
       ('2', 10, '2021-09-30'),
       ('3', 4999, '2021-09-25'),
       ('3', 5000, '2021-09-26'),
       ('4', 5600, '2021-09-26'),
       ('4', 6000, '2021-09-27'),
       ('5', 490, '2021-09-27'),
       ('5', 500, '2021-09-28'),
       ('6', 1988, '2021-09-30'),
       ('6', 2000, '2021-10-01'),
       ('7', 88, '2021-09-28'),
       ('7', 100, '2021-09-29'),
       ('8', 800, '2021-09-28'),
       ('8', 600, '2021-09-29'),
       ('9', 1100, '2021-09-27'),
       ('9', 1000, '2021-09-28'),
       ('10', 90, '2021-10-01'),
       ('10', 100, '2021-10-02'),
       ('11', 66, '2021-10-01'),
       ('11', 50, '2021-10-02'),
       ('12', 35, '2021-09-28'),
       ('12', 20, '2021-09-29');

CREATE TABLE delivery_info
(
    `delivery_id` string comment '配送单id',
    `order_id`    string comment '订单id',
    `user_id`     string comment '用户id',
    `order_date`  string comment '下单日期',
    `custom_date` string comment '期望配送日期'
) COMMENT '邮寄信息表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

insert overwrite table delivery_info
values ('1', '1', '101', '2021-09-27', '2021-09-29'),
       ('2', '2', '101', '2021-09-28', '2021-09-28'),
       ('3', '3', '101', '2021-09-29', '2021-09-30'),
       ('4', '4', '101', '2021-09-30', '2021-10-01'),
       ('5', '5', '102', '2021-10-01', '2021-10-01'),
       ('6', '6', '102', '2021-10-01', '2021-10-01'),
       ('7', '7', '102', '2021-10-01', '2021-10-03'),
       ('8', '8', '102', '2021-10-02', '2021-10-02'),
       ('9', '9', '103', '2021-10-02', '2021-10-03'),
       ('10', '10', '103', '2021-10-02', '2021-10-04'),
       ('11', '11', '103', '2021-10-02', '2021-10-02'),
       ('12', '12', '103', '2021-10-03', '2021-10-03'),
       ('13', '13', '104', '2021-10-03', '2021-10-04'),
       ('14', '14', '104', '2021-10-03', '2021-10-04'),
       ('15', '15', '104', '2021-10-03', '2021-10-03'),
       ('16', '16', '104', '2021-10-03', '2021-10-03'),
       ('17', '17', '105', '2021-10-04', '2021-10-04'),
       ('18', '18', '105', '2021-10-04', '2021-10-06'),
       ('19', '19', '105', '2021-10-04', '2021-10-06'),
       ('20', '20', '105', '2021-10-04', '2021-10-04'),
       ('21', '21', '106', '2021-10-04', '2021-10-04'),
       ('22', '22', '106', '2021-10-05', '2021-10-05'),
       ('23', '23', '106', '2021-10-05', '2021-10-05'),
       ('24', '24', '106', '2021-10-05', '2021-10-07'),
       ('25', '25', '107', '2021-10-05', '2021-10-05'),
       ('26', '26', '107', '2021-10-05', '2021-10-06'),
       ('27', '27', '107', '2021-10-06', '2021-10-06'),
       ('28', '28', '107', '2021-10-06', '2021-10-07'),
       ('29', '29', '108', '2021-10-06', '2021-10-06'),
       ('30', '30', '108', '2021-10-06', '2021-10-06'),
       ('31', '31', '108', '2021-10-07', '2021-10-09'),
       ('32', '32', '108', '2021-10-07', '2021-10-09'),
       ('33', '33', '109', '2021-10-07', '2021-10-08'),
       ('34', '34', '109', '2021-10-07', '2021-10-08'),
       ('35', '35', '109', '2021-10-08', '2021-10-10'),
       ('36', '36', '109', '2021-10-08', '2021-10-09'),
       ('37', '37', '1010', '2021-10-08', '2021-10-10'),
       ('38', '38', '1010', '2021-10-08', '2021-10-10'),
       ('39', '39', '1010', '2021-10-08', '2021-10-09'),
       ('40', '40', '1010', '2021-10-08', '2021-10-09');

CREATE TABLE friendship_info(
    `user1_id` string comment '用户1id',
    `user2_id` string comment '用户2id'
) COMMENT '用户关系表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

insert overwrite table friendship_info
values ('101', '1010'),
       ('101', '108'),
       ('101', '106'),
       ('101', '104'),
       ('101', '102'),
       ('102', '1010'),
       ('102', '108'),
       ('102', '106'),
       ('102', '104'),
       ('102', '102'),
       ('103', '1010'),
       ('103', '108'),
       ('103', '106'),
       ('103', '104'),
       ('103', '102'),
       ('104', '1010'),
       ('104', '108'),
       ('104', '106'),
       ('104', '104'),
       ('104', '102'),
       ('105', '1010'),
       ('105', '108'),
       ('105', '106'),
       ('105', '104'),
       ('105', '102'),
       ('106', '1010'),
       ('106', '108'),
       ('106', '106'),
       ('106', '104'),
       ('106', '102'),
       ('107', '1010'),
       ('107', '108'),
       ('107', '106'),
       ('107', '104'),
       ('107', '102'),
       ('108', '1010'),
       ('108', '108'),
       ('108', '106'),
       ('108', '104'),
       ('108', '102'),
       ('109', '1010'),
       ('109', '108'),
       ('109', '106'),
       ('109', '104'),
       ('109', '102'),
       ('1010', '1010'),
       ('1010', '108'),
       ('1010', '106'),
       ('1010', '104'),
       ('1010', '102');

CREATE TABLE favor_info
(
    `user_id`     string comment '用户id',
    `sku_id`      string comment '商品id',
    `create_date` string comment '收藏日期'
) COMMENT '用户收藏表'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

insert overwrite table favor_info
values ('101', '3', '2021-09-23'),
       ('101', '12', '2021-09-23'),
       ('101', '6', '2021-09-25'),
       ('101', '10', '2021-09-21'),
       ('101', '5', '2021-09-25'),
       ('102', '1', '2021-09-24'),
       ('102', '2', '2021-09-24'),
       ('102', '8', '2021-09-23'),
       ('102', '12', '2021-09-22'),
       ('102', '11', '2021-09-23'),
       ('102', '9', '2021-09-25'),
       ('102', '4', '2021-09-25'),
       ('102', '6', '2021-09-23'),
       ('102', '7', '2021-09-26'),
       ('103', '8', '2021-09-24'),
       ('103', '5', '2021-09-25'),
       ('103', '6', '2021-09-26'),
       ('103', '12', '2021-09-27'),
       ('103', '7', '2021-09-25'),
       ('103', '10', '2021-09-25'),
       ('103', '4', '2021-09-24'),
       ('103', '11', '2021-09-25'),
       ('103', '3', '2021-09-27'),
       ('104', '9', '2021-09-28'),
       ('104', '7', '2021-09-28'),
       ('104', '8', '2021-09-25'),
       ('104', '3', '2021-09-28'),
       ('104', '11', '2021-09-25'),
       ('104', '6', '2021-09-25'),
       ('104', '12', '2021-09-28'),
       ('105', '8', '2021-10-08'),
       ('105', '9', '2021-10-07'),
       ('105', '7', '2021-10-07'),
       ('105', '11', '2021-10-06'),
       ('105', '5', '2021-10-07'),
       ('105', '4', '2021-10-05'),
       ('105', '10', '2021-10-07'),
       ('106', '12', '2021-10-08'),
       ('106', '1', '2021-10-08'),
       ('106', '4', '2021-10-04'),
       ('106', '5', '2021-10-08'),
       ('106', '2', '2021-10-04'),
       ('106', '6', '2021-10-04'),
       ('106', '7', '2021-10-08'),
       ('107', '5', '2021-09-29'),
       ('107', '3', '2021-09-28'),
       ('107', '10', '2021-09-27'),
       ('108', '9', '2021-10-08'),
       ('108', '3', '2021-10-10'),
       ('108', '8', '2021-10-10'),
       ('108', '10', '2021-10-07'),
       ('108', '11', '2021-10-07'),
       ('109', '2', '2021-09-27'),
       ('109', '4', '2021-09-29'),
       ('109', '5', '2021-09-29'),
       ('109', '9', '2021-09-30'),
       ('109', '8', '2021-09-26'),
       ('1010', '2', '2021-09-29'),
       ('1010', '9', '2021-09-29'),
       ('1010', '1', '2021-10-01');

题目

1、查询累积销量排名第二的商品

题目需求

查询订单明细表(order_detail)中销量(下单件数)排名第二的商品id,如果不存在返回null,如果存在多个排名第二的商品则需要全部返回。

代码

这里使用了窗口函数中的排名函数-dense_rank() ,对于相同值的数据,它的排名是相同的,比如100,100,200,它的排名就是 1,1,2,默认是降序的,这里我们需要设置为升序排列。

这里不需要分区。

对于不同订单,它可能会有相同的商品,所以我们需要用一个聚合函数sum来实现。

sql 复制代码
select sku_id from (
    select sku_id,
           order_num,
           dense_rank() over (order by order_num desc) rank
    from (
        select sku_id,
               sum(sku_num) order_num
            from order_detail
            group by sku_id
            )t1
        )
where rank=2;

2、查询至少连续三天下单的用户

题目需求

查询订单信息表(order_info)中最少连续3天下单的用户id。

代码

  1. 找到每个用户的下单日期(需要去重)
sql 复制代码
-- 找到用户的下单日期-需要去重(一天下单多次)
select distinct user_id,
                create_date
from order_info;

-- 第二种去重方式
select user_id, 
       create_date
from order_info
group by user_id,create_date;    --group by 就是把相同的字段值放到一起

--第三种去重方式
select user_id,create_date
from(
select user_id,
       create_date,
       row_number() over (partition by user_id,create_date) rn
from order_info
    )t1
where rn=1;
查看SQL语句执行计划
sql 复制代码
explain select distinct user_id,
                create_date
from order_info;

2.查看连续三天下单的用户

思路:

sql 复制代码
user_id      create_date      lead(create_date,2,'9999-12-31')      datadiff
1001          2023-06-01            2023-06-03                      2
1001          2023-06-02            2023-06-06                      4
1001          2023-06-03            2023-06-07                      4
1001          2023-06-06            2023-06-08                      2
1001          2023-06-07            9999-12-31                      2913****
1001          2023-06-08            9999-12-31                      2913****

代码

sql 复制代码
select distinct user_id
from(
    select user_id,
       datediff(create_date,lead2) diff
    from(
        select user_id,
               create_date,
               lead(create_date,2,'9999-12-31') over (partition by user_id order by create_date) lead2
               from
        (
        select distinct user_id,
                        create_date
        from order_info)t1
        )t2
    )t3
where diff=2;

思路2

sql 复制代码
user_id      create_date      rank()    diff        
1001          2023-06-02      1         2023-06-01
1001          2023-06-03      2         2023-06-01
1001          2023-06-04      3         2023-06-01
1001          2023-06-06      4         2023-06-02
1001          2023-06-07      5         2023-06-02
1001          2023-06-08      6         2023-06-02
sql 复制代码
select distinct user_id
from(
    select user_id,
       diff,
       count(*) cnt
    from (
        select user_id,
                       create_date,
                       date_sub(create_date, rank() over (partition by user_id order by create_date)) diff
                from (
                         select distinct user_id,
                                         create_date
                         from order_info
                     ) t1
            )t2
group by user_id,diff
having cnt
        )t3;

思路3

sql 复制代码
user_id      create_date      count() over(partition by user_id order by create_date range between 86400 preceding and 86400 following )
1001          2023-06-02            2
1001          2023-06-03            3
1001          2023-06-04            2
1001          2023-06-06            2
1001          2023-06-07            3
1001          2023-06-08            2
sql 复制代码
select distinct user_id
from (
         select user_id,
                ts,
                count(*) over (partition by user_id order by ts range between 86400 preceding and 86400 following) cnt
         from (
                  select user_id,
                         unix_timestamp(create_date, 'yyyy-MM-dd') ts
                  from (
                           select distinct user_id,
                                           create_date
                           from order_info
                       ) t1
              ) t2
     )t3
where cnt=3;
相关推荐
喂完待续12 分钟前
Apache Hudi:数据湖的实时革命
大数据·数据仓库·分布式·架构·apache·数据库架构
计艺回忆路2 小时前
从Podman开始一步步构建Hadoop开发集群
hadoop
计算机源码社1 天前
分享一个基于Hadoop的二手房销售签约数据分析与可视化系统,基于Python可视化的二手房销售数据分析平台
大数据·hadoop·python·数据分析·毕业设计项目·毕业设计源码·计算机毕设选题
计算机毕设残哥1 天前
完整技术栈分享:基于Hadoop+Spark的在线教育投融资大数据可视化分析系统
大数据·hadoop·python·信息可视化·spark·计算机毕设·计算机毕业设计
计算机源码社2 天前
分享一个基于Hadoop+spark的超市销售数据分析与可视化系统,超市顾客消费行为分析系统的设计与实现
大数据·hadoop·数据分析·spark·计算机毕业设计源码·计算机毕设选题·大数据选题推荐
beijingliushao2 天前
33-Hive SQL DML语法之查询数据-2
hive·hadoop·sql
Lx3522 天前
如何正确选择Hadoop数据压缩格式:Gzip vs LZO vs Snappy
大数据·hadoop
让头发掉下来2 天前
Hive 创建事务表的方法
大数据·hive·hadoop
Q_Q19632884752 天前
python基于Hadoop的超市数据分析系统
开发语言·hadoop·spring boot·python·django·flask·node.js
计算机毕业设计木哥2 天前
计算机毕设大数据选题推荐 基于spark+Hadoop+python的贵州茅台股票数据分析系统【源码+文档+调试】
大数据·hadoop·python·计算机网络·spark·课程设计