0 问题描述
基于用户信息表user_info、、试卷作答记录表exam_record、题目练习记录表practice_record,统计 每个6/7级用户总活跃月份数、2021年活跃天数、2021年试卷作答活跃天数、2021年答题活跃天数,结果 按照总活跃月份数、2021年活跃天数降序排序。
1 数据准备
sql
drop table if exists examination_info,user_info,exam_record,practice_record;
CREATE TABLE examination_info (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
exam_id int UNIQUE NOT NULL COMMENT '试卷ID',
tag varchar(32) COMMENT '类别标签',
difficulty varchar(8) COMMENT '难度',
duration int NOT NULL COMMENT '时长',
release_time datetime COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
CREATE TABLE user_info (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid int UNIQUE NOT NULL COMMENT '用户ID',
`nick_name` varchar(64) COMMENT '昵称',
achievement int COMMENT '成就值',
level int COMMENT '用户等级',
job varchar(32) COMMENT '职业方向',
register_time datetime COMMENT '注册时间'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
CREATE TABLE practice_record (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid int NOT NULL COMMENT '用户ID',
question_id int NOT NULL COMMENT '题目ID',
submit_time datetime COMMENT '提交时间',
score tinyint COMMENT '得分'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
CREATE TABLE exam_record (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid int NOT NULL COMMENT '用户ID',
exam_id int NOT NULL COMMENT '试卷ID',
start_time datetime NOT NULL COMMENT '开始时间',
submit_time datetime COMMENT '提交时间',
score tinyint COMMENT '得分'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
INSERT INTO user_info(uid,`nick_name`,achievement,level,job,register_time) VALUES
(1001, '牛客1号', 3100, 7, '算法', '2020-01-01 10:00:00'),
(1002, '牛客2号', 2300, 7, '算法', '2020-01-01 10:00:00'),
(1003, '牛客3号', 2500, 7, '算法', '2020-01-01 10:00:00'),
(1004, '牛客4号', 1200, 5, '算法', '2020-01-01 10:00:00'),
(1005, '牛客5号', 1600, 6, 'C++', '2020-01-01 10:00:00'),
(1006, '牛客6号', 2000, 6, 'C++', '2020-01-01 10:00:00');
INSERT INTO examination_info(exam_id,tag,difficulty,duration,release_time) VALUES
(9001, 'SQL', 'hard', 60, '2021-09-01 06:00:00'),
(9002, 'C++', 'hard', 60, '2021-09-01 06:00:00'),
(9003, '算法', 'medium', 80, '2021-09-01 10:00:00');
INSERT INTO practice_record(uid,question_id,submit_time,score) VALUES
(1001, 8001, '2021-08-02 11:41:01', 60),
(1002, 8001, '2021-09-02 19:30:01', 50),
(1002, 8001, '2021-09-02 19:20:01', 70),
(1002, 8002, '2021-09-02 19:38:01', 70),
(1004, 8001, '2021-08-02 19:38:01', 70),
(1004, 8002, '2021-08-02 19:48:01', 90),
(1001, 8002, '2021-08-02 19:38:01', 70),
(1004, 8002, '2021-08-02 19:48:01', 90),
(1004, 8002, '2021-08-02 19:58:01', 94),
(1004, 8003, '2021-08-02 19:38:01', 70),
(1004, 8003, '2021-08-02 19:48:01', 90),
(1004, 8003, '2021-08-01 19:38:01', 80);
INSERT INTO exam_record(uid,exam_id,start_time,submit_time,score) VALUES
(1001, 9001, '2021-09-01 09:01:01', '2021-09-01 09:31:00', 81),
(1002, 9002, '2021-09-01 12:01:01', '2021-09-01 12:31:01', 81),
(1003, 9001, '2021-09-01 19:01:01', '2021-09-01 19:40:01', 86),
(1003, 9002, '2021-09-01 12:01:01', '2021-09-01 12:31:51', 89),
(1004, 9001, '2021-09-01 19:01:01', '2021-09-01 19:30:01', 85),
(1005, 9002, '2021-09-01 12:01:01', '2021-09-01 12:31:02', 85),
(1006, 9003, '2021-09-07 10:01:01', '2021-09-07 10:21:01', 84),
(1006, 9001, '2021-09-07 10:01:01', '2021-09-07 10:21:01', 80);
2 数据分析
sql
select
ui.uid as uid,
--每个6/7级用户总活跃月份数
count(distinct act_month) as act_month_total,
--2021年活跃天数 (总活跃天数 = 试卷作答活跃天数 + 答题活跃天数)
count(distinct case when year(act_time) = 2021 then act_day end) as act_days_2021,
--2021年 试卷作答活跃天数
count(distinct case when year(act_time) = 2021 and tag = 'exam' then act_day end) as act_days_2021_exam,
--2021年 答题活跃天数,
count(distinct case when year(act_time) = 2021 and tag = 'question' then act_day end) as act_days_2021_question
from user_info ui
left join
( select
uid,
start_time as act_time,
date_format(start_time, '%Y%m') as act_month,
date_format(start_time, '%Y%m%d') as act_day,
'exam' as tag
from exam_record
union all
select
uid,
submit_time as act_time,
date_format(submit_time, '%Y%m') as act_month,
date_format(submit_time, '%Y%m%d') as act_day,
'question' as tag
from practice_record
)ep
on ui.uid = ep.uid
where ui.level >= 6
group by ui.uid
order by act_month_total desc, act_days_2021 desc;
思路分析:
- step1: 试卷以开始答题时间作为活跃时间,只要用户答题了就代表活跃,不管有没有完成。先筛选出用户ID,开始答题时间作为活跃时间,开始答题的月份作为活跃月份,开始答题的日期作为活跃天,标记tag为'exam'
- **step2:**题目以提交时间作为活跃时间,先筛选出用户ID,提交时间作为活跃时间,提交的月份作为活跃月份,提交的日期作为活跃天,标记tag为'question'
- **step3:**将上述两个筛选结果合并作为新表ep
- step4: 活跃信息在新表ep 中,用户等级信息在user_info中,因此在表user_info的右边去连接新表,以uid为准。left join...on...
- **step5:**从连接后的表中筛选出等级大于等于6的用户,然后统计每个用户的活跃信息: