背景
作为一名后端开发,在日常工作中,SQL是经常会接触的内容,但是日常开发中,遇见的sql很多都是单表查询或者多表关联取数据,遇见稍微复杂一点问题,能用java代码解决就不会思考能不能用SQL解决,缺乏对某些特殊场景的sql的编写和解决能力。本文从很多其他地方梳理找到部分场景,主要是为了扩展读者的sql思维,这里笔者要强调一下,不是强调所有开发用SQL来开发所有内容,有些处理场景java实现要比SQL简单高效的多,合适的才是最好的!
题目列表
1. 列出各个部门中工资高于本部门的平均工资的员工数和部门号,并按部门号排序
1.1 初始化数据
sql
create table test_employee
(
empid bigint not null auto_increment primary key ,
deptid varchar(32) null,
salary int null
);
insert into test_employee(deptid, salary) VALUES ('1','100');
insert into test_employee(deptid, salary) VALUES ('1','100');
insert into test_employee(deptid, salary) VALUES ('1','100');
insert into test_employee(deptid, salary) VALUES ('2','100');
insert into test_employee(deptid, salary) VALUES ('2','200');
insert into test_employee(deptid, salary) VALUES ('2','300');
insert into test_employee(deptid, salary) VALUES ('3','200');
insert into test_employee(deptid, salary) VALUES ('3','200');
insert into test_employee(deptid, salary) VALUES ('3','300');
1.2 答案
sql
-- 解法1
select test_employee.deptid, count(empid)
from test_employee,
(select deptid, avg(salary) avsalay from test_employee group by deptid) tb2
where tb2.deptid = test_employee.deptid
and test_employee.salary > avsalay
group by test_employee.deptid
order by deptid;
-- 解法2
select deptid, count(empid)
from test_employee
where salary >
(select avg(salary) from test_employee as sub where sub.deptid = test_employee.deptid)
group by deptid
order by deptid;
2. 用一条 SQL 语句查询出每门课都大于 80 分的学生姓名
2.1 初始化数据
sql
-- 用一条 SQL 语句查询出每门课都大于 80 分的学生姓名
create table test_score(
id bigint auto_increment primary key ,
name varchar(32) null ,
subject varchar(32) null ,
score int null
);
insert into test_score(name, subject, score) VALUES ('xiaoming','yuwen',78);
insert into test_score(name, subject, score) VALUES ('xiaoming','shuxue',90);
insert into test_score(name, subject, score) VALUES ('zhangsan','yuwen',81);
insert into test_score(name, subject, score) VALUES ('zhangsan','shuxue',90);
insert into test_score(name, subject, score) VALUES ('zhangsan','yingyu',90);
insert into test_score(name, subject, score) VALUES ('lisi','yuwen',66);
insert into test_score(name, subject, score) VALUES ('lisi','shuxue',90);
insert into test_score(name, subject, score) VALUES ('lisi','yingyu',90);
insert into test_score(name, subject, score) VALUES ('wangwu','yuwen',81);
insert into test_score(name, subject, score) VALUES ('wangwu','shuxue',90);
insert into test_score(name, subject, score) VALUES ('wangwu','yingyu',90);
2.2 答案
sql
-- 解法1
select name
from test_score
where score > 80
group by name
having count(subject) =
(select max(t.cusu) from (select count(distinct subject) cusu from test_score group by name) as t);
-- 解法2
select distinct name
from test_score
where name not in (select name from test_score where score <= 80);
-- 解法3
select distinct name
from test_score t1
where 80 < all (select score from test_score where name = t1.name);
-- 效果
# name
# zhangsan
# wangwu
3. 所有球队之间的比赛组合
3.1 初始化数据
sql
create table test_team(
id bigint auto_increment primary key ,
name varchar(32) null
);
insert into test_team(name)values ('A');
insert into test_team(name)values ('B');
insert into test_team(name)values ('C');
insert into test_team(name)values ('D');
insert into test_team(name)values ('E');
3.2 答案
sql
select t1.name,t2.name from test_team t1,test_team t2 where t1.name<t2.name;
# name name
# A B
# A C
# B C
# A D
# B D
# C D
# A E
# B E
# C E
# D E
4. 统计每个月份的发生额都比 科目 '1' 多的科目
4.1 初始化数据
sql
create table test_subject(
subid int null ,
month int null ,
money int null
);
insert into test_subject values (1,1,100);
insert into test_subject values (1,2,100);
insert into test_subject values (1,3,100);
insert into test_subject values (2,1,200);
insert into test_subject values (2,2,200);
insert into test_subject values (2,3,100);
insert into test_subject values (3,1,200);
insert into test_subject values (3,2,200);
insert into test_subject values (3,3,300);
4.2 答案
sql
-- 解法1
-- 统计某科目,对应的月份和销售额
select month,money from test_subject where subid='XXX';
select t1.subid
from test_subject t1,
(select month, money from test_subject where subid = '1') t2
where t1.month = t2.month
and t1.money > t2.money
group by t1.subid
having count(t1.month) = (select count(distinct month) from test_subject where subid = '1')
;
-- 解法2
select distinct subid
from test_subject
where subid not in (select distinct t1.subid
from test_subject t1,
(select * from test_subject where subid = '1') t2
where t1.month = t2.month
and t1.money <= t2.money);
5. 行转置
5.1 问题描述
sql
比如,将下面的表数据
year month amount
1991 1 1.1
1991 2 1.2
1991 3 1.3
1991 4 1.4
1992 1 2.1
查成这样一个结果
year m1 m2 m3 m4
1991 1.1 1.2 1.3 1.4
1992 2.1 2.2 2.3 2.4
5.2 初始化数据
sql
create table test_sales(
year int null ,
month int null ,
amount int null
);
insert into test_sales values ('2000','1',1);
insert into test_sales values ('2000','2',4);
insert into test_sales values ('2000','3',5);
insert into test_sales values ('2001','1',2);
insert into test_sales values ('2001','2',3);
insert into test_sales values ('2001','3',7);
insert into test_sales values ('2002','1',11);
insert into test_sales values ('2002','2',12);
insert into test_sales values ('2002','3',13);
5.3 解决方案
sql
-- 方案1
select year,
sum(case when month = '1' then amount else 0 end) as m1,
sum(case when month = '2' then amount else 0 end) as m2,
sum(case when month = '3' then amount else 0 end) as m3
from test_sales group by year;
# year m1 m2 m3
# 2000 1 4 5
# 2001 2 3 7
# 2002 11 12 13
-- 方案2
select year,
(select amount from test_sales t where t.year = sales.year and t.month = 1) as m1,
(select amount from test_sales t where t.year = sales.year and t.month = 2) as m2,
(select amount from test_sales t where t.year = sales.year and t.month = 3) as m3
from test_sales sales
group by year;
6. 显示文章标题,发帖人、最后回复时间
6.1 初始化数据
sql
create table test_article (
id bigint auto_increment primary key ,
title varchar(32) null comment '标题',
postuser varchar(32) null comment '发帖人',
postdate varchar(32) null comment '回复时间',
parentid bigint null comment '上个帖子id'
);
insert into test_article(id,title, postuser, postdate, parentid) VALUES (1,'标题1','user1','20241218',null);
insert into test_article(id,title, postuser, postdate, parentid) VALUES (2,'标题1','user1','20241217',1);
insert into test_article(id,title, postuser, postdate, parentid) VALUES (3,'标题1','user1','20241216',2);
insert into test_article(id,title, postuser, postdate, parentid) VALUES (4,'标题2','user2','20241218',null);
insert into test_article(id,title, postuser, postdate, parentid) VALUES (5,'标题2','user2','20241218',4);
insert into test_article(id,title, postuser, postdate, parentid) VALUES (6,'标题2','user2','20241217',5);
insert into test_article(id,title, postuser, postdate, parentid) VALUES (7,'标题2','user3','20241216',6);
6.2 解决方案
sql
-- 解法1
-- 筛选最先发布的帖子
select * from test_article where parentid is null;
-- 筛选以最先发布的帖子xxx为上级帖子的所有帖子的最新发布时间
select postuser,title,max(postdate) from test_article where id='XXX' group by postuser,title;
select postuser, title, (select max(postdate) from test_article where parentid = a.id) t2
from test_article a
where a.parentid is null;
-- 解法2
select t2.postuser, t2.title, max(t2.postdate)
from test_article t1
inner join test_article t2 on t1.id = t2.parentid
where t1.parentid is null
group by t2.postuser, t2.title
# postuser title max(t2.postdate)
# user1 标题1 20241217
# user2 标题2 20241218
7. 删除除了 id 号不同,其他都相同的学生冗余信息
7.1 初始化数据
sql
create table test_student(
stuid int null ,
name varchar(32) null ,
age int null,
sex int null comment '1 男 2 女'
);
insert into test_student(stuid, name, age,sex) VALUES (4,'stu1',18,1);
insert into test_student(stuid, name, age,sex) VALUES (5,'stu2',10,2);
insert into test_student(stuid, name, age,sex) VALUES (6,'stu3',12,1);
insert into test_student(stuid, name, age,sex) VALUES (7,'stu3',12,1);
7.2 解决方案
sql
-- 解法1 找到不一样的记录,这里注意id比较用的是小于号或者大于号,而不是不等于,否则会重复
select *
from test_student t1,
test_student t2
where t1.stuid < t2.stuid
and t1.name = t2.name
and t1.age = t2.age
and t1.sex = t2.sex;
-- 删除的时候,要留意下面脚本会报错,因为同一个表在同一个脚本中又是查询,又是删除,此时可以多嵌套一层,定义一个虚表
delete
from test_student
where stuid in (select t1.stuid
from test_student t1,
test_student t2
where t1.stuid < t2.stuid
and t1.name = t2.name
and t1.age = t2.age
and t1.sex = t2.sex);
-- 下面可以正常删除
delete
from test_student
where stuid in (select t.stuid
from (select t1.stuid
from test_student t1,
test_student t2
where t1.stuid < t2.stuid
and t1.name = t2.name
and t1.age = t2.age
and t1.sex = t2.sex) as t);
-- 解法2
select *
from test_student t1
inner join
test_student t2
on
t1.stuid < t2.stuid
and t1.name = t2.name
and t1.age = t2.age
and t1.sex = t2.sex;
delete
from test_student
where stuid in (select stuid
from (select t1.stuid
from test_student t1
inner join
test_student t2
on
t1.stuid < t2.stuid
and t1.name = t2.name
and t1.age = t2.age
and t1.sex = t2.sex) as t);
8. 航空网的几个航班查询题
8.1.1 查询起飞城市是北京的所有航班,按到达城市的名字排序
8.1.2 查询北京到上海的所有航班纪录(起飞城市,到达城市,起飞时间,航班号)
8.1.3 查询具体某一天(20240103)的北京到上海的的航班次数
8.2 初始化数据
sql
create table test_flight(
flightId varchar(32) null ,
startCityId varchar(32),
endCityId varchar(32),
startTime varchar(32)
);
create table test_city(
cityId varchar(32),
cityName varchar(32)
);
insert into test_flight(flightId, startCityId, endCityId, startTime) VALUES ('f1','c1','c2','20240101');
insert into test_flight(flightId, startCityId, endCityId, startTime) VALUES ('f2','c2','c3','20240102');
insert into test_flight(flightId, startCityId, endCityId, startTime) VALUES ('f3','c3','c4','20240103');
insert into test_flight(flightId, startCityId, endCityId, startTime) VALUES ('f4','c1','c4','20240103');
insert into test_flight(flightId, startCityId, endCityId, startTime) VALUES ('f5','c1','c2','20240101');
insert into test_city(cityId, cityName) VALUES ('c1','北京');
insert into test_city(cityId, cityName) VALUES ('c2','上海');
insert into test_city(cityId, cityName) VALUES ('c3','广州');
insert into test_city(cityId, cityName) VALUES ('c4','深圳');
8.3 解决方案
sql
-- 1小问 查询起飞城市是北京的所有航班,按到达城市的名字排序 解法
select * from test_city where cityName='北京';
-- 解法1
select test_flight.flightId
from test_flight,
(select * from test_city) t2
where startCityId = (select cityId from test_city where cityName = '北京')
and t2.cityId = endCityId
order by cityName;
-- 解法2
select flightId
from test_flight,
(select * from test_city) c1,
(select * from test_city) c2
where startCityId = c1.cityId
and c1.cityName = '北京'
and endCityId = c2.cityId
order by c2.cityName;
-- 解法3
select flightId
from test_flight
inner join
(select * from test_city) c1
inner join
(select * from test_city) c2
on startCityId = c1.cityId
and c1.cityName = '北京'
and endCityId = c2.cityId
order by c2.cityName;
-- 2 小问 查询北京到上海的所有航班纪录(起飞城市,到达城市,起飞时间,航班号)
-- 解法1
select test_flight.*, c1.cityName, c2.cityName
from test_flight,
(select * from test_city where cityName = '北京') c1,
(select * from test_city where cityName = '上海') c2
where startCityId = c1.cityId
and endCityId = c2.cityId;
-- 解法2
select test_flight.*, c1.cityName, c2.cityName
from test_flight
inner join
(select * from test_city where cityName = '北京') c1
inner join
(select * from test_city where cityName = '上海') c2
on startCityId = c1.cityId
and endCityId = c2.cityId;
-- 查询具体某一天(20240101)的北京到上海的的航班次数
select count(flightId)
from test_flight,
(select * from test_city where cityName = '北京') c1,
(select * from test_city where cityName = '上海') c2
where startCityId = c1.cityId
and endCityId = c2.cityId
and startTime='20240101';
select count(flightId)
from test_flight
inner join
(select * from test_city where cityName = '北京') c1
inner join
(select * from test_city where cityName = '上海') c2
on startCityId = c1.cityId
and endCityId = c2.cityId
where startTime='20240101';
9. 查出比经理薪水还高的员工信息
9.1 初始化数据
sql
create table test_employee2(
id int null ,
name varchar(32) null ,
salary int null ,
managerid int
);
insert into test_employee2 values (1,'lisi',300,null);
insert into test_employee2 values (2,'zhangsan',100,1);
insert into test_employee2 values (3,'wangwu',400,1);
insert into test_employee2 values (4,'lilianjie',600,2);
insert into test_employee2 values (5,'chenlong',50,4);
9.2 解决方案
sql
-- 解法1
select t1.*
from test_employee2 t1,
test_employee2 t2
where t1.managerid = t2.id
and t1.salary > t2.salary;
-- 解法2
select t1.*
from test_employee2 t1 inner join
test_employee2 t2
on t1.managerid = t2.id
and t1.salary > t2.salary;
#id name salary managerid
#3 wangwu 400 1
#4 lilianjie 600 2
10. 求出小于 45 岁的各个老师所带的大于 12 岁的学生人数
10.1 初始化数据
sql
create table test_teacher(
teaid int null ,
name varchar(32) null ,
age int null
);
create table test_teacher_student(
teaid int null ,
stuid int null
);
insert into test_teacher (teaid, name, age) VALUES (1,'tea1',34);
insert into test_teacher (teaid, name, age) VALUES (2,'tea2',60);
insert into test_teacher (teaid, name, age) VALUES (3,'tea3',40);
-- 学生数据复用上面案例造的
insert into test_teacher_student(teaid, stuid) values (1,4);
insert into test_teacher_student(teaid, stuid) values (1,5);
insert into test_teacher_student(teaid, stuid) values (1,6);
insert into test_teacher_student(teaid, stuid) values (2,6);
insert into test_teacher_student(teaid, stuid) values (3,6);
insert into test_teacher_student(teaid, stuid) values (2,4);
insert into test_teacher_student(teaid, stuid) values (3,4);
10.2 解决方案
sql
-- 解法1
-- 求出小于 45 岁的各个老师所带的大于 12 岁的学生人数
select * from test_teacher where age<'45';
select * from test_student where age>'12';
select count(t1.stuid)
from test_teacher_student t1,
(select *
from test_teacher
where age < '45') t2,
(select * from test_student where age > '12') t3
where t2.teaid = t1.teaid
and t3.stuid = t1.stuid
;
-- 解法2
select count(t1.stuid)
from test_teacher_student t1
inner join
(select *
from test_teacher
where age < '45') t2
inner join
(select * from test_student where age > '12') t3
on t2.teaid = t1.teaid
and t3.stuid = t1.stuid
;
11. 求出发帖最多的人
11.1 初始化数据
sql
数据复用上面案例中的数据
11.2 解决方案
sql
-- 解法1
select postuser
from test_article
group by postuser
having count(*) = (select max(cut)
from (select postuser, count(*) cut from test_article group by postuser) t2);
12. 一个用户具有多个角色,请查询出该表中具有该用户的所有角色的其他用户
12.1 初始化数据
sql
create table test_user
(
userId varchar(32),
userName varchar(32)
);
create table test_role
(
roleId varchar(32),
roleName varchar(32)
);
create table test_user_role
(
userId varchar(32),
roleId varchar(32)
);
insert into test_user(userId, userName) VALUES ('u1','zhangsanfeng');
insert into test_user(userId, userName) VALUES ('u2','chenglong');
insert into test_user(userId, userName) VALUES ('u3','lixiaolong');
insert into test_user(userId, userName) VALUES ('u4','linghuchong');
insert into test_role(roleId, roleName) VALUES ('r1','角色1');
insert into test_role(roleId, roleName) VALUES ('r2','角色2');
insert into test_role(roleId, roleName) VALUES ('r3','角色3');
insert into test_user_role(userId, roleId) values ('u1','r1');
insert into test_user_role(userId, roleId) values ('u1','r2');
insert into test_user_role(userId, roleId) values ('u1','r3');
insert into test_user_role(userId, roleId) values ('u2','r1');
insert into test_user_role(userId, roleId) values ('u2','r2');
insert into test_user_role(userId, roleId) values ('u3','r2');
insert into test_user_role(userId, roleId) values ('u4','r2');
insert into test_user_role(userId, roleId) values ('u2','r3');
12.2 解决方案
sql
-- 解法1 一个用户xxx具有多个角色,请查询出该表中具有该用户的所有角色的其他用户
-- 筛选该用户的所有角色
select * from test_user_role where userId=(select test_user.userId from test_user where userName='zhangsanfeng');
-- 筛选本人以外的其他用户
select t2.userId, count(t2.roleId)
from test_user t1,
test_user_role t2
where t1.userId = t2.userId
and roleId in (select roleId
from test_user_role
where userId = (select test_user.userId from test_user where userName = 'zhangsanfeng'))
and t2.userId != (select test_user.userId from test_user where userName = 'zhangsanfeng')
group by t2.userId
having count(t2.roleId) = (select count(*)
from test_user_role
where userId = (select test_user.userId from test_user where userName = 'zhangsanfeng'));
select *
from test_user
where userId in (select t2.userId
from test_user t1,
test_user_role t2
where t1.userId = t2.userId
and roleId in (select roleId
from test_user_role
where userId =
(select test_user.userId from test_user where userName = 'zhangsanfeng'))
-- 排除本人
and t2.userId != (select test_user.userId from test_user where userName = 'zhangsanfeng')
group by t2.userId
-- 统计计数,保证所有角色都有
having count(t2.roleId) = (select count(*)
from test_user_role
where userId =
(select test_user.userId from test_user where userName = 'zhangsanfeng')))
-- 优化后版本
SELECT *
FROM test_user
WHERE userId IN (SELECT t2.userId
FROM test_user_role t2
WHERE t2.roleId IN (SELECT roleId
FROM test_user_role
WHERE userId = (SELECT userId FROM test_user WHERE userName = 'zhangsanfeng'))
GROUP BY t2.userId
HAVING COUNT(DISTINCT t2.roleId) = (SELECT COUNT(*)
FROM test_user_role
WHERE userId = (SELECT userId FROM test_user WHERE userName = 'zhangsanfeng'))
AND t2.userId != (SELECT userId FROM test_user WHERE userName = 'zhangsanfeng'));
-- 高阶函数优化
WITH RoleSet AS (
SELECT roleId
FROM test_user_role
WHERE userId = (SELECT userId FROM test_user WHERE userName = 'zhangsanfeng')
),
FilteredUsers AS (
SELECT t2.userId
FROM test_user_role t2
JOIN RoleSet r ON t2.roleId = r.roleId
GROUP BY t2.userId
HAVING COUNT(t2.roleId) = (SELECT COUNT(*) FROM RoleSet)
)
SELECT *
FROM test_user
WHERE userId IN (SELECT userId FROM FilteredUsers)
AND userId != (SELECT userId FROM test_user WHERE userName = 'zhangsanfeng');
13. 查出个人工资高于其所在部门平均工资的员工,列出这些员工的全部个人信息及该员工工资高出部门平均工资百分比
13.1 初始化数据
sql
create table test_employee3
(
id int null,
firstname varchar(32),
lastname varchar(32),
salary int,
hireddate varchar(32),
departmentid varchar(32)
);
create table test_department
(
id varchar(32),
name varchar(32)
);
insert into test_employee3(id, firstname, lastname, salary, hireddate, departmentid) VALUES (1,'zahng','sanfeng',1500,'20241010',11);
insert into test_employee3(id, firstname, lastname, salary, hireddate, departmentid) VALUES (2,'danny','obama',1500,'20241010',11);
insert into test_employee3(id, firstname, lastname, salary, hireddate, departmentid) VALUES (3,'janny','obama',60000,'20241010',11);
insert into test_employee3(id, firstname, lastname, salary, hireddate, departmentid) VALUES (4,'li','si',100,'20231010',12);
insert into test_employee3(id, firstname, lastname, salary, hireddate, departmentid) VALUES (5,'li','xaiolong',10000,'20221010',13);
insert into test_employee3(id, firstname, lastname, salary, hireddate, departmentid) VALUES (6,'li','ming',10000,'20221012',13);
insert into test_department(id,name)values (11,'科技');
insert into test_department(id,name)values (12,'人事');
insert into test_department(id,name)values (13,'武打');
13.2 解决方案
sql
select departmentid,avg(salary) from test_employee3 group by departmentid;
select t1.*,(t1.salary-avg)/avg as percent
from test_employee3 t1,
(select departmentid, avg(salary) as avg from test_employee3 group by departmentid) t2
where t1.departmentid = t2.departmentid
and t1.salary > avg
总结
以上脚本是笔者逐个验证过的,可以直接运行测试,通过以上案例可以发现以下几点:
- SQL中有点绕的是表自关联处理数据
- 极大部分子查询笛卡尔积的SQL都可以改写为join查询
- mysql8 提供的高阶函数,窗口函数等会极大简化sql处理
- 遇见复杂的sql问题需要逐步拆解为小问题,最后将所有小脚本合并为大脚本
- 有时候逆向思维会简化问题,比如案例2
以上,谢谢阅读,如有不对,请不吝指正,另外,也欢迎有其他更好的方案的老师评论区留言。