mysql not in 查询引发的bug问题记录
数据准备
版本信息
mysql 8.0.13
shell
mysql> select version();
+-----------+
| version() |
+-----------+
| 8.0.13 |
+-----------+
1 row in set (0.00 sec)
建表语句
sql
-- Demo table for the NOT IN / NULL test: user_id has no NOT NULL constraint,
-- so it accepts NULL values.
create table t_null_test (
    id      bigint       not null auto_increment,
    city    varchar(100) default null comment '居住城市',
    user_id bigint       default null comment '用户ID',
    primary key (id)
) default charset = utf8mb4 comment = 'not in null测试表';
-- User table; its id column is the value looked up in t_null_test.user_id.
create table t_user (
    id   bigint      not null auto_increment,
    name varchar(50) default null comment '姓名',
    primary key (id)
) default charset = utf8mb4;
查看表信息
show create table t_null_test\G
shell
mysql> show create table t_null_test\G;
*************************** 1. row ***************************
Table: t_null_test
Create Table: CREATE TABLE `t_null_test` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`city` varchar(100) DEFAULT NULL COMMENT '居住城市',
`user_id` bigint(20) DEFAULT NULL COMMENT '用户ID',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci COMMENT='not in null测试表'
1 row in set (0.00 sec)
ERROR:
No query specified
mysql> show create table t_user\G;
*************************** 1. row ***************************
Table: t_user
Create Table: CREATE TABLE `t_user` (
`id` bigint(20) NOT NULL AUTO_INCREMENT,
`name` varchar(50) DEFAULT NULL COMMENT '姓名',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
1 row in set (0.00 sec)
ERROR:
No query specified
测试数据
sql
-- Seed data: user_id is BIGINT, so it must hold integer ids (not name strings).
-- The last row stores NULL in user_id, which is the value the NOT IN test relies on.
insert into t_null_test (city, user_id)
values ('佛山', 1),
       ('广州', 2),
       ('深圳', 3),
       ('东莞', null);
shell
mysql> insert into t_null_test(city, user_id) values('佛山', 1),('广州', 2), ('深圳', 3), ('东莞', null);
Query OK, 4 rows affected (0.16 sec)
Records: 4 Duplicates: 0 Warnings: 0
mysql> select * from t_null_test;
+----+------+---------+
| id | city | user_id |
+----+------+---------+
| 1 | 佛山 | 1 |
| 2 | 广州 | 2 |
| 3 | 深圳 | 3 |
| 4 | 东莞 | NULL |
+----+------+---------+
4 rows in set (0.00 sec)
mysql> insert into t_user(name) values('scd'),('cd'),('ss'),('sss');
Query OK, 4 rows affected (0.18 sec)
Records: 4 Duplicates: 0 Warnings: 0
mysql> select * from t_user;
+----+------+
| id | name |
+----+------+
| 1 | scd |
| 2 | cd |
| 3 | ss |
| 4 | sss |
+----+------+
4 rows in set (0.00 sec)
问题说明
使用 not in 查询
shell
mysql> select * from t_user where id not in (select user_id from t_null_test);
Empty set (0.07 sec)
这条有问题的 SQL 的本意是:从 t_user 表中筛选出 id 不在 t_null_test 表 user_id 列中的记录。查询结果为空,但实际上 t_user 中 id=4 的记录并没有出现在 t_null_test.user_id 中,本应被查询出来。
问题分析以及正确写法
执行逻辑
- 先执行子查询select user_id from t_null_test获取所有user_id值
- 主查询检查t_user表中每条记录的id是否不在子查询结果集中
- 最终返回满足条件的完整用户记录
错误问题分析
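NOT IN 在子查询结果包含 NULL 值时会返回意外结果,因为与 NULL 的比较会返回 UNKNOWN 而非 TRUE/FALSE。例如 id not in (1, 2, 3, NULL) 等价于 id <> 1 and id <> 2 and id <> 3 and id <> NULL,其中 id <> NULL 的结果恒为 UNKNOWN,所以整个条件要么为 FALSE(id 命中 1、2、3 之一),要么为 UNKNOWN,永远不会为 TRUE;而 where 只保留条件为 TRUE 的行,因此一行也查不出来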
性能分析
当子查询结果集较大时性能较差,因为它需要对子查询结果进行哈希匹配
正确写法
- 过滤 null 数据
shell
mysql> select * from t_user where id not in (select user_id from t_null_test where user_id is not null);
+----+------+
| id | name |
+----+------+
| 4 | sss |
+----+------+
1 row in set (0.00 sec)
- 使用 not exists (推荐)
shell
mysql> select * from t_user u where not exists (select 1 from t_null_test t where u.id=t.user_id);
+----+------+
| id | name |
+----+------+
| 4 | sss |
+----+------+
1 row in set (0.00 sec)
- 使用 left join 过滤 null(下面展示的是该写法的执行计划;去掉 explain 实际执行时,同样只返回 id=4 的用户 sss,其 t_null_test 侧的列均为 NULL)
shell
mysql> explain select * from t_user u left join t_null_test t on u.id=t.user_id where t.user_id is null;
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
| 1 | SIMPLE | u | NULL | ALL | NULL | NULL | NULL | NULL | 4 | 100.00 | NULL |
| 1 | SIMPLE | t | NULL | ALL | idx_user_id | NULL | NULL | NULL | 4 | 25.00 | Using where; Using join buffer (Block Nested Loop) |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
2 rows in set, 1 warning (0.00 sec)
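为啥推荐 not exists, 查看sql 执行计划分析(注:执行计划中出现的 idx_user_id 是 t_null_test.user_id 上的索引,上文的建表语句并未包含,需要另行创建,例如:alter table t_null_test add index idx_user_id(user_id);)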
shell
mysql> explain select * from t_user u where not exists (select 1 from t_null_test t where u.id=t.user_id);
+----+--------------------+-------+------------+------+---------------+-------------+---------+-----------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+-------+------------+------+---------------+-------------+---------+-----------+------+----------+-------------+
| 1 | PRIMARY | u | NULL | ALL | NULL | NULL | NULL | NULL | 4 | 100.00 | Using where |
| 2 | DEPENDENT SUBQUERY | t | NULL | ref | idx_user_id | idx_user_id | 9 | test.u.id | 1 | 100.00 | Using index |
+----+--------------------+-------+------------+------+---------------+-------------+---------+-----------+------+----------+-------------+
2 rows in set, 2 warnings (0.00 sec)
mysql> explain select * from t_user where id not in (select user_id from t_null_test where user_id is not null);
+----+--------------------+-------------+------------+----------------+---------------+-------------+---------+------+------+----------+--------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+--------------------+-------------+------------+----------------+---------------+-------------+---------+------+------+----------+--------------------------+
| 1 | PRIMARY | t_user | NULL | ALL | NULL | NULL | NULL | NULL | 4 | 100.00 | Using where |
| 2 | DEPENDENT SUBQUERY | t_null_test | NULL | index_subquery | idx_user_id | idx_user_id | 9 | func | 2 | 100.00 | Using where; Using index |
+----+--------------------+-------------+------------+----------------+---------------+-------------+---------+------+------+----------+--------------------------+
2 rows in set, 1 warning (0.02 sec)
mysql> explain select * from t_user u left join t_null_test t on u.id=t.user_id where t.user_id is null;
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
| 1 | SIMPLE | u | NULL | ALL | NULL | NULL | NULL | NULL | 4 | 100.00 | NULL |
| 1 | SIMPLE | t | NULL | ALL | idx_user_id | NULL | NULL | NULL | 4 | 25.00 | Using where; Using join buffer (Block Nested Loop) |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+----------------------------------------------------+
2 rows in set, 1 warning (0.00 sec)
预估行数计算 rows * filtered%(rows 是优化器预估需要扫描的行数,filtered 是预估经条件过滤后剩余行所占的百分比,两者都是估算值;以下按各执行计划中 t_null_test 一行的数值计算)
- not exists = 1 * 100% = 1
- not in = 2 * 100% = 2
- left join ... is null = 4 * 25% = 1