1. 利用子查询进行数据操作
1.1 使用子查询插入数据
子查询在数据插入操作中非常实用,特别是在需要从其他表中提取数据插入到目标表时。
基础语法
sql
-- Generic INSERT ... SELECT template: every row returned by the SELECT is
-- inserted into target_table. The SELECT list is matched to the INSERT
-- column list by position, not by name.
-- "..." and "condition" are placeholders, not literal SQL.
INSERT INTO target_table (column1, column2, ...)
SELECT column1, column2, ...
FROM source_table
WHERE condition;
实战示例
示例1:从历史表迁移数据
sql
-- Summary table: one row per customer holding the order count, the total
-- order value and the most recent order date.
CREATE TABLE order_summary (
    customer_id     INT,
    total_orders    INT,
    total_amount    DECIMAL(10,2),
    last_order_date DATE
);

-- Fill the summary from orders placed on or after 2024-01-01.
-- The SELECT list lines up positionally with the INSERT column list.
-- NOTE(review): the order value is read from orders.amount, the column name
-- the other statements in this file use; confirm against the real schema.
INSERT INTO order_summary (customer_id, total_orders, total_amount, last_order_date)
SELECT
    customer_id,
    COUNT(*)        AS total_orders,
    SUM(amount)     AS total_amount,
    MAX(order_date) AS last_order_date
FROM orders
WHERE order_date >= '2024-01-01'
GROUP BY customer_id;
示例2:条件性数据插入
sql
-- VIP customer table: one row per qualifying customer with their total spend.
CREATE TABLE vip_customers (
    customer_id    INT PRIMARY KEY,
    customer_name  VARCHAR(100),
    total_purchase DECIMAL(10,2)
);

-- Insert only customers whose total order amount exceeds 10000.
-- The threshold is applied with HAVING on the same aggregate that is
-- inserted, so orders is aggregated once. An INNER JOIN is sufficient:
-- a customer without orders can never pass the threshold.
-- NOTE(review): reads customers.customer_name, the column name used by the
-- later queries in this file; confirm against the real schema.
INSERT INTO vip_customers (customer_id, customer_name, total_purchase)
SELECT
    c.customer_id,
    c.customer_name,
    SUM(o.amount) AS total_purchase
FROM customers AS c
INNER JOIN orders AS o
    ON o.customer_id = c.customer_id
GROUP BY c.customer_id, c.customer_name
HAVING SUM(o.amount) > 10000;
1.2 使用子查询更新数据
子查询在更新操作中可以实现基于其他表的复杂条件更新。
基础语法
sql
-- Generic UPDATE template: each assigned value comes from a scalar subquery,
-- which must return at most one row and one column per updated row.
-- "SELECT ..." and "condition" are placeholders, not literal SQL.
UPDATE target_table
SET column1 = (SELECT ...),
column2 = (SELECT ...)
WHERE condition;
实战示例
示例1:更新客户等级
sql
-- Add the tier column; existing and new rows start at the default tier.
ALTER TABLE customers ADD COLUMN customer_level VARCHAR(20) DEFAULT '普通';

-- Re-tier every customer from their total order amount.
-- The per-customer total is computed once in a derived table and joined in,
-- instead of re-running the same correlated SUM for every CASE branch.
-- LEFT JOIN keeps customers without orders: their total is NULL, every
-- comparison below is then not true, and they fall through to the ELSE tier.
-- There is deliberately no WHERE clause: all customers are updated.
UPDATE customers AS c
LEFT JOIN (
    SELECT
        customer_id,
        SUM(amount) AS total_spent
    FROM orders
    GROUP BY customer_id
) AS spend
    ON spend.customer_id = c.customer_id
SET c.customer_level =
    CASE
        WHEN spend.total_spent > 50000 THEN '钻石'
        WHEN spend.total_spent > 20000 THEN '黄金'
        WHEN spend.total_spent > 5000  THEN '白银'
        ELSE '普通'
    END;
示例2:批量更新库存信息
sql
-- Refresh stock status and last restock date for active products.
-- stock_status is derived from the product's own quantity columns;
-- last_restock_date is the newest restock_date logged for that product.
-- A product with no inventory_log rows gets NULL (MAX over zero rows).
UPDATE products AS p
SET
    stock_status =
        CASE
            WHEN p.quantity_in_stock <= 0 THEN '缺货'
            WHEN p.quantity_in_stock < p.minimum_stock THEN '低库存'
            ELSE '充足'
        END,
    last_restock_date = (
        -- Columns are qualified so the correlation to the outer row is
        -- explicit and cannot silently bind to the wrong table.
        SELECT MAX(il.restock_date)
        FROM inventory_log AS il
        WHERE il.product_id = p.product_id
    )
WHERE p.is_active = 1;
示例3:使用关联子查询更新
sql
-- Set each customer's last purchase date to the newest order_date among
-- their orders with status 'completed'.
-- The UPDATE has no WHERE clause, so every customer row is written; a
-- customer without a completed order ends up with NULL (MAX over zero rows).
UPDATE customers AS c
SET last_purchase_date = (
    SELECT MAX(ord.order_date)
    FROM orders AS ord
    WHERE ord.status = 'completed'
      AND ord.customer_id = c.customer_id
);
1.3 使用子查询删除数据
子查询在删除操作中可以帮助我们实现基于复杂条件的数据清理。
基础语法
sql
-- Generic DELETE template: rows are removed when column_name compares true
-- against the subquery result. OPERATOR stands for a comparison such as
-- IN, =, <, or EXISTS-style predicates; "SELECT ..." is a placeholder.
DELETE FROM table_name
WHERE column_name OPERATOR (SELECT ...);
实战示例
示例1:删除长期未活跃用户
sql
-- Delete users whose most recent login is more than two years old AND who
-- have never placed an order. Both conditions must hold; combining them
-- with OR would also delete recently active users without orders and
-- long-inactive users who do have orders.
-- The subqueries read only login_log and orders, never users itself, so the
-- statement does not hit MySQL's restriction on selecting from the table
-- being deleted from (error 1093).
-- A user with no login_log rows is kept: MAX() yields NULL and the
-- comparison is then not true.
DELETE FROM users
WHERE (
        SELECT MAX(l.login_time)
        FROM login_log AS l
        WHERE l.user_id = users.user_id
      ) < DATE_SUB(NOW(), INTERVAL 2 YEAR)
  AND NOT EXISTS (
        SELECT 1
        FROM orders AS o
        WHERE o.customer_id = users.user_id
      );
示例2:清理测试数据
sql
-- Remove all test orders together with their detail rows.
-- Detail rows go first so no order_details row is left pointing at a
-- deleted order.
DELETE FROM order_details
WHERE EXISTS (
    SELECT 1
    FROM orders AS o
    WHERE o.order_id = order_details.order_id
      AND o.order_type = 'test'
);

-- Then remove the test orders themselves.
DELETE FROM orders
WHERE order_type = 'test';
示例3:删除重复数据
sql
-- Delete duplicate customer rows, keeping the newest row per email.
-- Written as a multi-table DELETE joined to a derived table: MySQL rejects
-- "DELETE FROM customers WHERE ... IN (SELECT ... FROM customers ...)" with
-- error 1093 because the subquery reads the table being deleted from. The
-- GROUP BY forces the derived table to be materialized, so it is safe here.
-- newest holds, for each email that occurs more than once, the latest
-- created_at; every row of that email older than it is removed.
-- Rows that tie on the latest created_at are all kept.
DELETE c1
FROM customers AS c1
INNER JOIN (
    SELECT
        email,
        MAX(created_at) AS max_date
    FROM customers
    GROUP BY email
    HAVING COUNT(*) > 1
) AS newest
    ON c1.email = newest.email
   AND c1.created_at < newest.max_date;
2. 通用表表达式(CTE)详解
2.1 CTE基础概念
通用表表达式(CTE)是MySQL 8.0引入的重要功能,它提供了更清晰、可读性更强的查询结构。
基本语法
sql
-- Generic CTE template: the WITH clause names a result set (cte_name) that
-- the following statement can query like a table. The optional column list
-- after the name renames the SELECT's output columns by position.
-- "..." and "condition" are placeholders, not literal SQL.
WITH cte_name (column1, column2, ...) AS (
SELECT column1, column2, ...
FROM table_name
WHERE condition
)
SELECT * FROM cte_name;
2.2 CTE实战应用
示例1:递归CTE处理层次结构数据
sql
-- Department table; parent_dept_id points at the parent department and is
-- NULL for a top-level department.
CREATE TABLE departments (
    dept_id        INT PRIMARY KEY,
    dept_name      VARCHAR(100),
    parent_dept_id INT,
    manager_id     INT
);

-- Walk the department tree from the top-level departments downwards.
-- level is the depth (1 = top level); hierarchy_path is the chain of
-- department names from the top, joined with ' > '.
WITH RECURSIVE department_hierarchy AS (
    -- Anchor: top-level departments. The CAST fixes the column type of
    -- hierarchy_path for the whole recursion, so paths are limited to
    -- 500 characters.
    SELECT
        dept_id,
        dept_name,
        parent_dept_id,
        manager_id,
        1 AS level,
        CAST(dept_name AS CHAR(500)) AS hierarchy_path
    FROM departments
    WHERE parent_dept_id IS NULL

    UNION ALL

    -- Recursive step: children of the departments found so far.
    SELECT
        d.dept_id,
        d.dept_name,
        d.parent_dept_id,
        d.manager_id,
        dh.level + 1,
        CONCAT(dh.hierarchy_path, ' > ', d.dept_name)
    FROM departments AS d
    INNER JOIN department_hierarchy AS dh
        ON d.parent_dept_id = dh.dept_id
)
SELECT
    dh.dept_id,
    dh.dept_name,
    dh.level,
    dh.hierarchy_path,
    (
        -- Both sides are qualified so the count is always correlated to the
        -- current department, whatever columns employees happens to have.
        SELECT COUNT(*)
        FROM employees AS e
        WHERE e.department_id = dh.dept_id
    ) AS employee_count
FROM department_hierarchy AS dh
ORDER BY dh.hierarchy_path;
示例2:多重CTE构建复杂报表
sql
-- Monthly sales report: per month and product category, the sales total,
-- the category's yearly total and monthly average, the previous month's
-- total, the month-over-month growth rate in percent, and a trend label.
WITH
-- Sales per calendar month and product category since NOW() minus 12 months.
-- NOTE(review): order_date, quantity, amount and product_category are
-- unqualified, so their owning tables cannot be told from here. If amount
-- is an orders column, the join to order_details repeats it once per detail
-- row and inflates the sum — confirm against the schema.
monthly_sales AS (
    SELECT
        DATE_FORMAT(order_date, '%Y-%m') AS month,
        product_category,
        SUM(quantity) AS total_quantity,
        SUM(amount)   AS total_amount
    FROM orders AS o
    INNER JOIN order_details AS od
        ON o.order_id = od.order_id
    INNER JOIN products AS p
        ON od.product_id = p.product_id
    WHERE order_date >= DATE_SUB(NOW(), INTERVAL 12 MONTH)
    GROUP BY DATE_FORMAT(order_date, '%Y-%m'), product_category
),
-- Per-category average monthly sales and total over the same period.
category_summary AS (
    SELECT
        product_category,
        AVG(total_amount) AS avg_monthly_sales,
        SUM(total_amount) AS yearly_total
    FROM monthly_sales
    GROUP BY product_category
),
-- Month-over-month comparison inside each category.
-- The window is declared once and reused. growth_rate is NULL when there
-- is no previous month or the previous month's total is 0 (NULLIF guards
-- the division).
growth_analysis AS (
    SELECT
        ms.month,
        ms.product_category,
        ms.total_amount,
        LAG(ms.total_amount) OVER w AS prev_month_amount,
        ROUND(
            (ms.total_amount - LAG(ms.total_amount) OVER w)
                / NULLIF(LAG(ms.total_amount) OVER w, 0) * 100,
            2
        ) AS growth_rate
    FROM monthly_sales AS ms
    WINDOW w AS (
        PARTITION BY ms.product_category
        ORDER BY ms.month
    )
)
SELECT
    ga.month,
    ga.product_category,
    ga.total_amount,
    cs.yearly_total,
    cs.avg_monthly_sales,
    ga.prev_month_amount,
    ga.growth_rate,
    CASE
        -- No comparable previous month: must be tested first, otherwise a
        -- NULL rate falls through to ELSE and is reported as a decline.
        WHEN ga.growth_rate IS NULL THEN '无对比数据'
        WHEN ga.growth_rate > 20    THEN '快速增长'
        WHEN ga.growth_rate >= 5    THEN '稳步增长'
        WHEN ga.growth_rate >= -5   THEN '稳定'
        ELSE '下降'
    END AS trend
FROM growth_analysis AS ga
INNER JOIN category_summary AS cs
    ON ga.product_category = cs.product_category
ORDER BY ga.month DESC, ga.total_amount DESC;
示例3:CTE优化复杂查询
sql
-- For each department: the highest-paid active employee, the department's
-- average salary and head count, and how far the top salary is above the
-- average, in percent.
WITH
-- Rank active employees inside their department by salary (highest first);
-- ties are broken by the earlier hire_date.
department_salaries AS (
    SELECT
        d.dept_id,
        d.dept_name,
        e.employee_name,
        e.salary,
        ROW_NUMBER() OVER (
            PARTITION BY e.department_id
            ORDER BY e.salary DESC, e.hire_date
        ) AS salary_rank
    FROM employees AS e
    INNER JOIN departments AS d
        ON e.department_id = d.dept_id
    WHERE e.is_active = 1
),
-- Exactly one row per department: the rank-1 employee.
top_performers AS (
    SELECT
        dept_id,
        dept_name,
        employee_name,
        salary,
        salary_rank
    FROM department_salaries
    WHERE salary_rank = 1
),
-- Average salary and head count of active employees per department.
-- Grouped by dept_id, the same key the ranking is partitioned on, so two
-- departments that share a name are never merged.
department_averages AS (
    SELECT
        d.dept_id,
        AVG(e.salary) AS avg_salary,
        COUNT(*)      AS employee_count
    FROM employees AS e
    INNER JOIN departments AS d
        ON e.department_id = d.dept_id
    WHERE e.is_active = 1
    GROUP BY d.dept_id
)
SELECT
    tp.dept_name,
    tp.employee_name AS top_earner,
    tp.salary        AS top_salary,
    da.avg_salary,
    da.employee_count,
    -- NULLIF guards the division when a department's average salary is 0.
    ROUND((tp.salary - da.avg_salary) / NULLIF(da.avg_salary, 0) * 100, 2) AS salary_difference_percent
FROM top_performers AS tp
INNER JOIN department_averages AS da
    ON tp.dept_id = da.dept_id
ORDER BY salary_difference_percent DESC;
2.3 CTE性能优化技巧
使用CTE代替嵌套子查询
sql
-- Before: two correlated scalar subqueries, each evaluated per customer row.
-- SUM over zero rows is NULL, so it is wrapped in COALESCE to return 0 for
-- customers without orders, matching the CTE version below.
-- NOTE(review): customers.created_date is used here while the duplicate
-- cleanup earlier in this file uses customers.created_at — confirm which
-- column exists.
SELECT
    c.customer_name,
    c.email,
    (SELECT COUNT(*) FROM orders AS o WHERE o.customer_id = c.customer_id) AS order_count,
    COALESCE(
        (SELECT SUM(o.amount) FROM orders AS o WHERE o.customer_id = c.customer_id),
        0
    ) AS total_spent
FROM customers AS c
WHERE c.created_date > '2024-01-01';

-- After: orders is aggregated once per customer in a CTE and joined in.
-- LEFT JOIN keeps customers without orders; COALESCE turns their missing
-- statistics into 0.
WITH customer_stats AS (
    SELECT
        customer_id,
        COUNT(*)    AS order_count,
        SUM(amount) AS total_spent
    FROM orders
    GROUP BY customer_id
)
SELECT
    c.customer_name,
    c.email,
    COALESCE(cs.order_count, 0) AS order_count,
    COALESCE(cs.total_spent, 0) AS total_spent
FROM customers AS c
LEFT JOIN customer_stats AS cs
    ON c.customer_id = cs.customer_id
WHERE c.created_date > '2024-01-01';
使用CTE提高递归查询性能
sql
-- Cap the number of recursion steps the server allows for this session.
SET SESSION cte_max_recursion_depth = 1000;

-- Build the reporting chain from employees without a manager downwards.
-- level is the depth (1 = no manager); hierarchy_path is the chain of
-- employee names from the top, joined with ' > '.
WITH RECURSIVE employee_hierarchy AS (
    -- Anchor: employees that have no manager.
    SELECT
        employee_id,
        employee_name,
        manager_id,
        1 AS level,
        CAST(employee_name AS CHAR(1000)) AS hierarchy_path
    FROM employees
    WHERE manager_id IS NULL

    UNION ALL

    -- Recursive step: direct reports of the rows found so far. Expansion
    -- stops once a row is at level 10, so no row deeper than that appears.
    SELECT
        report.employee_id,
        report.employee_name,
        report.manager_id,
        boss.level + 1,
        CONCAT(boss.hierarchy_path, ' > ', report.employee_name)
    FROM employees AS report
    INNER JOIN employee_hierarchy AS boss
        ON report.manager_id = boss.employee_id
    WHERE boss.level < 10
)
SELECT
    employee_id,
    employee_name,
    manager_id,
    level,
    hierarchy_path
FROM employee_hierarchy;
3. 最佳实践与注意事项
3.1 性能优化建议
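使用EXISTS代替IN:在NOT IN遇到NULL值、或较旧的MySQL版本中,EXISTS/NOT EXISTS通常更安全、性能也更好;自MySQL 8.0.16起,优化器会把IN和EXISTS子查询都转换为半连接(semijoin),两者性能通常相当,应以EXPLAIN的实际执行计划为准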
sql
-- Before: membership test with IN against the active categories.
SELECT *
FROM products
WHERE category_id IN (
    SELECT category_id
    FROM categories
    WHERE is_active = 1
);

-- After: the same filter as a correlated EXISTS — a product is returned
-- when an active category with its category_id exists.
SELECT *
FROM products AS p
WHERE EXISTS (
    SELECT 1
    FROM categories AS cat
    WHERE cat.is_active = 1
      AND cat.category_id = p.category_id
);
合理使用索引:确保子查询中使用的列都有适当的索引
限制子查询结果集:让子查询只返回必要的行和列(尽早用WHERE过滤);注意MySQL不支持在IN/ANY/ALL/SOME子查询中直接使用LIMIT,确有需要时应将带LIMIT的查询包在一层派生表中
3.2 可读性提升技巧
使用有意义的CTE名称:让CTE名称反映其用途
适当添加注释:解释复杂子查询的逻辑
格式化查询:保持一致的缩进和换行
总结
MySQL子查询和通用表表达式(CTE)是强大的数据操作工具。子查询在数据插入、更新和删除操作中提供了极大的灵活性,而CTE则显著提高了复杂查询的可读性和可维护性。掌握这些技术,将使你在处理复杂业务逻辑和数据操作时事半功倍。
记住关键原则:在追求功能实现的同时,始终关注查询性能和代码可读性。适当的索引、合理的查询结构,以及清晰的代码注释,都是写出优秀SQL代码的重要保障。