基础
-- Basic projection: name the columns you need.
-- `your_table` is a placeholder name; TABLE itself is a reserved keyword and
-- cannot be used as a bare table identifier.
SELECT
    col1,
    col2
FROM your_table;

-- Column aliases. The AS keyword is optional (`col2 name2` also works),
-- but spelling it out makes the alias easier to spot.
SELECT
    col1 AS name1,
    col2 AS name2
FROM your_table;

-- DISTINCT removes duplicate rows from the result.
SELECT DISTINCT department
FROM employees;

-- LIMIT caps the number of returned rows.
SELECT *
FROM your_table
LIMIT 100;

-- Row filtering with WHERE; both conditions must hold.
SELECT *
FROM sales
WHERE amount > 1000
  AND region = '华东';
关联
| JOIN类型 | 左表 | 右表 | 适用场景 |
|---|---|---|---|
| INNER JOIN(JOIN) | 匹配记录 | 匹配记录 | 找共同数据 |
| LEFT JOIN | 所有记录 | 匹配记录(无匹配时补NULL) | 保留左表全部数据;加 WHERE 右表键 IS NULL 可找出左表有但右表没有的数据 |
| RIGHT JOIN | 匹配记录(无匹配时补NULL) | 所有记录 | 保留右表全部数据;加 WHERE 左表键 IS NULL 可找出右表有但左表没有的数据 |
| FULL JOIN | 所有记录 | 所有记录 | 完整对比两个表差异 |
聚合查询
sql
-- Common aggregate functions, computed over the sales rows where dt = '2023-01-01'.
SELECT
    COUNT(*)                AS total_rows,    -- counts rows
    COUNT(DISTINCT user_id) AS unique_users,  -- counts distinct user_id values
    SUM(amount)             AS total_amount,
    AVG(amount)             AS avg_amount,
    MAX(amount)             AS max_amount,
    MIN(amount)             AS min_amount,
    STDDEV(amount)          AS std_amount     -- standard deviation of amount
FROM sales
WHERE dt = '2023-01-01';
-- GROUP BY: one output row per department with its headcount and average salary.
-- HAVING filters whole groups after aggregation, so only departments
-- with more than 5 rows are returned.
SELECT
    department,
    COUNT(*)    AS emp_count,
    AVG(salary) AS avg_salary
FROM employees
GROUP BY department
HAVING COUNT(*) > 5;
字符串函数
sql
-- Concatenation
SELECT CONCAT('Hello', ' ', 'World');       -- Hello World
SELECT CONCAT_WS('-', '2023', '01', '01');  -- 2023-01-01 (first argument is the separator)

-- Substring extraction (positions are 1-based)
SELECT SUBSTR('Hello World', 1, 5);   -- Hello
SELECT SUBSTRING('Hello World', 7);   -- World (from position 7 to the end)

-- Length
SELECT LENGTH('Hello');      -- 5
SELECT CHAR_LENGTH('你好');  -- 2

-- Case conversion
SELECT LOWER('HELLO');  -- hello
SELECT UPPER('hello');  -- HELLO

-- Whitespace trimming
SELECT TRIM(' hello ');  -- hello (both sides)
SELECT LTRIM(' hello');  -- hello (left side)
SELECT RTRIM('hello ');  -- hello (right side)

-- Replacement
SELECT REPLACE('Hello World', 'World', 'MaxCompute');  -- Hello MaxCompute
sql
-- Position lookup (1-based). Note the argument order differs between the two:
-- INSTR(haystack, needle) versus LOCATE(needle, haystack).
SELECT INSTR('hello world', 'world');  -- 7
SELECT LOCATE('lo', 'hello world');    -- 4

-- Regular-expression match
SELECT 'abc123' RLIKE '^[a-z]+[0-9]+$';  -- true

-- Splitting: SPLIT returns an array; array indexes start at 0.
SELECT SPLIT('a,b,c,d', ',')[0];       -- a
SELECT EXPLODE(SPLIT('a,b,c', ','));   -- one output row per array element

-- JSON extraction by path
SELECT GET_JSON_OBJECT('{"name":"John","age":30}', '$.name');  -- John
日期时间函数
sql
-- Current time
SELECT GETDATE();          -- current date and time
SELECT CURRENT_TIMESTAMP;  -- current timestamp

-- Parsing and formatting
SELECT TO_DATE('2023-01-01', 'yyyy-MM-dd');
SELECT DATE_FORMAT(GETDATE(), 'yyyy-MM-dd HH:mm:ss');

-- Date arithmetic; the last argument is the unit ('dd' = days)
SELECT DATEADD(GETDATE(), 7, 'dd');                  -- 7 days from now
SELECT DATEDIFF('2023-01-10', '2023-01-01', 'dd');   -- 9 (days between the two dates)

-- Extracting date parts
SELECT YEAR('2023-01-01');           -- 2023
SELECT MONTH('2023-01-01');          -- 1
SELECT DAY('2023-01-01');            -- 1
SELECT HOUR('2023-01-01 15:30:00');  -- 15
-- Weeks start on Monday and a week that spans two years belongs to the year
-- holding most of its days. 2023-01-01 is a Sunday, so it is still counted
-- in the last week of 2022.
SELECT WEEKOFYEAR('2023-01-01');     -- 52
条件函数与CASE语句
sql
-- Simple CASE: compares one expression (department) against a list of values.
SELECT
    name,
    CASE department
        WHEN 'IT' THEN '技术部'
        WHEN 'HR' THEN '人力资源部'
        ELSE '其他部门'
    END AS dept_name
FROM employees;

-- Searched CASE: each WHEN carries its own condition. Branches are tested
-- top to bottom and the first match wins, so the thresholds go from high to low.
SELECT
    score,
    CASE
        WHEN score >= 90 THEN '优秀'
        WHEN score >= 80 THEN '良好'
        WHEN score >= 60 THEN '及格'
        ELSE '不及格'
    END AS grade
FROM students;
-- COALESCE returns its first non-NULL argument.
SELECT COALESCE(NULL, NULL, 'default');  -- default

-- IF(condition, value_if_true, value_if_false)
SELECT IF(score >= 60, '及格', '不及格')
FROM scores;

-- NVL(expr, fallback): returns fallback when expr is NULL.
-- `your_table`, `null_col` and `col` are placeholders; a column reference
-- needs a FROM clause to resolve against.
SELECT NVL(null_col, 'default_value')
FROM your_table;

-- NVL2(expr, value_if_not_null, value_if_null)
SELECT NVL2(col, 'not_null', 'is_null')
FROM your_table;

-- DECODE(expr, search1, result1, ..., default): similar to a simple CASE.
SELECT DECODE(status, 1, '激活', 0, '禁用', '未知')
FROM users;
窗口函数(重要)
sql
-- Ranking functions, all ordered by score from highest to lowest.
SELECT
    name,
    score,
    ROW_NUMBER() OVER (ORDER BY score DESC) AS rn,   -- unique consecutive number per row
    RANK()       OVER (ORDER BY score DESC) AS rk,   -- ties share a rank; gaps follow the ties
    DENSE_RANK() OVER (ORDER BY score DESC) AS drk   -- ties share a rank; no gaps
FROM students;
-- Aggregate window functions: every employee row is kept and annotated
-- with figures computed over its department.
SELECT
    department,
    name,
    salary,
    SUM(salary) OVER (PARTITION BY department) AS dept_total,
    AVG(salary) OVER (PARTITION BY department) AS dept_avg,
    -- Moving average: the current row plus the two rows before it,
    -- in hire_date order within the department.
    AVG(salary) OVER (
        PARTITION BY department
        ORDER BY hire_date
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) AS moving_avg
FROM employees;
-- Offset functions: read a value from another row of the same window.
-- NOTE(review): the prev_day/next_day names assume one row per dt — confirm.
SELECT
    dt,
    sales,
    LAG(sales, 1)  OVER (ORDER BY dt) AS prev_day_sales,  -- value from the previous row
    LEAD(sales, 1) OVER (ORDER BY dt) AS next_day_sales,  -- value from the next row
    FIRST_VALUE(sales) OVER (
        PARTITION BY month
        ORDER BY dt
    ) AS month_first_sales                                -- earliest-dt value within each month
FROM daily_sales;
ARRAY/MAP/炸开处理
sql
-- ARRAY basics: build an array, count its elements, test membership.
SELECT
    ARRAY(1, 2, 3)                    AS arr,
    SIZE(ARRAY(1, 2, 3))              AS arr_size,    -- 3
    ARRAY_CONTAINS(ARRAY(1, 2, 3), 2) AS contains_2;  -- true
-- MAP basics: build a map, list its keys and values, look up one key.
-- The map is built in a subquery because an alias defined in a SELECT list
-- cannot be referenced by other expressions of that same SELECT list.
SELECT
    m.my_map             AS my_map,
    MAP_KEYS(m.my_map)   AS keys,
    MAP_VALUES(m.my_map) AS `values`,      -- VALUES is a keyword, so the alias is quoted
    m.my_map['key1']     AS value_by_key   -- value1
FROM (
    SELECT MAP('key1', 'value1', 'key2', 'value2') AS my_map
) m;
-- EXPLODE: expands one array value into multiple rows, one row per element.
SELECT EXPLODE(SPLIT('a,b,c', ',')) AS item;

-- COLLECT_LIST / COLLECT_SET: the opposite direction — aggregate the rows of
-- each group into a single array value.
-- COLLECT_LIST keeps duplicates; COLLECT_SET removes them.
SELECT
    department,
    COLLECT_LIST(name) AS name_list,
    COLLECT_SET(name)  AS name_set
FROM employees
GROUP BY department;
常见示例
sql
-- De-duplicate with ROW_NUMBER: keep one row per user_id, the one with the
-- latest login_time.
WITH numbered_logins AS (
    SELECT
        l.*,
        ROW_NUMBER() OVER (
            PARTITION BY l.user_id
            ORDER BY l.login_time DESC
        ) AS rn
    FROM login_logs l
)
SELECT *
FROM numbered_logins
WHERE rn = 1;
-- Hierarchical query: walk the employee tree from the top down.
-- Written as a standard recursive CTE; the Oracle-style
-- START WITH ... CONNECT BY PRIOR form is not MaxCompute syntax.
-- NOTE(review): recursive CTE availability and its iteration cap depend on the
-- MaxCompute version/settings — confirm on the target project.
WITH RECURSIVE org_tree AS (
    -- Anchor: root employees, i.e. rows without a manager.
    SELECT e.*
    FROM employees e
    WHERE e.manager_id IS NULL
    UNION ALL
    -- Recursive step: direct reports of the rows already in the tree.
    SELECT c.*
    FROM employees c
    INNER JOIN org_tree p
        ON c.manager_id = p.id
)
SELECT *
FROM org_tree;
-- Random sample of up to 100 rows.
-- Step 1: RAND() < 0.001 keeps roughly 0.1% of the rows, so the sort below
--         only has to handle a small subset.
-- Step 2: ORDER BY RAND() shuffles that subset; without it LIMIT just returns
--         whichever sampled rows happen to be produced first.
-- A table with fewer than ~100,000 rows can yield fewer than 100 rows here;
-- raise the threshold for smaller tables.
SELECT *
FROM large_table
WHERE RAND() < 0.001
ORDER BY RAND()
LIMIT 100;
-- Stratified sampling: number the rows of each category in random order,
-- then keep at most 10 rows per category.
WITH shuffled AS (
    SELECT
        p.*,
        ROW_NUMBER() OVER (
            PARTITION BY p.category
            ORDER BY RAND()
        ) AS rn
    FROM products p
)
SELECT *
FROM shuffled
WHERE rn <= 10;
-- Partition filtering: include a predicate on the partition column
-- (dt, presumably the partition key of sales) so only the needed partition is read.
-- The position of that predicate inside an AND-ed WHERE clause does not matter:
-- the two statements below are logically identical and return the same rows.
SELECT *
FROM sales
WHERE dt = '2023-01-01'
  AND amount > 1000;

SELECT *
FROM sales
WHERE amount > 1000
  AND dt = '2023-01-01';
-- What does risk a full-table scan is leaving the partition predicate out.
-- NOTE(review): wrapping the partition column in a function or expression may
-- also defeat pruning — check the plan with EXPLAIN.
-- Joining a small table to a large one: the MAPJOIN hint asks the engine to
-- load the small side into memory.
-- The hint must name the table by the alias used in the query (b here);
-- naming the underlying table while it is aliased leaves the hint without a match.
SELECT /*+ MAPJOIN(b) */
    a.*,
    b.*
FROM large_table a
JOIN small_table b
    ON a.id = b.id;
常用检测
sql
-- Column profile for NULLs and outliers on col1.
SELECT
    COUNT(*)                                       AS total,           -- all rows
    COUNT(col1)                                    AS not_null_count,  -- rows where col1 is not NULL
    COUNT(DISTINCT col1)                           AS unique_count,    -- distinct col1 values
    SUM(CASE WHEN col1 IS NULL THEN 1 ELSE 0 END)  AS null_count,      -- rows where col1 is NULL
    MIN(col1)                                      AS min_value,
    MAX(col1)                                      AS max_value
FROM your_table;
-- Attach a serial number (1, 2, 3, ... in id order) in front of every column.
SELECT
    ROW_NUMBER() OVER (ORDER BY t.id) AS serial_no,
    t.*
FROM your_table t;
-- Generate a date sequence (time dimension): 365 consecutive days starting
-- at 2023-01-01.
-- some_table is only used as a row source and must hold at least 365 rows.
-- The offsets are filtered with WHERE n < 365 rather than LIMIT 365:
-- a LIMIT without ORDER BY keeps arbitrary rows, so n would not be
-- guaranteed to be exactly 0..364.
SELECT
    DATEADD('2023-01-01', n, 'dd') AS date_seq
FROM (
    SELECT ROW_NUMBER() OVER (ORDER BY id) - 1 AS n  -- 0-based day offset
    FROM some_table
) t
WHERE n < 365;