-- 分组查询
SELECT sex,SUM(mark) sum_mark
FROM score
GROUP BY sex
HAVING sum_mark > 555;
SELECT sex,sum_mark
FROM(
SELECT sex,SUM(mark) sum_mark
FROM score
GROUP BY sex
) t
WHERE sum_mark > 555;
SELECT AVG(gid),SUM(gid)/COUNT(gid) FROM student;
SELECT COUNT(gid),COUNT(DISTINCT gid) FROM student;
SELECT collect_list(gid),collect_set(gid) FROM student;
+------------+--------+--+
| _c0 | _c1 |
+------------+--------+--+
| [1,1,2,2] | [1,2] |
+------------+--------+--+
SELECT collect_list(gid),collect_list(DISTINCT gid) FROM student;结果同上
-- 窗口排名函数
SELECT *,
ROW_NUMBER() OVER(ORDER BY id) rn
FROM score;
+-----------+-------------+------------+-------------+-----+--+
| score.id | score.name | score.sex | score.mark | rn |
+-----------+-------------+------------+-------------+-----+--+
| 1 | a | male | 99.0 | 1 |
| 2 | b | female | 87.0 | 2 |
| 3 | c | male | 68.0 | 3 |
| 4 | d | female | 54.0 | 4 |
| 5 | e | male | 93.0 | 5 |
| 6 | f | female | 46.0 | 6 |
| 7 | g | male | 50.0 | 7 |
| 8 | h | female | 88.0 | 8 |
| 9 | i | male | 75.0 | 9 |
| 10 | j | male | 72.0 | 10 |
| 11 | k | female | 100.0 | 11 |
| 12 | l | female | 88.0 | 12 |
| 13 | m | male | 99.0 | 13 |
| 14 | n | female | NULL | 14 |
| 15 | o | male | NULL | 15 |
| 16 | p | female | 88.0 | 16 |
+-----------+-------------+------------+-------------+-----+--+
SELECT *,
rank() OVER(ORDER BY mark desc) rn
FROM score;
+-----------+-------------+------------+-------------+-----+--+
| score.id | score.name | score.sex | score.mark | rn |
+-----------+-------------+------------+-------------+-----+--+
| 11 | k | female | 100.0 | 1 |
| 1 | a | male | 99.0 | 2 |
| 13 | m | male | 99.0 | 2 |
| 5 | e | male | 93.0 | 4 |
| 16 | p | female | 88.0 | 5 |
| 12 | l | female | 88.0 | 5 |
| 8 | h | female | 88.0 | 5 |
| 2 | b | female | 87.0 | 8 |
| 9 | i | male | 75.0 | 9 |
| 10 | j | male | 72.0 | 10 |
| 3 | c | male | 68.0 | 11 |
| 4 | d | female | 54.0 | 12 |
| 7 | g | male | 50.0 | 13 |
| 6 | f | female | 46.0 | 14 |
| 14 | n | female | NULL | 15 |
| 15 | o | male | NULL | 15 |
+-----------+-------------+------------+-------------+-----+--+
SELECT *,
dense_rank() OVER(ORDER BY mark desc) rn
FROM score;
+-----------+-------------+------------+-------------+-----+--+
| score.id | score.name | score.sex | score.mark | rn |
+-----------+-------------+------------+-------------+-----+--+
| 11 | k | female | 100.0 | 1 |
| 1 | a | male | 99.0 | 2 |
| 13 | m | male | 99.0 | 2 |
| 5 | e | male | 93.0 | 3 |
| 16 | p | female | 88.0 | 4 |
| 12 | l | female | 88.0 | 4 |
| 8 | h | female | 88.0 | 4 |
| 2 | b | female | 87.0 | 5 |
| 9 | i | male | 75.0 | 6 |
| 10 | j | male | 72.0 | 7 |
| 3 | c | male | 68.0 | 8 |
| 4 | d | female | 54.0 | 9 |
| 7 | g | male | 50.0 | 10 |
| 6 | f | female | 46.0 | 11 |
| 14 | n | female | NULL | 12 |
| 15 | o | male | NULL | 12 |
+-----------+-------------+------------+-------------+-----+--+
SELECT *,
ROW_NUMBER() OVER(PARTITION BY sex ORDER BY id) rn
FROM score;
+-----------+-------------+------------+-------------+-----+--+
| score.id | score.name | score.sex | score.mark | rn |
+-----------+-------------+------------+-------------+-----+--+
| 2 | b | female | 87.0 | 1 |
| 4 | d | female | 54.0 | 2 |
| 6 | f | female | 46.0 | 3 |
| 8 | h | female | 88.0 | 4 |
| 11 | k | female | 100.0 | 5 |
| 12 | l | female | 88.0 | 6 |
| 14 | n | female | NULL | 7 |
| 16 | p | female | 88.0 | 8 |
| 1 | a | male | 99.0 | 1 |
| 3 | c | male | 68.0 | 2 |
| 5 | e | male | 93.0 | 3 |
| 7 | g | male | 50.0 | 4 |
| 9 | i | male | 75.0 | 5 |
| 10 | j | male | 72.0 | 6 |
| 13 | m | male | 99.0 | 7 |
| 15 | o | male | NULL | 8 |
+-----------+-------------+------------+-------------+-----+--+
SELECT *,
rank() OVER(PARTITION BY sex ORDER BY mark desc) rn
FROM score;
+-----------+-------------+------------+-------------+-----+--+
| score.id | score.name | score.sex | score.mark | rn |
+-----------+-------------+------------+-------------+-----+--+
| 11 | k | female | 100.0 | 1 |
| 16 | p | female | 88.0 | 2 |
| 12 | l | female | 88.0 | 2 |
| 8 | h | female | 88.0 | 2 |
| 2 | b | female | 87.0 | 5 |
| 4 | d | female | 54.0 | 6 |
| 6 | f | female | 46.0 | 7 |
| 14 | n | female | NULL | 8 |
| 1 | a | male | 99.0 | 1 |
| 13 | m | male | 99.0 | 1 |
| 5 | e | male | 93.0 | 3 |
| 9 | i | male | 75.0 | 4 |
| 10 | j | male | 72.0 | 5 |
| 3 | c | male | 68.0 | 6 |
| 7 | g | male | 50.0 | 7 |
| 15 | o | male | NULL | 8 |
+-----------+-------------+------------+-------------+-----+--+
SELECT *,
dense_rank() OVER(PARTITION BY sex ORDER BY mark desc) rn
FROM score;
+-----------+-------------+------------+-------------+-----+--+
| score.id | score.name | score.sex | score.mark | rn |
+-----------+-------------+------------+-------------+-----+--+
| 11 | k | female | 100.0 | 1 |
| 16 | p | female | 88.0 | 2 |
| 12 | l | female | 88.0 | 2 |
| 8 | h | female | 88.0 | 2 |
| 2 | b | female | 87.0 | 3 |
| 4 | d | female | 54.0 | 4 |
| 6 | f | female | 46.0 | 5 |
| 14 | n | female | NULL | 6 |
| 1 | a | male | 99.0 | 1 |
| 13 | m | male | 99.0 | 1 |
| 5 | e | male | 93.0 | 2 |
| 9 | i | male | 75.0 | 3 |
| 10 | j | male | 72.0 | 4 |
| 3 | c | male | 68.0 | 5 |
| 7 | g | male | 50.0 | 6 |
| 15 | o | male | NULL | 7 |
+-----------+-------------+------------+-------------+-----+--+
-- 总结:
ROW_NUMBER() 按行定序,[1,2,3]
RANK() 按值定序,[1,1,3]
DENSE_RANK() 按值定序,[1,1,2]
-- 用法:
ROW_NUMBER() OVER(PARTITION BY ),仅分区后排名,用得少
ROW_NUMBER() OVER(ORDER BY ),全窗口排序后排名,用得少
ROW_NUMBER() OVER(PARTITION BY ORDER BY ),先分组,再排序,最后排名
【注:以上用法适用于三种排名函数】
partition BY 定义窗口大小为分组大小,否则窗口大小为全表大小
-- 窗口聚合函数
SELECT *,
COUNT(*) OVER(PARTITION BY sex)
FROM score;
+-----------+-------------+------------+-------------+---------+--+
| score.id | score.name | score.sex | score.mark | _wcol0 |
+-----------+-------------+------------+-------------+---------+--+
| 16 | p | female | 88.0 | 8 |
| 14 | n | female | NULL | 8 |
| 12 | l | female | 88.0 | 8 |
| 11 | k | female | 100.0 | 8 |
| 8 | h | female | 88.0 | 8 |
| 6 | f | female | 46.0 | 8 |
| 4 | d | female | 54.0 | 8 |
| 2 | b | female | 87.0 | 8 |
| 1 | a | male | 99.0 | 8 |
| 15 | o | male | NULL | 8 |
| 7 | g | male | 50.0 | 8 |
| 13 | m | male | 99.0 | 8 |
| 3 | c | male | 68.0 | 8 |
| 5 | e | male | 93.0 | 8 |
| 10 | j | male | 72.0 | 8 |
| 9 | i | male | 75.0 | 8 |
+-----------+-------------+------------+-------------+---------+--+
SELECT *,
MAX(mark) OVER(PARTITION BY sex) max_mark,
MIN(mark) OVER(PARTITION BY sex) min_mark
FROM score
WHERE mark IS NOT null;
+-----------+-------------+------------+-------------+-----------+-----------+--+
| score.id | score.name | score.sex | score.mark | max_mark | min_mark |
+-----------+-------------+------------+-------------+-----------+-----------+--+
| 16 | p | female | 88.0 | 100.0 | 46.0 |
| 6 | f | female | 46.0 | 100.0 | 46.0 |
| 12 | l | female | 88.0 | 100.0 | 46.0 |
| 4 | d | female | 54.0 | 100.0 | 46.0 |
| 11 | k | female | 100.0 | 100.0 | 46.0 |
| 2 | b | female | 87.0 | 100.0 | 46.0 |
| 8 | h | female | 88.0 | 100.0 | 46.0 |
| 7 | g | male | 50.0 | 99.0 | 50.0 |
| 13 | m | male | 99.0 | 99.0 | 50.0 |
| 10 | j | male | 72.0 | 99.0 | 50.0 |
| 9 | i | male | 75.0 | 99.0 | 50.0 |
| 5 | e | male | 93.0 | 99.0 | 50.0 |
| 3 | c | male | 68.0 | 99.0 | 50.0 |
| 1 | a | male | 99.0 | 99.0 | 50.0 |
+-----------+-------------+------------+-------------+-----------+-----------+--+
SELECT *,
SUM(mark) OVER(PARTITION BY sex) sum_mark,
AVG(mark) OVER(PARTITION BY sex) avg_mark
FROM score;
+-----------+-------------+------------+-------------+-----------+--------------------+--+
| score.id | score.name | score.sex | score.mark | sum_mark | avg_mark |
+-----------+-------------+------------+-------------+-----------+--------------------+--+
| 16 | p | female | 88.0 | 551.0 | 78.71428571428571 |
| 14 | n | female | NULL | 551.0 | 78.71428571428571 |
| 12 | l | female | 88.0 | 551.0 | 78.71428571428571 |
| 11 | k | female | 100.0 | 551.0 | 78.71428571428571 |
| 8 | h | female | 88.0 | 551.0 | 78.71428571428571 |
| 6 | f | female | 46.0 | 551.0 | 78.71428571428571 |
| 4 | d | female | 54.0 | 551.0 | 78.71428571428571 |
| 2 | b | female | 87.0 | 551.0 | 78.71428571428571 |
| 1 | a | male | 99.0 | 556.0 | 79.42857142857143 |
| 15 | o | male | NULL | 556.0 | 79.42857142857143 |
| 7 | g | male | 50.0 | 556.0 | 79.42857142857143 |
| 13 | m | male | 99.0 | 556.0 | 79.42857142857143 |
| 3 | c | male | 68.0 | 556.0 | 79.42857142857143 |
| 5 | e | male | 93.0 | 556.0 | 79.42857142857143 |
| 10 | j | male | 72.0 | 556.0 | 79.42857142857143 |
| 9 | i | male | 75.0 | 556.0 | 79.42857142857143 |
+-----------+-------------+------------+-------------+-----------+--------------------+--+
SELECT *,
SUM(mark) OVER(ORDER BY mark) sum_mark
FROM score;
-- 窗口自上而下自动变化,遇到相同值时视为一组同时计算,窗口范围从表首行到表末行,计算范围从表首行到当前行
+-----------+-------------+------------+-------------+-----------+--+
| score.id | score.name | score.sex | score.mark | sum_mark |
+-----------+-------------+------------+-------------+-----------+--+
| 15 | o | male | NULL | NULL |
| 14 | n | female | NULL | NULL |
| 6 | f | female | 46.0 | 46.0 |
| 7 | g | male | 50.0 | 96.0 |
| 4 | d | female | 54.0 | 150.0 |
| 3 | c | male | 68.0 | 218.0 |
| 10 | j | male | 72.0 | 290.0 |
| 9 | i | male | 75.0 | 365.0 |
| 2 | b | female | 87.0 | 452.0 |
| 16 | p | female | 88.0 | 716.0 |
| 12 | l | female | 88.0 | 716.0 |
| 8 | h | female | 88.0 | 716.0 |
| 5 | e | male | 93.0 | 809.0 |
| 13 | m | male | 99.0 | 1007.0 |
| 1 | a | male | 99.0 | 1007.0 |
| 11 | k | female | 100.0 | 1107.0 |
+-----------+-------------+------------+-------------+-----------+--+
SELECT *,
SUM(mark) OVER(PARTITION BY sex ORDER BY mark) sum_mark
FROM score;
-- 如果分组则窗口边界是从组的第一行到组的最后一行
-- 如果不分组则窗口边界是从表的第一行到表的最后一行
+-----------+-------------+------------+-------------+-----------+--+
| score.id | score.name | score.sex | score.mark | sum_mark |
+-----------+-------------+------------+-------------+-----------+--+
| 14 | n | female | NULL | NULL |
| 6 | f | female | 46.0 | 46.0 |
| 4 | d | female | 54.0 | 100.0 |
| 2 | b | female | 87.0 | 187.0 |
| 16 | p | female | 88.0 | 451.0 |
| 12 | l | female | 88.0 | 451.0 |
| 8 | h | female | 88.0 | 451.0 |
| 11 | k | female | 100.0 | 551.0 |
| 15 | o | male | NULL | NULL |
| 7 | g | male | 50.0 | 50.0 |
| 3 | c | male | 68.0 | 118.0 |
| 10 | j | male | 72.0 | 190.0 |
| 9 | i | male | 75.0 | 265.0 |
| 5 | e | male | 93.0 | 358.0 |
| 1 | a | male | 99.0 | 556.0 |
| 13 | m | male | 99.0 | 556.0 |
+-----------+-------------+------------+-------------+-----------+--+
-- 窗口分析函数
SELECT *,
LEAD(mark,2,0) OVER(PARTITION BY sex ORDER BY mark) lead,
LAG(mark,2,0) OVER(PARTITION BY sex ORDER BY mark) lag
FROM score;
-- 说明:
-- 第一个参数指定要取哪个字段的值
-- 第二个参数指定向上或向下跳过几行(默认值是1)
-- 第三个参数指定当值为null时替代的默认值(默认值是null)
+-----------+-------------+------------+-------------+--------+-------+--+
| score.id | score.name | score.sex | score.mark | lead | lag |
+-----------+-------------+------------+-------------+--------+-------+--+
| 14 | n | female | NULL | 54.0 | 0.0 |
| 6 | f | female | 46.0 | 87.0 | 0.0 |
| 4 | d | female | 54.0 | 88.0 | NULL |
| 2 | b | female | 87.0 | 88.0 | 46.0 |
| 16 | p | female | 88.0 | 88.0 | 54.0 |
| 12 | l | female | 88.0 | 100.0 | 87.0 |
| 8 | h | female | 88.0 | 0.0 | 88.0 |
| 11 | k | female | 100.0 | 0.0 | 88.0 |
| 15 | o | male | NULL | 68.0 | 0.0 |
| 7 | g | male | 50.0 | 72.0 | 0.0 |
| 3 | c | male | 68.0 | 75.0 | NULL |
| 10 | j | male | 72.0 | 93.0 | 50.0 |
| 9 | i | male | 75.0 | 99.0 | 68.0 |
| 5 | e | male | 93.0 | 99.0 | 72.0 |
| 1 | a | male | 99.0 | 0.0 | 75.0 |
| 13 | m | male | 99.0 | 0.0 | 93.0 |
+-----------+-------------+------------+-------------+--------+-------+--+
SELECT *,
FIRST_VALUE(mark,true) OVER(partition BY sex ORDER BY mark desc) first,
LAST_VALUE(mark,true) OVER(partition BY sex ORDER BY mark desc) last
FROM score;
-- 说明
-- 第一个参数指定要取哪个字段的值
-- 第二个参数指定是否跳过null值(默认值是false)
+-----------+-------------+------------+-------------+--------+--------+--+
| score.id | score.name | score.sex | score.mark | first | last |
+-----------+-------------+------------+-------------+--------+--------+--+
| 11 | k | female | 100.0 | 100.0 | 100.0 |
| 16 | p | female | 88.0 | 100.0 | 88.0 |
| 12 | l | female | 88.0 | 100.0 | 88.0 |
| 8 | h | female | 88.0 | 100.0 | 88.0 |
| 2 | b | female | 87.0 | 100.0 | 87.0 |
| 4 | d | female | 54.0 | 100.0 | 54.0 |
| 6 | f | female | 46.0 | 100.0 | 46.0 |
| 14 | n | female | NULL | 100.0 | 46.0 |
| 1 | a | male | 99.0 | 99.0 | 99.0 |
| 13 | m | male | 99.0 | 99.0 | 99.0 |
| 5 | e | male | 93.0 | 99.0 | 93.0 |
| 9 | i | male | 75.0 | 99.0 | 75.0 |
| 10 | j | male | 72.0 | 99.0 | 72.0 |
| 3 | c | male | 68.0 | 99.0 | 68.0 |
| 7 | g | male | 50.0 | 99.0 | 50.0 |
| 15 | o | male | NULL | 99.0 | 50.0 |
+-----------+-------------+------------+-------------+--------+--------+--+
-- 思路:分组 -> 排序 -> 计算【排名,聚合,分析】
-- 排名 -> row_number(),rank(),dense_rank()
-- 聚合 -> count(),max(),min(),sum(),avg()
-- 分析 -> lead(),lag(),first_value(),last_value()
-- window子句分为两类:行,值范围,不支持使用的函数包括:row_number(),rank(),dense_rank(),lead(),lag()
SELECT *,
MAX(mark) OVER(ORDER BY mark rows BETWEEN unbounded preceding AND CURRENT row)
FROM score;
+-----------+-------------+------------+-------------+---------+--+
| score.id | score.name | score.sex | score.mark | _wcol0 |
+-----------+-------------+------------+-------------+---------+--+
| 15 | o | male | NULL | NULL |
| 14 | n | female | NULL | NULL |
| 6 | f | female | 46.0 | 46.0 |
| 7 | g | male | 50.0 | 50.0 |
| 4 | d | female | 54.0 | 54.0 |
| 3 | c | male | 68.0 | 68.0 |
| 10 | j | male | 72.0 | 72.0 |
| 9 | i | male | 75.0 | 75.0 |
| 2 | b | female | 87.0 | 87.0 |
| 16 | p | female | 88.0 | 88.0 |
| 12 | l | female | 88.0 | 88.0 |
| 8 | h | female | 88.0 | 88.0 |
| 5 | e | male | 93.0 | 93.0 |
| 13 | m | male | 99.0 | 99.0 |
| 1 | a | male | 99.0 | 99.0 |
| 11 | k | female | 100.0 | 100.0 |
+-----------+-------------+------------+-------------+---------+--+
SELECT *,
MAX(mark) OVER(ORDER BY mark rows BETWEEN unbounded preceding AND unbounded following)
FROM score;
+-----------+-------------+------------+-------------+---------+--+
| score.id | score.name | score.sex | score.mark | _wcol0 |
+-----------+-------------+------------+-------------+---------+--+
| 15 | o | male | NULL | 100.0 |
| 14 | n | female | NULL | 100.0 |
| 6 | f | female | 46.0 | 100.0 |
| 7 | g | male | 50.0 | 100.0 |
| 4 | d | female | 54.0 | 100.0 |
| 3 | c | male | 68.0 | 100.0 |
| 10 | j | male | 72.0 | 100.0 |
| 9 | i | male | 75.0 | 100.0 |
| 2 | b | female | 87.0 | 100.0 |
| 16 | p | female | 88.0 | 100.0 |
| 12 | l | female | 88.0 | 100.0 |
| 8 | h | female | 88.0 | 100.0 |
| 5 | e | male | 93.0 | 100.0 |
| 13 | m | male | 99.0 | 100.0 |
| 1 | a | male | 99.0 | 100.0 |
| 11 | k | female | 100.0 | 100.0 |
+-----------+-------------+------------+-------------+---------+--+
SELECT *,
MAX(mark) OVER(ORDER BY mark rows BETWEEN 2 following AND 6 following)
FROM score;
+-----------+-------------+------------+-------------+---------+--+
| score.id | score.name | score.sex | score.mark | _wcol0 |
+-----------+-------------+------------+-------------+---------+--+
| 15 | o | male | NULL | 72.0 |
| 14 | n | female | NULL | 75.0 |
| 6 | f | female | 46.0 | 87.0 |
| 7 | g | male | 50.0 | 88.0 |
| 4 | d | female | 54.0 | 88.0 |
| 3 | c | male | 68.0 | 88.0 |
| 10 | j | male | 72.0 | 93.0 |
| 9 | i | male | 75.0 | 99.0 |
| 2 | b | female | 87.0 | 99.0 |
| 16 | p | female | 88.0 | 100.0 |
| 12 | l | female | 88.0 | 100.0 |
| 8 | h | female | 88.0 | 100.0 |
| 5 | e | male | 93.0 | 100.0 |
| 13 | m | male | 99.0 | 100.0 |
| 1 | a | male | 99.0 | NULL |
| 11 | k | female | 100.0 | NULL |
+-----------+-------------+------------+-------------+---------+--+
SELECT *,
MAX(mark) OVER(ORDER BY mark range BETWEEN 20 preceding AND 20 following)
FROM score;
+-----------+-------------+------------+-------------+---------+--+
| score.id | score.name | score.sex | score.mark | _wcol0 |
+-----------+-------------+------------+-------------+---------+--+
| 15 | o | male | NULL | NULL |
| 14 | n | female | NULL | NULL |
| 6 | f | female | 46.0 | 54.0 |
| 7 | g | male | 50.0 | 68.0 |
| 4 | d | female | 54.0 | 72.0 |
| 3 | c | male | 68.0 | 88.0 |
| 10 | j | male | 72.0 | 88.0 |
| 9 | i | male | 75.0 | 93.0 |
| 2 | b | female | 87.0 | 100.0 |
| 16 | p | female | 88.0 | 100.0 |
| 12 | l | female | 88.0 | 100.0 |
| 8 | h | female | 88.0 | 100.0 |
| 5 | e | male | 93.0 | 100.0 |
| 13 | m | male | 99.0 | 100.0 |
| 1 | a | male | 99.0 | 100.0 |
| 11 | k | female | 100.0 | 100.0 |
+-----------+-------------+------------+-------------+---------+--+
-- 取成绩前3名
WITH t1 AS(
SELECT *,
DENSE_RANK() OVER(ORDER BY mark desc) dk
FROM score
)
SELECT *
FROM t1
WHERE dk <=3;
SELECT *
FROM (
SELECT *,
DENSE_RANK() OVER(ORDER BY mark desc) dk
FROM score
)t1
WHERE dk <=3;