hive高级查询(2)

-- 分组查询

-- Total mark per sex, keeping only the groups whose total exceeds 555.
-- HAVING filters after aggregation and refers to the aggregate by its alias.
SELECT
    sex,
    SUM(mark) AS sum_mark
FROM score
GROUP BY sex
HAVING sum_mark > 555;

-- Derived-table form of a post-aggregation filter: aggregate per sex in the
-- inner query, then apply an ordinary WHERE to the aggregated column.
SELECT
    sex,
    sum_mark
FROM (
    SELECT
        sex,
        SUM(mark) AS sum_mark
    FROM score
    GROUP BY sex
) sex_totals
WHERE sum_mark > 555;

-- AVG(gid) side by side with the hand-written SUM/COUNT ratio.
SELECT
    AVG(gid),
    SUM(gid) / COUNT(gid)
FROM student;

-- Count of gid values versus count of distinct gid values.
SELECT
    COUNT(gid),
    COUNT(DISTINCT gid)
FROM student;

-- Gather gid into arrays: collect_list keeps duplicates, collect_set drops them.
SELECT
    COLLECT_LIST(gid),
    COLLECT_SET(gid)
FROM student;

+------------+--------+--+

| _c0 | _c1 |

+------------+--------+--+

| [1,1,2,2] | [1,2] |

+------------+--------+--+

-- collect_list with DISTINCT; the result is the same as the
-- collect_list/collect_set query shown above.
SELECT
    COLLECT_LIST(gid),
    COLLECT_LIST(DISTINCT gid)
FROM student;

-- 窗口排名函数

-- Number every row of score sequentially in ascending id order.
SELECT
    *,
    ROW_NUMBER() OVER (ORDER BY id) AS rn
FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 1 | a | male | 99.0 | 1 |

| 2 | b | female | 87.0 | 2 |

| 3 | c | male | 68.0 | 3 |

| 4 | d | female | 54.0 | 4 |

| 5 | e | male | 93.0 | 5 |

| 6 | f | female | 46.0 | 6 |

| 7 | g | male | 50.0 | 7 |

| 8 | h | female | 88.0 | 8 |

| 9 | i | male | 75.0 | 9 |

| 10 | j | male | 72.0 | 10 |

| 11 | k | female | 100.0 | 11 |

| 12 | l | female | 88.0 | 12 |

| 13 | m | male | 99.0 | 13 |

| 14 | n | female | NULL | 14 |

| 15 | o | male | NULL | 15 |

| 16 | p | female | 88.0 | 16 |

+-----------+-------------+------------+-------------+-----+--+

-- Rank all rows by mark, highest first. Tied marks share a rank and the
-- ranks that follow a tie are skipped (1, 2, 2, 4, ...).
SELECT
    *,
    RANK() OVER (ORDER BY mark DESC) AS rn
FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 11 | k | female | 100.0 | 1 |

| 1 | a | male | 99.0 | 2 |

| 13 | m | male | 99.0 | 2 |

| 5 | e | male | 93.0 | 4 |

| 16 | p | female | 88.0 | 5 |

| 12 | l | female | 88.0 | 5 |

| 8 | h | female | 88.0 | 5 |

| 2 | b | female | 87.0 | 8 |

| 9 | i | male | 75.0 | 9 |

| 10 | j | male | 72.0 | 10 |

| 3 | c | male | 68.0 | 11 |

| 4 | d | female | 54.0 | 12 |

| 7 | g | male | 50.0 | 13 |

| 6 | f | female | 46.0 | 14 |

| 14 | n | female | NULL | 15 |

| 15 | o | male | NULL | 15 |

+-----------+-------------+------------+-------------+-----+--+

-- Dense-rank all rows by mark, highest first. Tied marks share a rank and
-- no rank is skipped afterwards (1, 2, 2, 3, ...).
SELECT
    *,
    DENSE_RANK() OVER (ORDER BY mark DESC) AS rn
FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 11 | k | female | 100.0 | 1 |

| 1 | a | male | 99.0 | 2 |

| 13 | m | male | 99.0 | 2 |

| 5 | e | male | 93.0 | 3 |

| 16 | p | female | 88.0 | 4 |

| 12 | l | female | 88.0 | 4 |

| 8 | h | female | 88.0 | 4 |

| 2 | b | female | 87.0 | 5 |

| 9 | i | male | 75.0 | 6 |

| 10 | j | male | 72.0 | 7 |

| 3 | c | male | 68.0 | 8 |

| 4 | d | female | 54.0 | 9 |

| 7 | g | male | 50.0 | 10 |

| 6 | f | female | 46.0 | 11 |

| 14 | n | female | NULL | 12 |

| 15 | o | male | NULL | 12 |

+-----------+-------------+------------+-------------+-----+--+

-- Restart the row numbering for each sex, ordering by id within the partition.
SELECT
    *,
    ROW_NUMBER() OVER (
        PARTITION BY sex
        ORDER BY id
    ) AS rn
FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 2 | b | female | 87.0 | 1 |

| 4 | d | female | 54.0 | 2 |

| 6 | f | female | 46.0 | 3 |

| 8 | h | female | 88.0 | 4 |

| 11 | k | female | 100.0 | 5 |

| 12 | l | female | 88.0 | 6 |

| 14 | n | female | NULL | 7 |

| 16 | p | female | 88.0 | 8 |

| 1 | a | male | 99.0 | 1 |

| 3 | c | male | 68.0 | 2 |

| 5 | e | male | 93.0 | 3 |

| 7 | g | male | 50.0 | 4 |

| 9 | i | male | 75.0 | 5 |

| 10 | j | male | 72.0 | 6 |

| 13 | m | male | 99.0 | 7 |

| 15 | o | male | NULL | 8 |

+-----------+-------------+------------+-------------+-----+--+

-- Rank rows by mark (highest first) separately within each sex;
-- ties share a rank and leave gaps after them.
SELECT
    *,
    RANK() OVER (
        PARTITION BY sex
        ORDER BY mark DESC
    ) AS rn
FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 11 | k | female | 100.0 | 1 |

| 16 | p | female | 88.0 | 2 |

| 12 | l | female | 88.0 | 2 |

| 8 | h | female | 88.0 | 2 |

| 2 | b | female | 87.0 | 5 |

| 4 | d | female | 54.0 | 6 |

| 6 | f | female | 46.0 | 7 |

| 14 | n | female | NULL | 8 |

| 1 | a | male | 99.0 | 1 |

| 13 | m | male | 99.0 | 1 |

| 5 | e | male | 93.0 | 3 |

| 9 | i | male | 75.0 | 4 |

| 10 | j | male | 72.0 | 5 |

| 3 | c | male | 68.0 | 6 |

| 7 | g | male | 50.0 | 7 |

| 15 | o | male | NULL | 8 |

+-----------+-------------+------------+-------------+-----+--+

-- Dense-rank rows by mark (highest first) separately within each sex;
-- ties share a rank and no rank is skipped.
SELECT
    *,
    DENSE_RANK() OVER (
        PARTITION BY sex
        ORDER BY mark DESC
    ) AS rn
FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 11 | k | female | 100.0 | 1 |

| 16 | p | female | 88.0 | 2 |

| 12 | l | female | 88.0 | 2 |

| 8 | h | female | 88.0 | 2 |

| 2 | b | female | 87.0 | 3 |

| 4 | d | female | 54.0 | 4 |

| 6 | f | female | 46.0 | 5 |

| 14 | n | female | NULL | 6 |

| 1 | a | male | 99.0 | 1 |

| 13 | m | male | 99.0 | 1 |

| 5 | e | male | 93.0 | 2 |

| 9 | i | male | 75.0 | 3 |

| 10 | j | male | 72.0 | 4 |

| 3 | c | male | 68.0 | 5 |

| 7 | g | male | 50.0 | 6 |

| 15 | o | male | NULL | 7 |

+-----------+-------------+------------+-------------+-----+--+

-- 总结:

ROW_NUMBER() 按行定序,[1,2,3]

RANK() 按值定序,[1,1,3]

DENSE_RANK() 按值定序,[1,1,2]

-- 用法:

ROW_NUMBER() OVER(PARTITION BY ),仅分区后排名,用得少

ROW_NUMBER() OVER(ORDER BY ),全窗口排序后排名,用得少

ROW_NUMBER() OVER(PARTITION BY ORDER BY ),先分组,再排序,最后排名

【注:以上用法适用于三种排名函数】

partition BY 定义窗口大小为分组大小,否则窗口大小为全表大小

-- 窗口聚合函数

-- Attach to every row the number of rows in its sex partition.
-- The window column is deliberately left unaliased.
SELECT
    *,
    COUNT(*) OVER (PARTITION BY sex)
FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 16 | p | female | 88.0 | 8 |

| 14 | n | female | NULL | 8 |

| 12 | l | female | 88.0 | 8 |

| 11 | k | female | 100.0 | 8 |

| 8 | h | female | 88.0 | 8 |

| 6 | f | female | 46.0 | 8 |

| 4 | d | female | 54.0 | 8 |

| 2 | b | female | 87.0 | 8 |

| 1 | a | male | 99.0 | 8 |

| 15 | o | male | NULL | 8 |

| 7 | g | male | 50.0 | 8 |

| 13 | m | male | 99.0 | 8 |

| 3 | c | male | 68.0 | 8 |

| 5 | e | male | 93.0 | 8 |

| 10 | j | male | 72.0 | 8 |

| 9 | i | male | 75.0 | 8 |

+-----------+-------------+------------+-------------+---------+--+

-- For every row with a non-NULL mark, show the highest and lowest mark
-- of its sex partition alongside the row's own columns.
SELECT
    *,
    MAX(mark) OVER (PARTITION BY sex) AS max_mark,
    MIN(mark) OVER (PARTITION BY sex) AS min_mark
FROM score
WHERE mark IS NOT NULL;

+-----------+-------------+------------+-------------+-----------+-----------+--+

| score.id | score.name | score.sex | score.mark | max_mark | min_mark |

+-----------+-------------+------------+-------------+-----------+-----------+--+

| 16 | p | female | 88.0 | 100.0 | 46.0 |

| 6 | f | female | 46.0 | 100.0 | 46.0 |

| 12 | l | female | 88.0 | 100.0 | 46.0 |

| 4 | d | female | 54.0 | 100.0 | 46.0 |

| 11 | k | female | 100.0 | 100.0 | 46.0 |

| 2 | b | female | 87.0 | 100.0 | 46.0 |

| 8 | h | female | 88.0 | 100.0 | 46.0 |

| 7 | g | male | 50.0 | 99.0 | 50.0 |

| 13 | m | male | 99.0 | 99.0 | 50.0 |

| 10 | j | male | 72.0 | 99.0 | 50.0 |

| 9 | i | male | 75.0 | 99.0 | 50.0 |

| 5 | e | male | 93.0 | 99.0 | 50.0 |

| 3 | c | male | 68.0 | 99.0 | 50.0 |

| 1 | a | male | 99.0 | 99.0 | 50.0 |

+-----------+-------------+------------+-------------+-----------+-----------+--+

-- Attach the per-sex total and average mark to every row.
-- Rows with a NULL mark stay in the output and still receive both values.
SELECT
    *,
    SUM(mark) OVER (PARTITION BY sex) AS sum_mark,
    AVG(mark) OVER (PARTITION BY sex) AS avg_mark
FROM score;

+-----------+-------------+------------+-------------+-----------+--------------------+--+

| score.id | score.name | score.sex | score.mark | sum_mark | avg_mark |

+-----------+-------------+------------+-------------+-----------+--------------------+--+

| 16 | p | female | 88.0 | 551.0 | 78.71428571428571 |

| 14 | n | female | NULL | 551.0 | 78.71428571428571 |

| 12 | l | female | 88.0 | 551.0 | 78.71428571428571 |

| 11 | k | female | 100.0 | 551.0 | 78.71428571428571 |

| 8 | h | female | 88.0 | 551.0 | 78.71428571428571 |

| 6 | f | female | 46.0 | 551.0 | 78.71428571428571 |

| 4 | d | female | 54.0 | 551.0 | 78.71428571428571 |

| 2 | b | female | 87.0 | 551.0 | 78.71428571428571 |

| 1 | a | male | 99.0 | 556.0 | 79.42857142857143 |

| 15 | o | male | NULL | 556.0 | 79.42857142857143 |

| 7 | g | male | 50.0 | 556.0 | 79.42857142857143 |

| 13 | m | male | 99.0 | 556.0 | 79.42857142857143 |

| 3 | c | male | 68.0 | 556.0 | 79.42857142857143 |

| 5 | e | male | 93.0 | 556.0 | 79.42857142857143 |

| 10 | j | male | 72.0 | 556.0 | 79.42857142857143 |

| 9 | i | male | 75.0 | 556.0 | 79.42857142857143 |

+-----------+-------------+------------+-------------+-----------+--------------------+--+

-- Running total of mark over the whole table in ascending mark order.
-- No window clause is given, so rows with equal marks are accumulated
-- together and receive the same running total.
SELECT
    *,
    SUM(mark) OVER (ORDER BY mark) AS sum_mark
FROM score;

-- 指定ORDER BY而未写window子句时,默认窗口为 RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW:未分组时分区为全表(首行到末行),计算范围从首行累积到当前行,排序值相同的行视为一组同时计入(因此mark相同的行得到相同的累计值)

+-----------+-------------+------------+-------------+-----------+--+

| score.id | score.name | score.sex | score.mark | sum_mark |

+-----------+-------------+------------+-------------+-----------+--+

| 15 | o | male | NULL | NULL |

| 14 | n | female | NULL | NULL |

| 6 | f | female | 46.0 | 46.0 |

| 7 | g | male | 50.0 | 96.0 |

| 4 | d | female | 54.0 | 150.0 |

| 3 | c | male | 68.0 | 218.0 |

| 10 | j | male | 72.0 | 290.0 |

| 9 | i | male | 75.0 | 365.0 |

| 2 | b | female | 87.0 | 452.0 |

| 16 | p | female | 88.0 | 716.0 |

| 12 | l | female | 88.0 | 716.0 |

| 8 | h | female | 88.0 | 716.0 |

| 5 | e | male | 93.0 | 809.0 |

| 13 | m | male | 99.0 | 1007.0 |

| 1 | a | male | 99.0 | 1007.0 |

| 11 | k | female | 100.0 | 1107.0 |

+-----------+-------------+------------+-------------+-----------+--+

-- Running total of mark in ascending mark order, restarted for each sex.
-- Rows with equal marks inside a partition receive the same running total.
SELECT
    *,
    SUM(mark) OVER (
        PARTITION BY sex
        ORDER BY mark
    ) AS sum_mark
FROM score;

-- 如果分组则窗口边界是从组的第一行到组的最后一行

-- 如果不分组则窗口边界是从表的第一行到表的最后一行

+-----------+-------------+------------+-------------+-----------+--+

| score.id | score.name | score.sex | score.mark | sum_mark |

+-----------+-------------+------------+-------------+-----------+--+

| 14 | n | female | NULL | NULL |

| 6 | f | female | 46.0 | 46.0 |

| 4 | d | female | 54.0 | 100.0 |

| 2 | b | female | 87.0 | 187.0 |

| 16 | p | female | 88.0 | 451.0 |

| 12 | l | female | 88.0 | 451.0 |

| 8 | h | female | 88.0 | 451.0 |

| 11 | k | female | 100.0 | 551.0 |

| 15 | o | male | NULL | NULL |

| 7 | g | male | 50.0 | 50.0 |

| 3 | c | male | 68.0 | 118.0 |

| 10 | j | male | 72.0 | 190.0 |

| 9 | i | male | 75.0 | 265.0 |

| 5 | e | male | 93.0 | 358.0 |

| 1 | a | male | 99.0 | 556.0 |

| 13 | m | male | 99.0 | 556.0 |

+-----------+-------------+------------+-------------+-----------+--+

-- 窗口分析函数

-- Within each sex (ordered by ascending mark) fetch the mark two rows ahead
-- (lead) and two rows behind (lag). The default 0 is used only when the
-- offset row lies outside the partition; a NULL mark in an existing offset
-- row is returned as NULL.
SELECT
    *,
    LEAD(mark, 2, 0) OVER (
        PARTITION BY sex
        ORDER BY mark
    ) AS lead,
    LAG(mark, 2, 0) OVER (
        PARTITION BY sex
        ORDER BY mark
    ) AS lag
FROM score;

-- 说明:

-- 第一个参数指定要取哪个字段的值

-- 第二个参数指定向上或向下跳过几行(默认值是1)

-- 第三个参数指定当偏移后的行超出分区范围(不存在该行)时返回的默认值(默认值是null);若偏移后的行存在但其字段值为null,则仍返回null(见下表第三行的lag列)

+-----------+-------------+------------+-------------+--------+-------+--+

| score.id | score.name | score.sex | score.mark | lead | lag |

+-----------+-------------+------------+-------------+--------+-------+--+

| 14 | n | female | NULL | 54.0 | 0.0 |

| 6 | f | female | 46.0 | 87.0 | 0.0 |

| 4 | d | female | 54.0 | 88.0 | NULL |

| 2 | b | female | 87.0 | 88.0 | 46.0 |

| 16 | p | female | 88.0 | 88.0 | 54.0 |

| 12 | l | female | 88.0 | 100.0 | 87.0 |

| 8 | h | female | 88.0 | 0.0 | 88.0 |

| 11 | k | female | 100.0 | 0.0 | 88.0 |

| 15 | o | male | NULL | 68.0 | 0.0 |

| 7 | g | male | 50.0 | 72.0 | 0.0 |

| 3 | c | male | 68.0 | 75.0 | NULL |

| 10 | j | male | 72.0 | 93.0 | 50.0 |

| 9 | i | male | 75.0 | 99.0 | 68.0 |

| 5 | e | male | 93.0 | 99.0 | 72.0 |

| 1 | a | male | 99.0 | 0.0 | 75.0 |

| 13 | m | male | 99.0 | 0.0 | 93.0 |

+-----------+-------------+------------+-------------+--------+-------+--+

-- First and last non-NULL mark per sex, ordered by mark descending
-- (second argument true = skip NULLs).
-- No window clause is given, so the frame ends at the current row and its
-- ties: "last" follows the current mark instead of the partition's final value.
SELECT
    *,
    FIRST_VALUE(mark, true) OVER (
        PARTITION BY sex
        ORDER BY mark DESC
    ) AS first,
    LAST_VALUE(mark, true) OVER (
        PARTITION BY sex
        ORDER BY mark DESC
    ) AS last
FROM score;

-- 说明

-- 第一个参数指定要取哪个字段的值

-- 第二个参数指定是否跳过null值(默认值是false)

+-----------+-------------+------------+-------------+--------+--------+--+

| score.id | score.name | score.sex | score.mark | first | last |

+-----------+-------------+------------+-------------+--------+--------+--+

| 11 | k | female | 100.0 | 100.0 | 100.0 |

| 16 | p | female | 88.0 | 100.0 | 88.0 |

| 12 | l | female | 88.0 | 100.0 | 88.0 |

| 8 | h | female | 88.0 | 100.0 | 88.0 |

| 2 | b | female | 87.0 | 100.0 | 87.0 |

| 4 | d | female | 54.0 | 100.0 | 54.0 |

| 6 | f | female | 46.0 | 100.0 | 46.0 |

| 14 | n | female | NULL | 100.0 | 46.0 |

| 1 | a | male | 99.0 | 99.0 | 99.0 |

| 13 | m | male | 99.0 | 99.0 | 99.0 |

| 5 | e | male | 93.0 | 99.0 | 93.0 |

| 9 | i | male | 75.0 | 99.0 | 75.0 |

| 10 | j | male | 72.0 | 99.0 | 72.0 |

| 3 | c | male | 68.0 | 99.0 | 68.0 |

| 7 | g | male | 50.0 | 99.0 | 50.0 |

| 15 | o | male | NULL | 99.0 | 50.0 |

+-----------+-------------+------------+-------------+--------+--------+--+

-- 思路:分组 -> 排序 -> 计算【排名,聚合,分析】

-- 排名 -> row_number(),rank(),dense_rank()

-- 聚合 -> count(),max(),min(),sum(),avg()

-- 分析 -> lead(),lag(),first_value(),last_value()

-- window子句分为两类:按行(ROWS)和按值范围(RANGE);不支持window子句的函数包括:row_number(),rank(),dense_rank(),lead(),lag()

-- Row-based frame: maximum mark from the first row up to the current row,
-- in ascending mark order.
SELECT
    *,
    MAX(mark) OVER (
        ORDER BY mark
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )
FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 15 | o | male | NULL | NULL |

| 14 | n | female | NULL | NULL |

| 6 | f | female | 46.0 | 46.0 |

| 7 | g | male | 50.0 | 50.0 |

| 4 | d | female | 54.0 | 54.0 |

| 3 | c | male | 68.0 | 68.0 |

| 10 | j | male | 72.0 | 72.0 |

| 9 | i | male | 75.0 | 75.0 |

| 2 | b | female | 87.0 | 87.0 |

| 16 | p | female | 88.0 | 88.0 |

| 12 | l | female | 88.0 | 88.0 |

| 8 | h | female | 88.0 | 88.0 |

| 5 | e | male | 93.0 | 93.0 |

| 13 | m | male | 99.0 | 99.0 |

| 1 | a | male | 99.0 | 99.0 |

| 11 | k | female | 100.0 | 100.0 |

+-----------+-------------+------------+-------------+---------+--+

-- Row-based frame covering every row: each row gets the overall maximum mark.
SELECT
    *,
    MAX(mark) OVER (
        ORDER BY mark
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    )
FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 15 | o | male | NULL | 100.0 |

| 14 | n | female | NULL | 100.0 |

| 6 | f | female | 46.0 | 100.0 |

| 7 | g | male | 50.0 | 100.0 |

| 4 | d | female | 54.0 | 100.0 |

| 3 | c | male | 68.0 | 100.0 |

| 10 | j | male | 72.0 | 100.0 |

| 9 | i | male | 75.0 | 100.0 |

| 2 | b | female | 87.0 | 100.0 |

| 16 | p | female | 88.0 | 100.0 |

| 12 | l | female | 88.0 | 100.0 |

| 8 | h | female | 88.0 | 100.0 |

| 5 | e | male | 93.0 | 100.0 |

| 13 | m | male | 99.0 | 100.0 |

| 1 | a | male | 99.0 | 100.0 |

| 11 | k | female | 100.0 | 100.0 |

+-----------+-------------+------------+-------------+---------+--+

-- Row-based sliding frame: maximum mark among the rows located 2 to 6
-- positions after the current row; NULL when no such rows exist.
SELECT
    *,
    MAX(mark) OVER (
        ORDER BY mark
        ROWS BETWEEN 2 FOLLOWING AND 6 FOLLOWING
    )
FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 15 | o | male | NULL | 72.0 |

| 14 | n | female | NULL | 75.0 |

| 6 | f | female | 46.0 | 87.0 |

| 7 | g | male | 50.0 | 88.0 |

| 4 | d | female | 54.0 | 88.0 |

| 3 | c | male | 68.0 | 88.0 |

| 10 | j | male | 72.0 | 93.0 |

| 9 | i | male | 75.0 | 99.0 |

| 2 | b | female | 87.0 | 99.0 |

| 16 | p | female | 88.0 | 100.0 |

| 12 | l | female | 88.0 | 100.0 |

| 8 | h | female | 88.0 | 100.0 |

| 5 | e | male | 93.0 | 100.0 |

| 13 | m | male | 99.0 | 100.0 |

| 1 | a | male | 99.0 | NULL |

| 11 | k | female | 100.0 | NULL |

+-----------+-------------+------------+-------------+---------+--+

-- Value-based frame: maximum mark among rows whose mark lies within
-- 20 below to 20 above the current row's mark.
SELECT
    *,
    MAX(mark) OVER (
        ORDER BY mark
        RANGE BETWEEN 20 PRECEDING AND 20 FOLLOWING
    )
FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 15 | o | male | NULL | NULL |

| 14 | n | female | NULL | NULL |

| 6 | f | female | 46.0 | 54.0 |

| 7 | g | male | 50.0 | 68.0 |

| 4 | d | female | 54.0 | 72.0 |

| 3 | c | male | 68.0 | 88.0 |

| 10 | j | male | 72.0 | 88.0 |

| 9 | i | male | 75.0 | 93.0 |

| 2 | b | female | 87.0 | 100.0 |

| 16 | p | female | 88.0 | 100.0 |

| 12 | l | female | 88.0 | 100.0 |

| 8 | h | female | 88.0 | 100.0 |

| 5 | e | male | 93.0 | 100.0 |

| 13 | m | male | 99.0 | 100.0 |

| 1 | a | male | 99.0 | 100.0 |

| 11 | k | female | 100.0 | 100.0 |

+-----------+-------------+------------+-------------+---------+--+

-- 取成绩前3名

-- Top three distinct marks, CTE form: dense-rank by mark descending, then keep
-- ranks 1-3. Tied rows are all returned, so more than three rows can come back.
WITH ranked AS (
    SELECT
        *,
        DENSE_RANK() OVER (ORDER BY mark DESC) AS dk
    FROM score
)
SELECT *
FROM ranked
WHERE dk <= 3;

-- Top three distinct marks, derived-table form: dense-rank by mark descending
-- in the inner query, then keep ranks 1-3 in the outer query.
SELECT *
FROM (
    SELECT
        *,
        DENSE_RANK() OVER (ORDER BY mark DESC) AS dk
    FROM score
) ranked
WHERE dk <= 3;

相关推荐
P.H. Infinity几秒前
【RabbitMQ】10-抽取MQ工具
数据库·分布式·rabbitmq
zgscwxd5 分钟前
thinkphp6 --数据库操作 增删改查
数据库·thinkphp6
代码小鑫12 分钟前
A031-基于SpringBoot的健身房管理系统设计与实现
java·开发语言·数据库·spring boot·后端
天天要nx30 分钟前
D64【python 接口自动化学习】- python基础之数据库
数据库·python
落落落sss1 小时前
MQ集群
java·服务器·开发语言·后端·elasticsearch·adb·ruby
我救我自己1 小时前
UE5运行时创建slate窗口
java·服务器·ue5
精进攻城狮@1 小时前
Redis(value的数据类型)
数据库·redis
2401_853275731 小时前
ArrayList 源码分析
java·开发语言
爪哇学长1 小时前
SQL 注入详解:原理、危害与防范措施
xml·java·数据库·sql·oracle
MoFe11 小时前
【.net core】【sqlsugar】字符串拼接+内容去重
java·开发语言·.netcore