hive高级查询(2)

-- 分组查询

SELECT sex,SUM(mark) sum_mark

FROM score

GROUP BY sex

HAVING sum_mark > 555;

SELECT sex,sum_mark

FROM(

SELECT sex,SUM(mark) sum_mark

FROM score

GROUP BY sex

) t

WHERE sum_mark > 555;

SELECT AVG(gid),SUM(gid)/COUNT(gid) FROM student;

SELECT COUNT(gid),COUNT(DISTINCT gid) FROM student;

SELECT collect_list(gid),collect_set(gid) FROM student;

+------------+--------+--+

| _c0 | _c1 |

+------------+--------+--+

| [1,1,2,2] | [1,2] |

+------------+--------+--+

SELECT collect_list(gid),collect_list(DISTINCT gid) FROM student;结果同上

-- 窗口排名函数

SELECT *,

ROW_NUMBER() OVER(ORDER BY id) rn

FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 1 | a | male | 99.0 | 1 |

| 2 | b | female | 87.0 | 2 |

| 3 | c | male | 68.0 | 3 |

| 4 | d | female | 54.0 | 4 |

| 5 | e | male | 93.0 | 5 |

| 6 | f | female | 46.0 | 6 |

| 7 | g | male | 50.0 | 7 |

| 8 | h | female | 88.0 | 8 |

| 9 | i | male | 75.0 | 9 |

| 10 | j | male | 72.0 | 10 |

| 11 | k | female | 100.0 | 11 |

| 12 | l | female | 88.0 | 12 |

| 13 | m | male | 99.0 | 13 |

| 14 | n | female | NULL | 14 |

| 15 | o | male | NULL | 15 |

| 16 | p | female | 88.0 | 16 |

+-----------+-------------+------------+-------------+-----+--+

SELECT *,

rank() OVER(ORDER BY mark desc) rn

FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 11 | k | female | 100.0 | 1 |

| 1 | a | male | 99.0 | 2 |

| 13 | m | male | 99.0 | 2 |

| 5 | e | male | 93.0 | 4 |

| 16 | p | female | 88.0 | 5 |

| 12 | l | female | 88.0 | 5 |

| 8 | h | female | 88.0 | 5 |

| 2 | b | female | 87.0 | 8 |

| 9 | i | male | 75.0 | 9 |

| 10 | j | male | 72.0 | 10 |

| 3 | c | male | 68.0 | 11 |

| 4 | d | female | 54.0 | 12 |

| 7 | g | male | 50.0 | 13 |

| 6 | f | female | 46.0 | 14 |

| 14 | n | female | NULL | 15 |

| 15 | o | male | NULL | 15 |

+-----------+-------------+------------+-------------+-----+--+

SELECT *,

dense_rank() OVER(ORDER BY mark desc) rn

FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 11 | k | female | 100.0 | 1 |

| 1 | a | male | 99.0 | 2 |

| 13 | m | male | 99.0 | 2 |

| 5 | e | male | 93.0 | 3 |

| 16 | p | female | 88.0 | 4 |

| 12 | l | female | 88.0 | 4 |

| 8 | h | female | 88.0 | 4 |

| 2 | b | female | 87.0 | 5 |

| 9 | i | male | 75.0 | 6 |

| 10 | j | male | 72.0 | 7 |

| 3 | c | male | 68.0 | 8 |

| 4 | d | female | 54.0 | 9 |

| 7 | g | male | 50.0 | 10 |

| 6 | f | female | 46.0 | 11 |

| 14 | n | female | NULL | 12 |

| 15 | o | male | NULL | 12 |

+-----------+-------------+------------+-------------+-----+--+

SELECT *,

ROW_NUMBER() OVER(PARTITION BY sex ORDER BY id) rn

FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 2 | b | female | 87.0 | 1 |

| 4 | d | female | 54.0 | 2 |

| 6 | f | female | 46.0 | 3 |

| 8 | h | female | 88.0 | 4 |

| 11 | k | female | 100.0 | 5 |

| 12 | l | female | 88.0 | 6 |

| 14 | n | female | NULL | 7 |

| 16 | p | female | 88.0 | 8 |

| 1 | a | male | 99.0 | 1 |

| 3 | c | male | 68.0 | 2 |

| 5 | e | male | 93.0 | 3 |

| 7 | g | male | 50.0 | 4 |

| 9 | i | male | 75.0 | 5 |

| 10 | j | male | 72.0 | 6 |

| 13 | m | male | 99.0 | 7 |

| 15 | o | male | NULL | 8 |

+-----------+-------------+------------+-------------+-----+--+

SELECT *,

rank() OVER(PARTITION BY sex ORDER BY mark desc) rn

FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 11 | k | female | 100.0 | 1 |

| 16 | p | female | 88.0 | 2 |

| 12 | l | female | 88.0 | 2 |

| 8 | h | female | 88.0 | 2 |

| 2 | b | female | 87.0 | 5 |

| 4 | d | female | 54.0 | 6 |

| 6 | f | female | 46.0 | 7 |

| 14 | n | female | NULL | 8 |

| 1 | a | male | 99.0 | 1 |

| 13 | m | male | 99.0 | 1 |

| 5 | e | male | 93.0 | 3 |

| 9 | i | male | 75.0 | 4 |

| 10 | j | male | 72.0 | 5 |

| 3 | c | male | 68.0 | 6 |

| 7 | g | male | 50.0 | 7 |

| 15 | o | male | NULL | 8 |

+-----------+-------------+------------+-------------+-----+--+

SELECT *,

dense_rank() OVER(PARTITION BY sex ORDER BY mark desc) rn

FROM score;

+-----------+-------------+------------+-------------+-----+--+

| score.id | score.name | score.sex | score.mark | rn |

+-----------+-------------+------------+-------------+-----+--+

| 11 | k | female | 100.0 | 1 |

| 16 | p | female | 88.0 | 2 |

| 12 | l | female | 88.0 | 2 |

| 8 | h | female | 88.0 | 2 |

| 2 | b | female | 87.0 | 3 |

| 4 | d | female | 54.0 | 4 |

| 6 | f | female | 46.0 | 5 |

| 14 | n | female | NULL | 6 |

| 1 | a | male | 99.0 | 1 |

| 13 | m | male | 99.0 | 1 |

| 5 | e | male | 93.0 | 2 |

| 9 | i | male | 75.0 | 3 |

| 10 | j | male | 72.0 | 4 |

| 3 | c | male | 68.0 | 5 |

| 7 | g | male | 50.0 | 6 |

| 15 | o | male | NULL | 7 |

+-----------+-------------+------------+-------------+-----+--+

-- 总结:

ROW_NUMBER() 按行定序,[1,2,3]

RANK() 按值定序,[1,1,3]

DENSE_RANK() 按值定序,[1,1,2]

-- 用法:

ROW_NUMBER() OVER(PARTITION BY ),仅分区后排名,用得少

ROW_NUMBER() OVER(ORDER BY ),全窗口排序后排名,用得少

ROW_NUMBER() OVER(PARTITION BY ORDER BY ),先分组,再排序,最后排名

【注:以上用法适用于三种排名函数】

partition BY 定义窗口大小为分组大小,否则窗口大小为全表大小

-- 窗口聚合函数

SELECT *,

COUNT(*) OVER(PARTITION BY sex)

FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 16 | p | female | 88.0 | 8 |

| 14 | n | female | NULL | 8 |

| 12 | l | female | 88.0 | 8 |

| 11 | k | female | 100.0 | 8 |

| 8 | h | female | 88.0 | 8 |

| 6 | f | female | 46.0 | 8 |

| 4 | d | female | 54.0 | 8 |

| 2 | b | female | 87.0 | 8 |

| 1 | a | male | 99.0 | 8 |

| 15 | o | male | NULL | 8 |

| 7 | g | male | 50.0 | 8 |

| 13 | m | male | 99.0 | 8 |

| 3 | c | male | 68.0 | 8 |

| 5 | e | male | 93.0 | 8 |

| 10 | j | male | 72.0 | 8 |

| 9 | i | male | 75.0 | 8 |

+-----------+-------------+------------+-------------+---------+--+

SELECT *,

MAX(mark) OVER(PARTITION BY sex) max_mark,

MIN(mark) OVER(PARTITION BY sex) min_mark

FROM score

WHERE mark IS NOT null;

+-----------+-------------+------------+-------------+-----------+-----------+--+

| score.id | score.name | score.sex | score.mark | max_mark | min_mark |

+-----------+-------------+------------+-------------+-----------+-----------+--+

| 16 | p | female | 88.0 | 100.0 | 46.0 |

| 6 | f | female | 46.0 | 100.0 | 46.0 |

| 12 | l | female | 88.0 | 100.0 | 46.0 |

| 4 | d | female | 54.0 | 100.0 | 46.0 |

| 11 | k | female | 100.0 | 100.0 | 46.0 |

| 2 | b | female | 87.0 | 100.0 | 46.0 |

| 8 | h | female | 88.0 | 100.0 | 46.0 |

| 7 | g | male | 50.0 | 99.0 | 50.0 |

| 13 | m | male | 99.0 | 99.0 | 50.0 |

| 10 | j | male | 72.0 | 99.0 | 50.0 |

| 9 | i | male | 75.0 | 99.0 | 50.0 |

| 5 | e | male | 93.0 | 99.0 | 50.0 |

| 3 | c | male | 68.0 | 99.0 | 50.0 |

| 1 | a | male | 99.0 | 99.0 | 50.0 |

+-----------+-------------+------------+-------------+-----------+-----------+--+

SELECT *,

SUM(mark) OVER(PARTITION BY sex) sum_mark,

AVG(mark) OVER(PARTITION BY sex) avg_mark

FROM score;

+-----------+-------------+------------+-------------+-----------+--------------------+--+

| score.id | score.name | score.sex | score.mark | sum_mark | avg_mark |

+-----------+-------------+------------+-------------+-----------+--------------------+--+

| 16 | p | female | 88.0 | 551.0 | 78.71428571428571 |

| 14 | n | female | NULL | 551.0 | 78.71428571428571 |

| 12 | l | female | 88.0 | 551.0 | 78.71428571428571 |

| 11 | k | female | 100.0 | 551.0 | 78.71428571428571 |

| 8 | h | female | 88.0 | 551.0 | 78.71428571428571 |

| 6 | f | female | 46.0 | 551.0 | 78.71428571428571 |

| 4 | d | female | 54.0 | 551.0 | 78.71428571428571 |

| 2 | b | female | 87.0 | 551.0 | 78.71428571428571 |

| 1 | a | male | 99.0 | 556.0 | 79.42857142857143 |

| 15 | o | male | NULL | 556.0 | 79.42857142857143 |

| 7 | g | male | 50.0 | 556.0 | 79.42857142857143 |

| 13 | m | male | 99.0 | 556.0 | 79.42857142857143 |

| 3 | c | male | 68.0 | 556.0 | 79.42857142857143 |

| 5 | e | male | 93.0 | 556.0 | 79.42857142857143 |

| 10 | j | male | 72.0 | 556.0 | 79.42857142857143 |

| 9 | i | male | 75.0 | 556.0 | 79.42857142857143 |

+-----------+-------------+------------+-------------+-----------+--------------------+--+

SELECT *,

SUM(mark) OVER(ORDER BY mark) sum_mark

FROM score;

-- 窗口自上而下自动变化,遇到相同值时视为一组同时计算,窗口范围从表首行到表末行,计算范围从表首行到当前行

+-----------+-------------+------------+-------------+-----------+--+

| score.id | score.name | score.sex | score.mark | sum_mark |

+-----------+-------------+------------+-------------+-----------+--+

| 15 | o | male | NULL | NULL |

| 14 | n | female | NULL | NULL |

| 6 | f | female | 46.0 | 46.0 |

| 7 | g | male | 50.0 | 96.0 |

| 4 | d | female | 54.0 | 150.0 |

| 3 | c | male | 68.0 | 218.0 |

| 10 | j | male | 72.0 | 290.0 |

| 9 | i | male | 75.0 | 365.0 |

| 2 | b | female | 87.0 | 452.0 |

| 16 | p | female | 88.0 | 716.0 |

| 12 | l | female | 88.0 | 716.0 |

| 8 | h | female | 88.0 | 716.0 |

| 5 | e | male | 93.0 | 809.0 |

| 13 | m | male | 99.0 | 1007.0 |

| 1 | a | male | 99.0 | 1007.0 |

| 11 | k | female | 100.0 | 1107.0 |

+-----------+-------------+------------+-------------+-----------+--+

SELECT *,

SUM(mark) OVER(PARTITION BY sex ORDER BY mark) sum_mark

FROM score;

-- 如果分组则窗口边界是从组的第一行到组的最后一行

-- 如果不分组则窗口边界是从表的第一行到表的最后一行

+-----------+-------------+------------+-------------+-----------+--+

| score.id | score.name | score.sex | score.mark | sum_mark |

+-----------+-------------+------------+-------------+-----------+--+

| 14 | n | female | NULL | NULL |

| 6 | f | female | 46.0 | 46.0 |

| 4 | d | female | 54.0 | 100.0 |

| 2 | b | female | 87.0 | 187.0 |

| 16 | p | female | 88.0 | 451.0 |

| 12 | l | female | 88.0 | 451.0 |

| 8 | h | female | 88.0 | 451.0 |

| 11 | k | female | 100.0 | 551.0 |

| 15 | o | male | NULL | NULL |

| 7 | g | male | 50.0 | 50.0 |

| 3 | c | male | 68.0 | 118.0 |

| 10 | j | male | 72.0 | 190.0 |

| 9 | i | male | 75.0 | 265.0 |

| 5 | e | male | 93.0 | 358.0 |

| 1 | a | male | 99.0 | 556.0 |

| 13 | m | male | 99.0 | 556.0 |

+-----------+-------------+------------+-------------+-----------+--+

-- 窗口分析函数

SELECT *,

LEAD(mark,2,0) OVER(PARTITION BY sex ORDER BY mark) lead,

LAG(mark,2,0) OVER(PARTITION BY sex ORDER BY mark) lag

FROM score;

-- 说明:

-- 第一个参数指定要取哪个字段的值

-- 第二个参数指定向上或向下跳过几行(默认值是1)

-- 第三个参数指定当值为null时替代的默认值(默认值是null)

+-----------+-------------+------------+-------------+--------+-------+--+

| score.id | score.name | score.sex | score.mark | lead | lag |

+-----------+-------------+------------+-------------+--------+-------+--+

| 14 | n | female | NULL | 54.0 | 0.0 |

| 6 | f | female | 46.0 | 87.0 | 0.0 |

| 4 | d | female | 54.0 | 88.0 | NULL |

| 2 | b | female | 87.0 | 88.0 | 46.0 |

| 16 | p | female | 88.0 | 88.0 | 54.0 |

| 12 | l | female | 88.0 | 100.0 | 87.0 |

| 8 | h | female | 88.0 | 0.0 | 88.0 |

| 11 | k | female | 100.0 | 0.0 | 88.0 |

| 15 | o | male | NULL | 68.0 | 0.0 |

| 7 | g | male | 50.0 | 72.0 | 0.0 |

| 3 | c | male | 68.0 | 75.0 | NULL |

| 10 | j | male | 72.0 | 93.0 | 50.0 |

| 9 | i | male | 75.0 | 99.0 | 68.0 |

| 5 | e | male | 93.0 | 99.0 | 72.0 |

| 1 | a | male | 99.0 | 0.0 | 75.0 |

| 13 | m | male | 99.0 | 0.0 | 93.0 |

+-----------+-------------+------------+-------------+--------+-------+--+

SELECT *,

FIRST_VALUE(mark,true) OVER(partition BY sex ORDER BY mark desc) first,

LAST_VALUE(mark,true) OVER(partition BY sex ORDER BY mark desc) last

FROM score;

-- 说明

-- 第一个参数指定要取哪个字段的值

-- 第二个参数指定是否跳过null值(默认值是false)

+-----------+-------------+------------+-------------+--------+--------+--+

| score.id | score.name | score.sex | score.mark | first | last |

+-----------+-------------+------------+-------------+--------+--------+--+

| 11 | k | female | 100.0 | 100.0 | 100.0 |

| 16 | p | female | 88.0 | 100.0 | 88.0 |

| 12 | l | female | 88.0 | 100.0 | 88.0 |

| 8 | h | female | 88.0 | 100.0 | 88.0 |

| 2 | b | female | 87.0 | 100.0 | 87.0 |

| 4 | d | female | 54.0 | 100.0 | 54.0 |

| 6 | f | female | 46.0 | 100.0 | 46.0 |

| 14 | n | female | NULL | 100.0 | 46.0 |

| 1 | a | male | 99.0 | 99.0 | 99.0 |

| 13 | m | male | 99.0 | 99.0 | 99.0 |

| 5 | e | male | 93.0 | 99.0 | 93.0 |

| 9 | i | male | 75.0 | 99.0 | 75.0 |

| 10 | j | male | 72.0 | 99.0 | 72.0 |

| 3 | c | male | 68.0 | 99.0 | 68.0 |

| 7 | g | male | 50.0 | 99.0 | 50.0 |

| 15 | o | male | NULL | 99.0 | 50.0 |

+-----------+-------------+------------+-------------+--------+--------+--+

-- 思路:分组 -> 排序 -> 计算【排名,聚合,分析】

-- 排名 -> row_number(),rank(),dense_rank()

-- 聚合 -> count(),max(),min(),sum(),avg()

-- 分析 -> lead(),lag(),first_value(),last_value()

-- window子句分为两类:行,值范围,不支持使用的函数包括:row_number(),rank(),dense_rank(),lead(),lag()

SELECT *,

MAX(mark) OVER(ORDER BY mark rows BETWEEN unbounded preceding AND CURRENT row)

FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 15 | o | male | NULL | NULL |

| 14 | n | female | NULL | NULL |

| 6 | f | female | 46.0 | 46.0 |

| 7 | g | male | 50.0 | 50.0 |

| 4 | d | female | 54.0 | 54.0 |

| 3 | c | male | 68.0 | 68.0 |

| 10 | j | male | 72.0 | 72.0 |

| 9 | i | male | 75.0 | 75.0 |

| 2 | b | female | 87.0 | 87.0 |

| 16 | p | female | 88.0 | 88.0 |

| 12 | l | female | 88.0 | 88.0 |

| 8 | h | female | 88.0 | 88.0 |

| 5 | e | male | 93.0 | 93.0 |

| 13 | m | male | 99.0 | 99.0 |

| 1 | a | male | 99.0 | 99.0 |

| 11 | k | female | 100.0 | 100.0 |

+-----------+-------------+------------+-------------+---------+--+

SELECT *,

MAX(mark) OVER(ORDER BY mark rows BETWEEN unbounded preceding AND unbounded following)

FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 15 | o | male | NULL | 100.0 |

| 14 | n | female | NULL | 100.0 |

| 6 | f | female | 46.0 | 100.0 |

| 7 | g | male | 50.0 | 100.0 |

| 4 | d | female | 54.0 | 100.0 |

| 3 | c | male | 68.0 | 100.0 |

| 10 | j | male | 72.0 | 100.0 |

| 9 | i | male | 75.0 | 100.0 |

| 2 | b | female | 87.0 | 100.0 |

| 16 | p | female | 88.0 | 100.0 |

| 12 | l | female | 88.0 | 100.0 |

| 8 | h | female | 88.0 | 100.0 |

| 5 | e | male | 93.0 | 100.0 |

| 13 | m | male | 99.0 | 100.0 |

| 1 | a | male | 99.0 | 100.0 |

| 11 | k | female | 100.0 | 100.0 |

+-----------+-------------+------------+-------------+---------+--+

SELECT *,

MAX(mark) OVER(ORDER BY mark rows BETWEEN 2 following AND 6 following)

FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 15 | o | male | NULL | 72.0 |

| 14 | n | female | NULL | 75.0 |

| 6 | f | female | 46.0 | 87.0 |

| 7 | g | male | 50.0 | 88.0 |

| 4 | d | female | 54.0 | 88.0 |

| 3 | c | male | 68.0 | 88.0 |

| 10 | j | male | 72.0 | 93.0 |

| 9 | i | male | 75.0 | 99.0 |

| 2 | b | female | 87.0 | 99.0 |

| 16 | p | female | 88.0 | 100.0 |

| 12 | l | female | 88.0 | 100.0 |

| 8 | h | female | 88.0 | 100.0 |

| 5 | e | male | 93.0 | 100.0 |

| 13 | m | male | 99.0 | 100.0 |

| 1 | a | male | 99.0 | NULL |

| 11 | k | female | 100.0 | NULL |

+-----------+-------------+------------+-------------+---------+--+

SELECT *,

MAX(mark) OVER(ORDER BY mark range BETWEEN 20 preceding AND 20 following)

FROM score;

+-----------+-------------+------------+-------------+---------+--+

| score.id | score.name | score.sex | score.mark | _wcol0 |

+-----------+-------------+------------+-------------+---------+--+

| 15 | o | male | NULL | NULL |

| 14 | n | female | NULL | NULL |

| 6 | f | female | 46.0 | 54.0 |

| 7 | g | male | 50.0 | 68.0 |

| 4 | d | female | 54.0 | 72.0 |

| 3 | c | male | 68.0 | 88.0 |

| 10 | j | male | 72.0 | 88.0 |

| 9 | i | male | 75.0 | 93.0 |

| 2 | b | female | 87.0 | 100.0 |

| 16 | p | female | 88.0 | 100.0 |

| 12 | l | female | 88.0 | 100.0 |

| 8 | h | female | 88.0 | 100.0 |

| 5 | e | male | 93.0 | 100.0 |

| 13 | m | male | 99.0 | 100.0 |

| 1 | a | male | 99.0 | 100.0 |

| 11 | k | female | 100.0 | 100.0 |

+-----------+-------------+------------+-------------+---------+--+

-- 取成绩前3名

WITH t1 AS(

SELECT *,

DENSE_RANK() OVER(ORDER BY mark desc) dk

FROM score

)

SELECT *

FROM t1

WHERE dk <=3;

SELECT *

FROM (

SELECT *,

DENSE_RANK() OVER(ORDER BY mark desc) dk

FROM score

)t1

WHERE dk <=3;

相关推荐
Dola_Pan1 小时前
Linux文件IO(二)-文件操作使用详解
java·linux·服务器
wang_book1 小时前
Gitlab学习(007 gitlab项目操作)
java·运维·git·学习·spring·gitlab
机器视觉知识推荐、就业指导2 小时前
Qt/C++事件过滤器与控件响应重写的使用、场景的不同
开发语言·数据库·c++·qt
jnrjian2 小时前
export rman 备份会占用buff/cache 导致内存压力
数据库·oracle
蜗牛^^O^2 小时前
Docker和K8S
java·docker·kubernetes
从心归零3 小时前
sshj使用代理连接服务器
java·服务器·sshj
isNotNullX3 小时前
一文解读OLAP的工具和应用软件
大数据·数据库·etl
IT毕设梦工厂4 小时前
计算机毕业设计选题推荐-在线拍卖系统-Java/Python项目实战
java·spring boot·python·django·毕业设计·源码·课程设计
小诸葛的博客4 小时前
pg入门1——使用容器启动一个pg
数据库
Ylucius4 小时前
动态语言? 静态语言? ------区别何在?java,js,c,c++,python分给是静态or动态语言?
java·c语言·javascript·c++·python·学习