文章目录
- 1.数据准备
- [2.双重group by实现 解决数据倾斜](#2.双重group by实现 解决数据倾斜)
  - [2.1 第一层加盐group by](#2.1 第一层加盐group by)
  - [2.2 第二层去盐group by](#2.2 第二层去盐group by)
1.数据准备
-- Single-column text table; rows are parsed as comma-delimited fields.
create table wordcount (
    a string
)
row format delimited
fields terminated by ',';

-- Load the sample data file from the local filesystem into the table.
-- NOTE(review): 'opt/2.txt' is a relative path (no leading '/'); confirm
-- whether the absolute path '/opt/2.txt' was intended.
load data local inpath 'opt/2.txt'
into table wordcount;
hive (default)> select * from wordcount;
OK
wordcount.a
b
a
a
a
a
b
b
c
c
e
d
2.双重group by实现 解决数据倾斜
随机数(盐值):ceil(rand()*10),生成 1~10 的随机整数(rand() 恰好为 0 时结果为 0,概率极低),拼接在 key 前面用于打散倾斜的 key
-- Two-stage ("salted") aggregation used to mitigate data skew on a hot key.
--   Inner query: prefix every key with a random salt, ceil(rand()*10), joined
--                by '_', and count rows per salted key (partial counts).
--   Outer query: strip the salt with split(...,'_')[1] and sum the partial
--                counts per original key.
-- The separator must be the same '_' in the select list, in the inner
-- group by, and in split(); otherwise the select expression does not match
-- the grouping expression and the salt cannot be removed again.
-- Note: split(...)[1] keeps only the piece right after the first '_', so
-- this assumes the values of column a contain no '_' themselves.
select
    split(salt_a, '_')[1] alpah,
    sum(count)
from
(
    select
        concat_ws('_', cast(ceil(rand()*10) as string), a) salt_a,
        count(1) count
    from wordcount
    group by concat_ws('_', cast(ceil(rand()*10) as string), a)
) b
group by split(salt_a, '_')[1];
alpah _c1
a 4
b 3
c 2
d 1
e 1
解析:
2.1 第一层加盐group by
-- First layer: group by the salted key.
-- Each key is prefixed with a random salt, ceil(rand()*10), joined by '_'
-- (e.g. '10_a'), so rows of one hot key are split into several groups.
-- The select expression and the group by expression must use the same
-- separator so that they are the same expression.
select
    concat_ws('_', cast(ceil(rand()*10) as string), a) salt_a,
    count(1) count
from wordcount
group by concat_ws('_', cast(ceil(rand()*10) as string), a);
salt_a count
10_a 1
10_b 1
1_a 2
2_a 1
3_b 1
4_b 1
4_c 1
4_d 1
6_c 1
7_e 1
Time taken: 176.729 seconds, Fetched: 10 row(s)
2.2 第二层去盐group by
-- Second layer: remove the salt and merge the partial counts.
-- `b` stands for the result of the first-layer salted query (the derived
-- table aliased b in the full statement); split(salt_a,'_')[1] takes the
-- piece after the salt prefix, i.e. the original key.
select
    split(salt_a, '_')[1] as alpah,
    sum(count)
from b
group by
    split(salt_a, '_')[1];