c
-- Build the sample table: drop any earlier copy, recreate it, then load the rows.
DROP TABLE IF EXISTS db.tb_name;

CREATE TABLE IF NOT EXISTS db.tb_name (
    suj1 STRING,
    suj2 STRING
);

-- Six subject pairs. Two of them are mirrored on purpose:
-- (语文, 数学) / (数学, 语文) and (物理, 化学) / (化学, 物理).
INSERT OVERWRITE TABLE db.tb_name
VALUES
    ("语文", "数学"),
    ("语文", "英语"),
    ("数学", "语文"),
    ("物理", "化学"),
    ("生物", "物理"),
    ("化学", "物理")
;
1.通过常规的分组排序/分组去重,该种方式去重时考虑了前后顺序,无法满足要求
1)分组排序
c
-- Order-sensitive dedup with ROW_NUMBER: rows are numbered inside each exact
-- (suj1, suj2) combination and only the first one is kept. Mirrored pairs such
-- as (a, b) and (b, a) land in different partitions, so both survive.
WITH t AS (
    SELECT
        suj1,
        suj2,
        -- Constant ORDER BY: every row in a partition is identical, so any pick works.
        ROW_NUMBER() OVER (PARTITION BY suj1, suj2 ORDER BY 1 DESC) AS rn
    FROM db.tb_name
)
-- SELECT * is kept deliberately so the helper column rn stays in the output.
SELECT *
FROM t
WHERE rn = 1
;
2)分组
c
-- Order-sensitive dedup: one output row per exact (suj1, suj2) combination.
-- Mirrored pairs such as (a, b) and (b, a) are still reported separately.
SELECT DISTINCT
    suj1,
    suj2
FROM db.tb_name
;
2.分组排序时先将两个字段的前后顺序统一(对两个值排序后作为分组键),使内容相同但顺序不同的组合落入同一分组
c
-- Order-insensitive dedup: (a, b) and (b, a) count as the same pair and only
-- one of the two rows is returned.
SELECT
    suj1,
    suj2
FROM (
    SELECT
        suj1,
        suj2,
        -- Partition key = the two values in sorted order, compared element by
        -- element. No delimiter-joined string is built, so a value that itself
        -- contains ',' cannot be split apart or collide with another pair.
        -- ORDER BY suj1, suj2 makes the surviving row deterministic: the row
        -- that sorts first on (suj1, suj2) is kept on every run.
        ROW_NUMBER() OVER (
            PARTITION BY
                SORT_ARRAY(ARRAY(suj1, suj2))[0],
                SORT_ARRAY(ARRAY(suj1, suj2))[1]
            ORDER BY suj1, suj2
        ) AS rn
    FROM db.tb_name
) t
WHERE rn = 1
;
最终去重后输出的结果: