set var:pi_sysdate = 20241114;
Variable PI_SYSDATE set to 20241114
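Variables set this way can be referenced later through ${var:...} substitution (the same mechanism as the ${var:schema_ods} reference further down). A minimal sketch, reusing the stg.hd_aml_mac_ip table created below:
sql
-- inside impala-shell; the value set above is substituted into the query
select count(*)
from stg.hd_aml_mac_ip
where machinedate = '${var:pi_sysdate}';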
2. Creating internal and external tables with Impala on CDP
sql
-- Hive 3.0 / CDP parameter so that new tables are not created as transactional by default
set default_transactional_type=none;
create external table stg.hd_aml_mac_ip_ext (
machinedate string,
vc_fundacco string,
ip string
)
stored as textfile
tblproperties ('objcapabilities'='extread,extwrite');
create external table stg.hd_aml_mac_ip (
machinedate string,
vc_fundacco string,
ip string
)
stored as parquet
tblproperties ("parquet.compression"="snappy");
3. Exporting comma-delimited files from Hive to the local filesystem
bash
hive -e "SELECT * from student" | sed 's/\t/,/g' > /tmp/student.csv
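An alternative that skips the sed step is to have Hive write the delimiter directly; the output then lands as one or more files under the target directory instead of a single named .csv (the directory path is an example):
sql
insert overwrite local directory '/tmp/student_csv'
row format delimited fields terminated by ','
select * from student;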
4. Hive on MR parameters: enabling dynamic partitioning
sql
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=500000;
set hive.exec.max.dynamic.partitions.pernode=100000;
set mapreduce.reduce.memory.mb=4096;
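With these settings one insert can fan out into many partitions; the dynamic partition column must come last in the SELECT list. A sketch (the target table dw.hd_aml_mac_ip_part is hypothetical):
sql
insert overwrite table dw.hd_aml_mac_ip_part partition (dt)
select machinedate, vc_fundacco, ip,
       machinedate as dt   -- partition value derived per row, not hard-coded
from stg.hd_aml_mac_ip;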
5. Hive metadata in MySQL: reconstructing CREATE TABLE statements from the metastore
sql
set session group_concat_max_len = 20480;
select concat_ws('',
a.create_body_str,
CHAR(10),
c.tbl_comment,
CHAR(10),
b.partition_str,
CHAR(10),
a.stored_format,
CHAR(10),
d.compress_str,
';') AS create_sql
FROM (
select t.TBL_ID,
t.TBL_NAME,
case when k.INPUT_FORMAT like '%.parquet%' then 'STORED AS PARQUET'
when k.INPUT_FORMAT like '%.SequenceFile%' then 'STORED AS SEQUENCEFILE'
when k.INPUT_FORMAT like '%.Text%' then ''
else 'STORED AS NULL'
end AS stored_format,
concat_ws('',
'CREATE',
CASE t.TBL_TYPE
WHEN 'EXTERNAL_TABLE' THEN ' EXTERNAL'
ELSE '' END,
' TABLE IF NOT EXISTS ${schema}.',
t.TBL_NAME,
'(',
CHAR(10),
group_concat(concat_ws('',
g.COLUMN_NAME,
' ',
g.TYPE_NAME,
' COMMENT ',
'''',
REPLACE(REPLACE(g.COMMENT,';',' '),'; ',' '),
'''',
CHAR(10)) ORDER BY g.INTEGER_IDX separator ','),
')'
) AS create_body_str
from hive.TBLS t,hive.SDS k,hive.COLUMNS_V2 g,hive.DBS s
where t.SD_ID = k.SD_ID
and k.CD_ID = g.CD_ID
and s.DB_ID = t.DB_ID
and k.INPUT_FORMAT not like '%.kudu%'
and s.NAME = 'stg' -- restrict to one database
group by t.TBL_ID
-- limit 100
) a
left join (select t.TBL_ID,
concat_ws('','COMMENT ','''',t.param_value,'''') AS tbl_comment
from hive.TABLE_PARAMS t
where t.param_key = 'comment'
group by t.TBL_ID
) c
on c.tbl_id = a.tbl_id
left join (select t.TBL_ID,concat_ws('','PARTITIONED BY (',group_concat(concat_ws('',t.pkey_name,' ',t.pkey_type,' ','COMMENT ','''',t.pkey_comment,'''')
order by t.integer_idx separator ','),')') AS partition_str
from hive.PARTITION_KEYS t
group by t.TBL_ID) b
ON b.tbl_id = a.tbl_id
left join (select t.TBL_ID,
concat_ws('',
'TBLPROPERTIES (',
'''',
t.PARAM_KEY,
'''',
'=',
'''',
t.PARAM_VALUE,
''')') as compress_str
from hive.TABLE_PARAMS t
where t.param_key like '%compression%'
group by t.TBL_ID,t.param_key,t.param_value
-- limit 100
) d
on d.tbl_id = a.tbl_id
order by a.tbl_name;
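The query emits one complete CREATE TABLE statement per table in the chosen database; a hypothetical output row looks roughly like this (table name, comments and partition column are illustrative only):
sql
CREATE EXTERNAL TABLE IF NOT EXISTS ${schema}.hd_aml_mac_ip(
machinedate string COMMENT 'machine date',
vc_fundacco string COMMENT 'fund account',
ip string COMMENT 'client ip'
)
COMMENT 'aml mac/ip mapping'
PARTITIONED BY (dt string COMMENT 'partition date')
STORED AS PARQUET
TBLPROPERTIES ('parquet.compression'='snappy');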
-- list available functions
use default;
show functions;
-- show the jar and main class a function uses
show create function default.genseq;
-- upload the jar to the new cluster and adjust its permissions
hdfs dfs -put /home/app_adm/etl/udf/udf_0608.jar /user/hive/warehouse/udf_0608.jar
hdfs dfs -chown hive:hive /user/hive/warehouse/udf_0608.jar
hdfs dfs -chmod 777 /user/hive/warehouse/udf_0608.jar
-- drop a UDF: delete it in Impala first, then in Hive
-- 1. run in Impala
DROP FUNCTION DEFAULT.udf10(STRING, STRING);
-- 2. run in Hive
drop function default.udf10;
-- create the UDFs in Hive; Impala picks them up after a metadata refresh
create function default.clnseq as 'cn.com.businessmatrix.udf.HLSequenceCleaner' using jar 'hdfs:///user/hive/warehouse/udf_0608.jar';
create function default.genseq as 'cn.com.businessmatrix.udf.HLSequenceGenerator' using jar 'hdfs:///user/hive/warehouse/udf_0608.jar';
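After the Hive-side CREATE FUNCTION, Impala usually needs its function metadata refreshed before the UDFs become visible; a sketch of the Impala-side commands:
sql
-- run in impala-shell
refresh functions default;
show functions in default;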
-- force-push a local file to HDFS, overwriting it if it already exists
hdfs dfs -put -f /home/file/ylb_trade_transfer_ext_out /tmp/hive/stg/ylb_trade_transfer_ext_out
-- change ownership of the HDFS directory so it can be used when running hive commands
sudo -u hdfs hadoop fs -chown -R hive:supergroup /tmp/hive/stg/ylb_trade_transfer_ext
9. Upserting into a Kudu table from Impala (matched on the primary key)
sql
upsert into ${var:schema_ods}.mdm_ip_cust(
sk_invpty_of_cust
,gp_flag
)
select t.sk_invpty_of_cust,
0 as gp_flag
from ods.mdm_ip_cust t
where t.gp_flag is null;
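upsert only works against Kudu tables, whose primary key is declared in the DDL. A minimal sketch of what such a definition might look like (the real schema of mdm_ip_cust is an assumption here):
sql
create table ${var:schema_ods}.mdm_ip_cust (
  sk_invpty_of_cust bigint,
  gp_flag int,
  primary key (sk_invpty_of_cust)
)
partition by hash (sk_invpty_of_cust) partitions 8
stored as kudu;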
10. Archiving small files with Hadoop archives (HAR)
sql
-- whether archiving is enabled
set hive.archive.enabled=true;
-- whether Hive may set the parent directory when creating an archive
set hive.archive.har.parentdir.settable=true;
-- target size of the archive part files
set har.partfile.size=1099511627776;
-- archive a partition
ALTER TABLE A ARCHIVE PARTITION(dt='2020-12-24', hr='12');
-- restore an archived partition to its original files
ALTER TABLE A UNARCHIVE PARTITION(dt='2020-12-24', hr='12');
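Archived partitions remain readable (queries go through the HAR, just more slowly); they only need to be unarchived before being overwritten. A quick check after archiving:
sql
select count(*) from A where dt = '2020-12-24' and hr = '12';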
-- Hive column-to-rows: explode comma-delimited columns
select new_fundaccount, new_bk_tradeaccount, bk_product
from stg.tt0liquidateschema_tmp01
LATERAL VIEW explode(split(fundaccount, ',')) fundaccount as new_fundaccount
LATERAL VIEW explode(split(bk_tradeaccount, ',')) bk_tradeaccount as new_bk_tradeaccount;
-- Example (written in Oracle syntax, using dual and CONNECT BY to split the strings):
create table tmp_dz as
select '000855' as bk_product,
'372402834320,37345435345435,37345343434' as fundaccount,
'982342242322342,9842423424,98345333' as tradeaccount from dual;
insert into tmp_dz
select '000845' as bk_product,
'37345343454' as fundaccount,
'98345333433' as tradeaccount from dual;
select nvl(new_fundaccount,fundaccount) as fundaccount,
nvl(new_tradeaccount,tradeaccount) as tradeaccount,
bk_product
from (
SELECT REGEXP_SUBSTR(fundaccount, '[^,]+', 1, ROWNUM) as new_fundaccount,
REGEXP_SUBSTR(tradeaccount, '[^,]+', 1, ROWNUM) as new_tradeaccount,
t.*
FROM tmp_dz t
CONNECT BY ROWNUM <= regexp_count(fundaccount, ',') + 1
) t;
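For reference, the same position-matched split can be written in Hive with posexplode, in the spirit of section 19 below (a sketch):
sql
select t0.bk_product,
       f.new_fundaccount,
       t.new_tradeaccount
from tmp_dz t0
lateral view posexplode(split(t0.fundaccount, ',')) f as f_idx, new_fundaccount
lateral view posexplode(split(t0.tradeaccount, ',')) t as t_idx, new_tradeaccount
where f.f_idx = t.t_idx;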
19. Hive column-to-rows for multiple comma-delimited columns
sql
-- test data
with temp as
(select '1,2,3' as id,
'a,b,c' as name union select '4,5,6' as id,
'd,e,f' as name)
-- the WHERE clause keeps the two explodes aligned by position
select id, name, s_id, s_name
from temp
lateral view posexplode(split(id, ',')) t1 as s_id_index, s_id
lateral view posexplode(split(name, ',')) t2 as s_name_index, s_name
where s_id_index = s_name_index;
20. CDP Hive supports transactions: insert, update, delete, select
sql
-- update/delete work by default; there is no need to set transactional=true when creating the table. Create a table and try inserting data.
create table cdhadmin_table_hive (col1 int);
-- insert
insert into table cdhadmin_table_hive values (1);
insert into table cdhadmin_table_hive values (51);
insert into table cdhadmin_table_hive values (2);
insert into table cdhadmin_table_hive values (3);
select * from cdhadmin_table_hive;
-- delete
delete from cdhadmin_table_hive where col1 = 51;
select * from cdhadmin_table_hive;
-- update
update cdhadmin_table_hive set col1=300 where col1=3;
select * from cdhadmin_table_hive;
-- as the hive user, grant the application user access to the default database
grant select on database default to user cdhadmin;
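To verify the grant, assuming SQL standard based authorization is enabled, something like the following can be run:
sql
show grant user cdhadmin on database default;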