创建分区表: create [external] table [if not exists] 表名(字段名 字段类型 , 字段名 字段类型 , ... )partitioned by (分区字段名 分区字段类型)... ;
自动生成分区目录并插入数据: load data [local] inpath '文件路径' into table 分区表名 partition (分区字段名='值');
注意: 加上 local 时,后面的文件路径应为 Linux 本地路径;不加 local 时,则为 HDFS 文件路径。
建表
sql复制代码
-- Create the practice database if it is missing, then switch to it.
-- if not exists keeps this script re-runnable, matching the
-- create table if not exists used for the score table.
create database if not exists hive3;
use hive3;
-- Exercise 1: create a single-level partitioned table.
-- Create syntax: create [external] table [if not exists] <table>
--                (<col> <type>, <col> <type>, ...)
--                partitioned by (<partition_col> <partition_type>)
-- Load syntax (creates the partition directory and inserts the data):
--                load data [local] inpath '<file_path>'
--                into table <table> partition (<partition_col>='<value>')
-- score: tab-delimited text rows, partitioned by dt.
create table if not exists score (
    name    string,
    subject string,
    grade   int
)
partitioned by (dt string)
row format delimited
fields terminated by '\t';
此为score.txt文件
sql复制代码
-- Upload score.txt to HDFS by hand (e.g. through the HDFS web UI) first.
-- Per the original note, every load data below moves that file, so it
-- presumably has to be uploaded again before each statement.
-- Load the same file into three dt partitions.
load data inpath '/score.txt'
    into table score
    partition (dt='2022');
load data inpath '/score.txt'
    into table score
    partition (dt='2023');
load data inpath '/score.txt'
    into table score
    partition (dt='2024');
sql复制代码
-- Query the data.
-- Unfiltered: rows from all three dt partitions come back from this one table.
select *
from score;
-- Filter on the partition column, e.g. only the 2023 data
-- (the original notes this improves query efficiency).
select *
from score
where dt = '2023';
select *
from score
where dt = '2022';
select *
from score
where dt = '2024';
可以直接根据年份作为条件来查询表的内容,结果如下
2.一级分区表练习2
1.建表
sql复制代码
-- Exercise 2: a second single-level partitioned table.
-- Create syntax: create [external] table [if not exists] <table>
--                (<col> <type>, <col> <type>, ...)
--                partitioned by (<partition_col> <partition_type>)
-- one_part_order: space-delimited text rows, partitioned by year.
create table one_part_order (
    oid   string,
    name  string,
    price double,
    num   int
)
partitioned by (year string)
row format delimited
fields terminated by ' ';
2.加载数据
四个order.txt文件如下
sql复制代码
-- Load the data: the files must already be in HDFS under /itcast before
-- load data places them into the year partitions.
-- One file goes to year 2022, three files go to year 2023.
load data inpath '/itcast/order202251.txt' into table one_part_order
    partition (year='2022');
load data inpath '/itcast/order2023415.txt' into table one_part_order
    partition (year='2023');
load data inpath '/itcast/order202351.txt' into table one_part_order
    partition (year='2023');
load data inpath '/itcast/order202352.txt' into table one_part_order
    partition (year='2023');
3.验证数据
sql复制代码
-- Verify the load: all rows first, then only the year 2023 partition.
select *
from one_part_order;
select *
from one_part_order
where year = '2023';
3.创建多级分区表
复制代码
创建分区表: create [external] table [if not exists] 表名(字段名 字段类型 , 字段名 字段类型 , ... )partitioned by (一级分区字段名 分区字段类型, 二级分区字段名 分区字段类型 , ...) ;
自动生成分区目录并插入数据: load data [local] inpath '文件路径' into table 分区表名 partition (一级分区字段名='值',二级分区字段名='值' , ...);
注意: 加上 local 时,后面的文件路径应为 Linux 本地路径;不加 local 时,则为 HDFS 文件路径。
1.建表
sql复制代码
-- Reset step: drop leftovers from earlier runs so the create below succeeds.
-- The original ran truncate on multi_part_order at this point. That fails
-- when the table does not exist yet, and when it does exist the table stays
-- in place, so the following create table fails instead.
drop table if exists more_part_order;
drop table if exists multi_part_order;
-- Multi-level partitioned table: partitioned by year, then month, then day.
-- Rows are space-delimited text.
create table multi_part_order (
    oid   string,
    pname string,
    price double,
    num   int
)
partitioned by (year string, month string, day string)
row format delimited
fields terminated by ' ';
或者(再建一张结构相同的表 multi_part1_order,用于演示另一种分区值写法)
sql复制代码
-- Create multi_part1_order with the same columns and the same
-- year / month / day partition columns as multi_part_order.
-- Rows are space-delimited text.
create table multi_part1_order (
    oid   string,
    pname string,
    price double,
    num   int
)
partitioned by (year string, month string, day string)
row format delimited
fields terminated by ' ';
2.加载数据
sql复制代码
-- Load each order file into its year / month / day partition.
-- Partition values here are the bare components (e.g. month='05', day='01').
load data inpath '/itcast/order202251.txt'
    into table multi_part_order
    partition (year='2022',month='05',day='01');
load data inpath '/itcast/order2023415.txt'
    into table multi_part_order
    partition (year='2023',month='04',day='15');
load data inpath '/itcast/order202351.txt'
    into table multi_part_order
    partition (year='2023',month='05',day='01');
load data inpath '/itcast/order202352.txt'
    into table multi_part_order
    partition (year='2023',month='05',day='02');
或者(向 multi_part1_order 加载数据,分区值使用完整日期格式,如 month='2023-05'、day='2023-05-01')
sql复制代码
-- Load each order file into its year / month / day partition.
-- Partition values here carry the full date prefix
-- (e.g. month='2023-05', day='2023-05-01').
load data inpath '/itcast/order202251.txt'
    into table multi_part1_order
    partition (year='2022',month='2022-05',day='2022-05-01');
load data inpath '/itcast/order2023415.txt'
    into table multi_part1_order
    partition (year='2023',month='2023-04',day='2023-04-15');
load data inpath '/itcast/order202351.txt'
    into table multi_part1_order
    partition (year='2023',month='2023-05',day='2023-05-01');
load data inpath '/itcast/order202352.txt'
    into table multi_part1_order
    partition (year='2023',month='2023-05',day='2023-05-02');
3.验证数据
sql复制代码
-- Verify the load: all rows first, then only the day partition for 2023-05-01.
select *
from multi_part1_order;
select *
from multi_part1_order
where day = '2023-05-01';
需求1:查询日期为2023年5月1日的商品
需求2:统计日期2023年5月1日的商品销售额
sql复制代码
-- Total sales amount (price * num summed) for 2023-05-01,
-- selected through the three partition columns.
select
    sum(price * num) as money
from multi_part_order
where year = '2023'
  and month = '05'
  and day = '01';
4.分区表操作
添加,删除
sql复制代码
-- ---------------------------- Partition operations ----------------------------
-- Add a partition:
--   alter table <table> add [if not exists] partition (<col>='<value>', ...)
-- if not exists keeps the statement from failing when the script is re-run.
select * from multi_part1_order;
alter table multi_part1_order
    add if not exists partition (year='2024',month='5',day='01');
-- Drop partitions:
--   alter table <table> drop [if exists] partition (<col>='<value>', ...)
-- Three forms of the partition spec are shown: year only, the full
-- year / month / day spec, and year plus month.
-- if exists makes the later statements safe to run after an earlier one has
-- already removed the partition, independent of server configuration.
alter table multi_part1_order
    drop if exists partition (year='2024');
alter table multi_part1_order
    drop if exists partition (year='2024',month='5',day='01');
alter table multi_part1_order
    drop if exists partition (year='2024',month='5');
修改
sql复制代码
-- Rename a partition:
--   alter table <table> partition (<col>='<old_value>', ...)
--   rename to partition (<col>='<new_value>', ...)
-- Here the year value changes from 2024 to 2030 while month and day stay the same.
alter table multi_part1_order
    partition (year='2024',month='5',day='01')
    rename to partition (year='2030',month='5',day='01');
-- NOTE(review): presumably the existing day=01 directory is moved under a new
-- year=2030 directory rather than rewritten - confirm on HDFS.
查看
sql复制代码
-- List all partitions: show partitions <table>
show partitions
    multi_part1_order;
-- Sync / repair partitions: msck repair table <table>
msck repair table
    multi_part1_order;
5.分桶表
创建基础分桶表:
create [external] table [if not exists] 表名(字段名 字段类型 )clustered by (分桶字段名)
into 桶数量 buckets ;
1.建表
sql复制代码
-- Create a basic bucketed table.
-- Syntax: create [external] table [if not exists] <table> (<col> <type>, ...)
--         clustered by (<bucket_col>) into <bucket_count> buckets
-- (The original first line began with a single dash, which is not a comment
-- marker and would be a syntax error.)
-- course_base: tab-delimited text rows, clustered by cid into 3 buckets.
create table course_base (
    cid   int,
    cname string,
    sname string
)
clustered by (cid) into 3 buckets
row format delimited
fields terminated by '\t';
2.加载数据
sql复制代码
-- Load the course file from HDFS into the bucketed table.
-- NOTE(review): whether load data actually distributes rows across the
-- buckets depends on the Hive version - confirm for the target cluster.
load data inpath '/itcast/course.txt'
    into table course_base;
-- Create a table with an array column.
-- Fields are separated by a tab. Elements of the location array are
-- separated by a comma.
create table test_array_1 (
    name     string,
    location array<string>
)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
2.加载数据
sql复制代码
-- Load the array sample file from HDFS.
load data inpath '/itcast/data_for_array_type.txt'
    into table test_array_1;
3.验证数据
sql复制代码
-- Verify the data. Sample rows as recorded in the original notes:
--zhangsan,"[""beijing"",""shanghai"",""tianjin"",""hangzhou""]"
--wangwu,"[""changchun"",""chengdu"",""wuhan"",""beijin""]"
select *
from test_array_1;
4.需求:查询张三是否在天津住过?
sql复制代码
-- Does zhangsan's location array contain 'tianjin'?
-- Result recorded in the original notes: true
select
    array_contains(location, 'tianjin')
from test_array_1
where name = 'zhangsan';
5.需求:查询张三的地址有几个?
sql复制代码
-- How many entries does zhangsan's location array hold?
-- Result recorded in the original notes: 4
select
    size(location)
from test_array_1
where name = 'zhangsan';
6.需求:查询王五的第二个地址?
sql复制代码
-- wangwu's second address: array index 1.
-- Result recorded in the original notes: chengdu
select
    location[1]
from test_array_1
where name = 'wangwu';
-- Create a table with a map column.
-- Fields are separated by ',', map entries by '#', and each key is
-- separated from its value by ':'.
create table test_map_1 (
    id      int,
    name    string,
    members map<string,string>,
    age     int
)
row format delimited
fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';
2.加载数据
sql复制代码
-- Load the map sample file from HDFS.
load data inpath '/itcast/data_for_map_type.txt'
    into table test_map_1;