-- 通过 hive 加载数据
-- 创建数据表
-- Select the working database for the statements that follow.
USE myhivebook;

-- Plain-text table: one student per line, columns separated by commas.
CREATE TABLE IF NOT EXISTS student (
    id   INT,
    name STRING
)
COMMENT '学生表'
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Inspect the resulting table definition.
DESCRIBE FORMATTED student;
-- 创建数据
-- $ vi /root/data/student.dat
-- 1001,tom
-- 1002,jack
-- 1003,rose
-- Load data
-- LOCAL source: the local file is copied up to HDFS and replaces the current table contents.
LOAD DATA LOCAL INPATH '/root/data/student.dat'
    OVERWRITE INTO TABLE student;

-- HDFS source: the file is moved (cut) from its HDFS location into the table directory.
-- Stage the file on HDFS first, then load it and replace the current table contents.
dfs -mkdir -p /hadoop/mydata;
dfs -put /root/data/student.dat /hadoop/mydata;
dfs -ls /hadoop/mydata;
LOAD DATA INPATH '/hadoop/mydata/student.dat'
    OVERWRITE INTO TABLE student;

-- Append a local file to the table (no OVERWRITE keeps the existing rows).
LOAD DATA LOCAL INPATH '/root/data/student.dat'
    INTO TABLE student;

-- Append an HDFS file to the table.
-- The table is emptied first and the file is staged on HDFS again before the load.
TRUNCATE TABLE student;
dfs -put /root/data/student.dat /hadoop/mydata;
LOAD DATA INPATH '/hadoop/mydata/student.dat'
    INTO TABLE student;

-- Query the data.
SELECT * FROM student;
-- 装载csv数据
-- 逗号分隔值(Comma-Separated Values,CSV,有时也称为字符分隔值,因为分隔字符也可以不是逗号)
-- 获取数据格式
-- id,name,age,mark
-- 1,tom,22,90
-- 2,jack,23,95
-- 3,rose,21,99
-- Create a table matching the sample CSV layout (id, name, age, mark).
-- No SERDEPROPERTIES are given, so the default separator and quote settings of the SerDe apply.
-- skip.header.line.count = 1 keeps the header line of the file out of the query results.
-- NOTE(review): the Hive CSV SerDe documentation states that OpenCSVSerde exposes every
-- column as STRING regardless of the declared type - confirm against the DESCRIBE output.
CREATE TABLE IF NOT EXISTS csv_student (
    id   INT,
    name STRING,
    age  INT,
    mark DOUBLE
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
TBLPROPERTIES ('skip.header.line.count' = '1');

-- Inspect the table structure.
DESCRIBE FORMATTED csv_student;
-- 上传数据到linux /root/data
-- $ cat student.csv
-- Load the CSV file from the local filesystem, replacing any existing rows.
LOAD DATA LOCAL INPATH '/root/data/student.csv'
    OVERWRITE INTO TABLE csv_student;

-- Query the business data.
SELECT * FROM csv_student;
-- 示例2:
-- 数据
-- id,name,age,mark
-- 1 'tom' 22 90
-- 2 'jack' 23 95
-- 3 'rose' 21 99
-- Create the table. It is dropped first so that the definition below always takes effect.
DROP TABLE IF EXISTS csv_student2;

-- Same columns as csv_student, but the SerDe is told that fields are tab-separated
-- and that text values are wrapped in single-quote characters.
-- The header line of the file is skipped.
CREATE TABLE IF NOT EXISTS csv_student2 (
    id   INT,
    name STRING,
    age  INT,
    mark DOUBLE
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    'separatorChar' = '\t',
    'quoteChar'     = "'"
)
TBLPROPERTIES ('skip.header.line.count' = '1');

-- Load the file from the local filesystem, replacing any existing rows.
LOAD DATA LOCAL INPATH '/root/data/student2.csv'
    OVERWRITE INTO TABLE csv_student2;

-- Query the business data.
SELECT * FROM csv_student2;
-- 作业:数据通过分号分隔,字符用双引号界定
-- JSON,JSON(JavaScript Object Notation, JS对象简谱)是一种轻量级的数据交换格式。
-- {}表示对象
-- []表示集合
-- id,name,age,mark
-- 1,tom,22,90
-- {
-- "id": 1,
-- "name": "tom",
-- "age": 22,
-- "mark": 90
-- }
-- [
-- {
-- "id": 1,
-- "name": "tom",
-- "age": 22,
-- "mark": 90
-- },
-- {
-- "id": 1,
-- "name": "tom",
-- "age": 22,
-- "mark": 90
-- }
-- ]
-- 获取数据样例
-- {"name":"Michael"}
-- {"name":"Andy","Age":30}
-- {"name":"Justin","Age":19}
-- JSON parsing needs a third-party SerDe jar. Download it and upload it to /root/data on the Linux host.
-- Register the jar for the current session and confirm that it is listed.
ADD JAR /root/data/json-serde-1.3.8-jar-with-dependencies.jar;
LIST JARS;

-- Create the table. Each JSON object in the file becomes one row.
-- The SerDe class comes from the jar registered above, so the jar must stay
-- registered until the last statement that touches this table has run.
CREATE TABLE IF NOT EXISTS json_users (
    name STRING,
    age  INT
)
ROW FORMAT SERDE "org.openx.data.jsonserde.JsonSerDe";

-- Upload users.json to /root/data/ and load it, replacing any existing rows.
LOAD DATA LOCAL INPATH '/root/data/users.json'
    OVERWRITE INTO TABLE json_users;

-- Query the data.
SELECT * FROM json_users;

-- Optional cleanup: unregister the jar only after the table is no longer needed in this session.
-- Running this before the statements above removes the SerDe class they depend on.
DELETE JAR /root/data/json-serde-1.3.8-jar-with-dependencies.jar;
-- Partitioning
-- Hive has static partitions and dynamic partitions.
-- Create a statically partitioned table: sex is the partition column and is
-- declared in PARTITIONED BY rather than in the regular column list.
CREATE TABLE IF NOT EXISTS student_static_partition (
    id   INT,
    name STRING,
    age  INT
)
PARTITIONED BY (sex STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Inspect the table structure.
DESCRIBE student_static_partition;
-- Add partitions, one statement per partition.
-- IF NOT EXISTS keeps every ADD re-runnable. Without it the combined form below
-- fails because both partitions were already created by the two single ADDs.
ALTER TABLE student_static_partition ADD IF NOT EXISTS PARTITION (sex = 'male');
ALTER TABLE student_static_partition ADD IF NOT EXISTS PARTITION (sex = 'female');
-- Or add both partitions in one statement (partition specs are separated by whitespace).
ALTER TABLE student_static_partition ADD IF NOT EXISTS
    PARTITION (sex = 'male')
    PARTITION (sex = 'female');

-- Drop partitions, one statement per partition.
-- IF EXISTS makes a repeated DROP a no-op instead of depending on session settings.
ALTER TABLE student_static_partition DROP IF EXISTS PARTITION (sex = 'male');
ALTER TABLE student_static_partition DROP IF EXISTS PARTITION (sex = 'female');
-- Or drop both partitions in one statement (partition specs are separated by commas).
ALTER TABLE student_static_partition DROP IF EXISTS
    PARTITION (sex = 'male'),
    PARTITION (sex = 'female');

-- List the partitions of the table.
-- NOTE(review): run this right after the ADD statements to see both partitions.
-- After the DROP statements above the list is empty.
SHOW PARTITIONS student_static_partition;
-- +-------------+--+
-- | partition |
-- +-------------+--+
-- | sex=female |
-- | sex=male |
-- +-------------+--+
-- 创建数据
-- $ vi /root/data/student_male.dat
-- 1,tom,22
-- 2,jack,23
-- $ vi /root/data/student_female.dat
-- 3,rose,20
-- 4,marry,21
-- Load one file per partition. The partition value comes from the PARTITION
-- clause, not from the file, which only carries id, name and age.
LOAD DATA LOCAL INPATH '/root/data/student_male.dat'
    OVERWRITE INTO TABLE student_static_partition
    PARTITION (sex = 'male');

LOAD DATA LOCAL INPATH '/root/data/student_female.dat'
    OVERWRITE INTO TABLE student_static_partition
    PARTITION (sex = 'female');

-- Query the data across all partitions.
SELECT * FROM student_static_partition;