hive(2)

-- 复习

CREATE TABLE IF NOT EXISTS dept_partition

(

deptno int,

dname string,

loc string

)

partitioned BY(month string)

row FORMAT delimited

fields terminated BY '\t'

;

DESC dept_partition;

show partitions dept_partition;

ALTER TABLE dept_partition ADD partition(month='201904');

ALTER TABLE dept_partition ADD partition(month='201905') partition(month='201906');

ALTER TABLE dept_partition drop partition(month='201904');

ALTER TABLE dept_partition drop partition(month='201905'),partition(month='201906');

LOAD data local inpath '/root/data/dept.txt' overwrite INTO TABLE `dept_partition`

partition(`month`='201904',`month`='201905');

-- 静态分区之二级分区

CREATE TABLE IF NOT EXISTS dept_partition2

(

deptno int,

dname string,

loc string

)

partitioned BY(month int,date int)

row FORMAT delimited

fields terminated BY '\t'

;

ALTER TABLE dept_partition2 ADD partition(month=201904,date=01);

ALTER TABLE dept_partition2 ADD partition(month=201904,date=02) partition(month=201904,date=03);

show partitions dept_partition2;

ALTER TABLE dept_partition2 drop partition(month=201904,date=01);

ALTER TABLE dept_partition2 drop partition(month=201904,date=02),partition(month=201904,date=03);

LOAD data local inpath '/root/data/dept.txt' overwrite INTO TABLE `dept_partition2` partition(month=201904,date=01);

SELECT * FROM dept_partition2;

-- 动态分区

-- 准备数据student_male_female.txt

001,tom,22,male

002,jack,23,male

003,rose,20,female

004,mike,25,male

005,lucy,21,female

006,andy,20,female

-- 建表

DROP TABLE IF EXISTS student_info;

CREATE TEMPORARY TABLE IF NOT EXISTS student_info

(

id string,

name string,

age int,

sex string

)

row FORMAT delimited

fields terminated BY ','

;

-- 加载数据

LOAD data local inpath '/root/data/student_male_female.txt' overwrite INTO TABLE `student_info`;

-- 查看数据

SELECT * FROM student_info;

-- 建分区表

DROP TABLE IF EXISTS student_partition;

CREATE TABLE IF NOT EXISTS student_partition

(

id string,

name string,

age int

)

partitioned BY (sex string);

-- 检查是否已开启动态分区模式【默认为true】

set hive.exec.dynamic.partition=true;

-- 关闭严格模式

set hive.exec.dynamic.partition.mode=nonstrict;

-- 插入数据【注意:所有字段顺序对应,分区字段必须放在最后】

INSERT INTO TABLE `student_partition` partition(sex)

SELECT id,name,age,sex FROM student_info;

-- 查看数据

SELECT * FROM `student_partition`;

show partitions `student_partition`;

-- 示例2

-- 准备数据student_partition2.txt

01,tom,22,2019-09-08

02,jack,23,2020-08-09

03,rose,20,2019-09-08

04,smith,21,2020-07-09

05,john,22,2019-09-08

-- 建临时表

CREATE TEMPORARY TABLE IF NOT EXISTS student_info2

(

id string,

name string,

age int,

start_date date

)

row FORMAT delimited

fields terminated BY ','

;

-- 加载数据

LOAD data local inpath '/root/data/student_partition2.txt' overwrite INTO TABLE `student_info2`;

-- 查看数据

SELECT * FROM student_info2;

-- 建分区表

CREATE TABLE IF NOT EXISTS student_partition2

(

id string,

name string,

age int,

start_date date

)

partitioned BY(year string,month string);

-- 查看数据

DESC student_partition2;

-- 插入数据

INSERT INTO TABLE student_partition2 partition(year,month)

SELECT id,name,age,start_date,YEAR(start_date) year,MONTH(start_date) month

FROM student_info2;

-- 分桶表

-- 准备数据bucket.txt

1,aa

2,bb

3,cc

4,dd

5,ee

6,ff

7,gg

8,hh

9,ii

10,jj

11,kk

12,ll

13,mm

14,nn

15,oo

16,pp

-- 建表

CREATE TABLE IF NOT EXISTS bucket_table

(

id int,

name string

)

row FORMAT delimited

fields terminated BY ','

;

-- 加载数据

LOAD data local inpath '/root/data/bucket.txt' overwrite INTO TABLE `bucket_table`;

-- 查看数据

SELECT * FROM bucket_table;

-- 设置分桶功能开关【默认值false】

SET hive.enforce.bucketing=true;

-- 创建分桶表

CREATE TABLE IF NOT EXISTS bucket_table2

(

id int,

name string

)

CLUSTERED BY (id)

INTO 4 buckets

row FORMAT delimited

fields terminated BY ',';

-- 插入数据

INSERT INTO TABLE bucket_table2

SELECT id,name FROM bucket_table;

-- 查看数据

SELECT id,name FROM bucket_table2;

-- 侧视图

-- 建表

CREATE TABLE employee AS

select * from managed_employee;

-- 查看数据

select * from employee;

SELECT work_place FROM employee;

+-------------------------+--+

| work_place |

+-------------------------+--+

| ["Montreal","Toronto"] |

| ["Montreal"] |

| ["New York"] |

| ["Vancouver"] |

+-------------------------+--+

SELECT explode(work_place) FROM employee;

+------------+--+

| col |

+------------+--+

| Montreal |

| Toronto |

| Montreal |

| New York |

| Vancouver |

+------------+--+

SELECT name,work_place FROM employee;

+----------+-------------------------+--+

| name | work_place |

+----------+-------------------------+--+

| Michael | ["Montreal","Toronto"] |

| Will | ["Montreal"] |

| Shelley | ["New York"] |

| Lucy | ["Vancouver"] |

+----------+-------------------------+--+

SELECT name,explode(work_place) FROM employee;

-- 报错

FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested

in expressionsError: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not support

ed outside the SELECT clause, nor nested in expressions (state=42000,code=10081)

-- 修正

SELECT name,wp

FROM employee lateral VIEW explode(work_place) t1 AS wp;

+----------+------------+--+

| name | wp |

+----------+------------+--+

| Michael | Montreal |

| Michael | Toronto |

| Will | Montreal |

| Shelley | New York |

| Lucy | Vancouver |

+----------+------------+--+

SELECT skills_score FROM employee;

+-----------------------+--+

| skills_score |

+-----------------------+--+

| {"DB":80} |

| {"Perl":85} |

| {"Python":80} |

| {"Sales":89,"HR":94} |

+-----------------------+--+

SELECT explode(skills_score) FROM employee;

+---------+--------+--+

| key | value |

+---------+--------+--+

| DB | 80 |

| Perl | 85 |

| Python | 80 |

| Sales | 89 |

| HR | 94 |

+---------+--------+--+

SELECT name,skills_score FROM employee;

+----------+-----------------------+--+

| name | skills_score |

+----------+-----------------------+--+

| Michael | {"DB":80} |

| Will | {"Perl":85} |

| Shelley | {"Python":80} |

| Lucy | {"Sales":89,"HR":94} |

+----------+-----------------------+--+

SELECT name,explode(skills_score) FROM employee;

-- 报错

FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested

in expressionsError: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not support

ed outside the SELECT clause, nor nested in expressions (state=42000,code=10081)

-- 修正

SELECT name,skill,score

FROM employee lateral VIEW explode(skills_score) t1 AS skill,score;

+----------+---------+--------+--+

| name | skill | score |

+----------+---------+--------+--+

| Michael | DB | 80 |

| Will | Perl | 85 |

| Shelley | Python | 80 |

| Lucy | Sales | 89 |

| Lucy | HR | 94 |

+----------+---------+--------+--+

SELECT sex_age FROM employee;

+----------------------------+--+

| sex_age |

+----------------------------+--+

| {"sex":"Male","age":30} |

| {"sex":"Male","age":35} |

| {"sex":"Female","age":27} |

| {"sex":"Female","age":57} |

+----------------------------+--+

SELECT explode(sex_age) FROM employee;

-- 报错

FAILED: UDFArgumentException explode() takes an array or a map as a parameter

Error: Error while compiling statement: FAILED: UDFArgumentException explode() takes an array or a map

as a parameter (state=42000,code=40000)

-- 修正

SELECT array(sex_age) FROM employee;

+------------------------------+--+

| _c0 |

+------------------------------+--+

| [{"sex":"Male","age":30}] |

| [{"sex":"Male","age":35}] |

| [{"sex":"Female","age":27}] |

| [{"sex":"Female","age":57}] |

+------------------------------+--+

SELECT inline(array(sex_age)) FROM employee;

+---------+------+--+

| sex | age |

+---------+------+--+

| Male | 30 |

| Male | 35 |

| Female | 27 |

| Female | 57 |

+---------+------+--+

SELECT name,inline(array(sex_age)) FROM employee;

-- 报错

FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested

in expressionsError: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not support

ed outside the SELECT clause, nor nested in expressions (state=42000,code=10081)

-- 修正

SELECT name,sex,age

FROM employee lateral VIEW inline(array(sex_age)) t1 AS sex,age;

+----------+---------+------+--+

| name | sex | age |

+----------+---------+------+--+

| Michael | Male | 30 |

| Will | Male | 35 |

| Shelley | Female | 27 |

| Lucy | Female | 57 |

+----------+---------+------+--+

SELECT * FROM employee;

+----------------+-------------------------+----------------------------+------------------------+-----

---------------------------------------------+--+| employee.name | employee.work_place | employee.sex_age | employee.skills_score |

employee.dept_title |+----------------+-------------------------+----------------------------+------------------------+-----

---------------------------------------------+--+| Michael | ["Montreal","Toronto"] | {"sex":"Male","age":30} | {"DB":80} | {"Pr

oduct":"Developer","Administration":"Lead"} || Will | ["Montreal"] | {"sex":"Male","age":35} | {"Perl":85} | {"Pr

oduct":"Lead","Test":"Lead"} || Shelley | ["New York"] | {"sex":"Female","age":27} | {"Python":80} | {"Te

st":"Lead","COE":"Architect"} || Lucy | ["Vancouver"] | {"sex":"Female","age":57} | {"Sales":89,"HR":94} | {"Sa

les":"Lead"} |+----------------+-------------------------+----------------------------+------------------------+-----

---------------------------------------------+--+

SELECT name,wp,sex,age,skill,score,dept,title

FROM employee

lateral VIEW explode(work_place) t AS wp

lateral VIEW inline(array(sex_age)) t AS sex,age

lateral VIEW explode(skills_score) t AS skill,score

lateral VIEW explode(dept_title) t AS dept,title;

+----------+------------+---------+------+---------+--------+-----------------+------------+--+

| name | wp | sex | age | skill | score | dept | title |

+----------+------------+---------+------+---------+--------+-----------------+------------+--+

| Michael | Montreal | Male | 30 | DB | 80 | Product | Developer |

| Michael | Montreal | Male | 30 | DB | 80 | Administration | Lead |

| Michael | Toronto | Male | 30 | DB | 80 | Product | Developer |

| Michael | Toronto | Male | 30 | DB | 80 | Administration | Lead |

| Will | Montreal | Male | 35 | Perl | 85 | Product | Lead |

| Will | Montreal | Male | 35 | Perl | 85 | Test | Lead |

| Shelley | New York | Female | 27 | Python | 80 | Test | Lead |

| Shelley | New York | Female | 27 | Python | 80 | COE | Architect |

| Lucy | Vancouver | Female | 57 | Sales | 89 | Sales | Lead |

| Lucy | Vancouver | Female | 57 | HR | 94 | Sales | Lead |

+----------+------------+---------+------+---------+--------+-----------------+------------+--+

SELECT split('aa,bb,cc',',');

+-------------------+--+

| _c0 |

+-------------------+--+

| ["aa","bb","cc"] |

+-------------------+--+

SELECT explode(split('aa,bb,cc',','));

+------+--+

| col |

+------+--+

| aa |

| bb |

| cc |

+------+--+

SELECT explode(split(null,','));

+------+--+

| col |

+------+--+

+------+--+

SELECT name,wp

FROM employee

lateral VIEW explode(split(null,',')) t AS wp;

+-------+-----+--+

| name | wp |

+-------+-----+--+

+-------+-----+--+

SELECT name,wp

FROM employee

lateral VIEW outer explode(split(null,',')) t AS wp;

+----------+-------+--+

| name | wp |

+----------+-------+--+

| Michael | NULL |

| Will | NULL |

| Shelley | NULL |

| Lucy | NULL |

+----------+-------+--+

-- 重点:

建库

建表(内,外)

加载数据

(1)load data ...

(2)INSERT INTO TABLE ...

分区(静,动)

侧视图(数组,映射,结构)

相关推荐
源码_V_saaskw4 分钟前
JAVA国际版任务悬赏+接单系统源码支持IOS+Android+H5
android·java·开发语言·javascript·微信小程序
双力臂4042 小时前
Java IO流体系详解:字节流、字符流与NIO/BIO对比及文件拷贝实践
java·开发语言·nio
钮钴禄·爱因斯晨2 小时前
Java API (二):从 Object 类到正则表达式的核心详解
java·开发语言·信息可视化·正则表达式
KUKUKAKASSAN2 小时前
虚拟机扩展磁盘容量后扩展分区大小
linux·运维·服务器
努力一点9482 小时前
安装docker可视化工具 Portainer中文版(ubuntu上演示,所有docker通用) 支持控制各种容器,容器操作简单化 降低容器门槛
linux·运维·服务器·人工智能·ubuntu·docker·容器
朱包林2 小时前
ansible批量部署zabbix客户端
linux·运维·服务器·云计算·ansible·zabbix
胖大和尚2 小时前
使用 sshfs 工具将远程 Linux 主机的文件夹挂载到本地
linux·服务器·网络
Monkey-旭3 小时前
Android 蓝牙通讯全解析:从基础到实战
android·java·microsoft·蓝牙通讯
chen_note3 小时前
Linux网络信息(含ssh服务和rsync)
linux·运维·服务器·网络·ssh·远程工作·rsync
BoneToBone3 小时前
java list 与set 集合的迭代器在进行元素操作时出现数据混乱问题及原因
java·开发语言·list