hive(2)

-- Review: a table with a single level of static partitioning.
-- Rows are read from plain-text files whose fields are separated by tabs;
-- `month` is the partition column and is not part of the data columns.
CREATE TABLE IF NOT EXISTS dept_partition (
    deptno INT,
    dname  STRING,
    loc    STRING
)
PARTITIONED BY (month STRING)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- Inspect the table definition and the partitions that currently exist.
DESCRIBE dept_partition;
SHOW PARTITIONS dept_partition;

-- Add one partition, then two partitions in a single statement
-- (multiple specs in ADD are separated by whitespace).
ALTER TABLE dept_partition ADD PARTITION (month = '201904');
ALTER TABLE dept_partition ADD
    PARTITION (month = '201905')
    PARTITION (month = '201906');

-- Drop one partition, then two partitions in a single statement
-- (multiple specs in DROP are separated by commas).
ALTER TABLE dept_partition DROP PARTITION (month = '201904');
ALTER TABLE dept_partition DROP
    PARTITION (month = '201905'),
    PARTITION (month = '201906');

-- Load the local file into the partitioned table.
-- A LOAD statement targets exactly one partition, and a partition spec should
-- name each partition column only once. The original spec listed `month`
-- twice with different values, which is ambiguous at best, so the file is
-- loaded once per month instead. LOCAL copies the file, so it remains
-- available for the second load.
LOAD DATA LOCAL INPATH '/root/data/dept.txt'
OVERWRITE INTO TABLE `dept_partition`
PARTITION (`month` = '201904');

LOAD DATA LOCAL INPATH '/root/data/dept.txt'
OVERWRITE INTO TABLE `dept_partition`
PARTITION (`month` = '201905');

-- Static partitioning with two partition levels: month, then date.
-- `date` is a reserved word in HiveQL (Hive 1.2.0 and later), so it is
-- back-quoted everywhere it is used as a column name; unquoted, the
-- statements are rejected by the parser under the default settings.
CREATE TABLE IF NOT EXISTS dept_partition2 (
    deptno INT,
    dname  STRING,
    loc    STRING
)
PARTITIONED BY (month INT, `date` INT)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';

-- Add one two-level partition, then two in a single statement.
ALTER TABLE dept_partition2 ADD PARTITION (month = 201904, `date` = 01);
ALTER TABLE dept_partition2 ADD
    PARTITION (month = 201904, `date` = 02)
    PARTITION (month = 201904, `date` = 03);

SHOW PARTITIONS dept_partition2;

-- Drop one two-level partition, then two in a single statement.
ALTER TABLE dept_partition2 DROP PARTITION (month = 201904, `date` = 01);
ALTER TABLE dept_partition2 DROP
    PARTITION (month = 201904, `date` = 02),
    PARTITION (month = 201904, `date` = 03);

-- Load the local file into one fully specified partition and read it back.
LOAD DATA LOCAL INPATH '/root/data/dept.txt'
OVERWRITE INTO TABLE `dept_partition2`
PARTITION (month = 201904, `date` = 01);

SELECT *
FROM dept_partition2;

-- 动态分区

-- 准备数据student_male_female.txt

001,tom,22,male

002,jack,23,male

003,rose,20,female

004,mike,25,male

005,lucy,21,female

006,andy,20,female

-- Staging table for the dynamic-partition example.
-- It is recreated from scratch and holds the comma-separated rows of
-- student_male_female.txt (id, name, age, sex).
DROP TABLE IF EXISTS student_info;

CREATE TEMPORARY TABLE IF NOT EXISTS student_info (
    id   STRING,
    name STRING,
    age  INT,
    sex  STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Load the sample file, replacing any existing rows.
LOAD DATA LOCAL INPATH '/root/data/student_male_female.txt'
OVERWRITE INTO TABLE `student_info`;

-- Check the loaded rows.
SELECT *
FROM student_info;

-- Target table for the dynamic-partition example, partitioned by sex.
DROP TABLE IF EXISTS student_partition;

CREATE TABLE IF NOT EXISTS student_partition (
    id   STRING,
    name STRING,
    age  INT
)
PARTITIONED BY (sex STRING);

-- Turn dynamic partitioning on (the original note says it already defaults to true).
SET hive.exec.dynamic.partition=true;

-- Switch off strict mode; the insert below leaves its only partition column dynamic.
SET hive.exec.dynamic.partition.mode=nonstrict;

-- Insert the staged rows. Columns are matched by position, and the
-- partition column must be the last one in the SELECT list.
INSERT INTO TABLE `student_partition` PARTITION (sex)
SELECT
    id,
    name,
    age,
    sex
FROM student_info;

-- Check the inserted rows and the partitions that were created.
SELECT *
FROM `student_partition`;

SHOW PARTITIONS `student_partition`;

-- 示例2

-- 准备数据student_partition2.txt

01,tom,22,2019-09-08

02,jack,23,2020-08-09

03,rose,20,2019-09-08

04,smith,21,2020-07-09

05,john,22,2019-09-08

-- Temporary staging table for the second dynamic-partition example.
-- It holds the comma-separated rows of student_partition2.txt
-- (id, name, age, start_date).
CREATE TEMPORARY TABLE IF NOT EXISTS student_info2 (
    id         STRING,
    name       STRING,
    age        INT,
    start_date DATE
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Load the sample file, replacing any existing rows.
LOAD DATA LOCAL INPATH '/root/data/student_partition2.txt'
OVERWRITE INTO TABLE `student_info2`;

-- Check the loaded rows.
SELECT *
FROM student_info2;

-- Target table partitioned on two levels: year, then month.
CREATE TABLE IF NOT EXISTS student_partition2 (
    id         STRING,
    name       STRING,
    age        INT,
    start_date DATE
)
PARTITIONED BY (year STRING, month STRING);

-- Show the table structure, including the partition columns.
DESCRIBE student_partition2;

-- Insert the staged rows; both partition values are derived from start_date
-- and are listed last, in the same order as in the PARTITION clause.
INSERT INTO TABLE student_partition2 PARTITION (year, month)
SELECT
    id,
    name,
    age,
    start_date,
    YEAR(start_date)  AS year,
    MONTH(start_date) AS month
FROM student_info2;

-- 分桶表

-- 准备数据bucket.txt

1,aa

2,bb

3,cc

4,dd

5,ee

6,ff

7,gg

8,hh

9,ii

10,jj

11,kk

12,ll

13,mm

14,nn

15,oo

16,pp

-- Plain (non-bucketed) source table for the bucketing example.
-- It holds the comma-separated rows of bucket.txt (id, name).
CREATE TABLE IF NOT EXISTS bucket_table (
    id   INT,
    name STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Load the sample file, replacing any existing rows.
LOAD DATA LOCAL INPATH '/root/data/bucket.txt'
OVERWRITE INTO TABLE `bucket_table`;

-- Check the loaded rows.
SELECT *
FROM bucket_table;

-- Switch on bucketing enforcement (the original note gives the default as false).
-- NOTE(review): this property is reported as removed in Hive 2.x, where
-- bucketing is always enforced; confirm against the target Hive version.
SET hive.enforce.bucketing=true;

-- Bucketed table: rows are distributed into 4 buckets by id.
CREATE TABLE IF NOT EXISTS bucket_table2 (
    id   INT,
    name STRING
)
CLUSTERED BY (id) INTO 4 BUCKETS
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ',';

-- Populate the bucketed table from the plain source table.
INSERT INTO TABLE bucket_table2
SELECT
    id,
    name
FROM bucket_table;

-- Check the inserted rows.
SELECT
    id,
    name
FROM bucket_table2;

-- Lateral views.
-- Build the working table as a copy of managed_employee (defined outside
-- this script). IF NOT EXISTS keeps the script re-runnable, matching every
-- other CREATE TABLE in this file; without it a second run fails because
-- the table already exists.
CREATE TABLE IF NOT EXISTS employee AS
SELECT *
FROM managed_employee;

-- Check the copied rows.
SELECT *
FROM employee;

-- work_place holds an array of city names for each employee.
SELECT work_place
FROM employee;

+-------------------------+--+

| work_place |

+-------------------------+--+

| ["Montreal","Toronto"] |

| ["Montreal"] |

| ["New York"] |

| ["Vancouver"] |

+-------------------------+--+

-- explode() on an array emits one row per element.
SELECT
    explode(work_place)
FROM employee;

+------------+--+

| col |

+------------+--+

| Montreal |

| Toronto |

| Montreal |

| New York |

| Vancouver |

+------------+--+

-- Employee name next to the unexpanded work_place array.
SELECT
    name,
    work_place
FROM employee;

+----------+-------------------------+--+

| name | work_place |

+----------+-------------------------+--+

| Michael | ["Montreal","Toronto"] |

| Will | ["Montreal"] |

| Shelley | ["New York"] |

| Lucy | ["Vancouver"] |

+----------+-------------------------+--+

-- Deliberately invalid: explode() is a UDTF and cannot be selected alongside
-- an ordinary column. Kept as written to reproduce the error quoted below.
SELECT name,explode(work_place) FROM employee;

-- Fails with:

FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions
Error: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions (state=42000,code=10081)

-- Fix: move the UDTF into a LATERAL VIEW so that its output column can be
-- selected together with ordinary columns.
SELECT
    name,
    wp
FROM employee
LATERAL VIEW explode(work_place) places AS wp;

+----------+------------+--+

| name | wp |

+----------+------------+--+

| Michael | Montreal |

| Michael | Toronto |

| Will | Montreal |

| Shelley | New York |

| Lucy | Vancouver |

+----------+------------+--+

-- skills_score holds a map of skill name to score for each employee.
SELECT
    skills_score
FROM employee;

+-----------------------+--+

| skills_score |

+-----------------------+--+

| {"DB":80} |

| {"Perl":85} |

| {"Python":80} |

| {"Sales":89,"HR":94} |

+-----------------------+--+

-- explode() on a map emits one (key, value) row per entry.
SELECT
    explode(skills_score)
FROM employee;

+---------+--------+--+

| key | value |

+---------+--------+--+

| DB | 80 |

| Perl | 85 |

| Python | 80 |

| Sales | 89 |

| HR | 94 |

+---------+--------+--+

-- Employee name next to the unexpanded skills_score map.
SELECT
    name,
    skills_score
FROM employee;

+----------+-----------------------+--+

| name | skills_score |

+----------+-----------------------+--+

| Michael | {"DB":80} |

| Will | {"Perl":85} |

| Shelley | {"Python":80} |

| Lucy | {"Sales":89,"HR":94} |

+----------+-----------------------+--+

-- Deliberately invalid: explode() is a UDTF and cannot be selected alongside
-- an ordinary column. Kept as written to reproduce the error quoted below.
SELECT name,explode(skills_score) FROM employee;

-- Fails with:

FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions
Error: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions (state=42000,code=10081)

-- Fix: expand the map through a LATERAL VIEW; the two aliases name the
-- key and value columns that explode() produces.
SELECT
    name,
    skill,
    score
FROM employee
LATERAL VIEW explode(skills_score) skills AS skill, score;

+----------+---------+--------+--+

| name | skill | score |

+----------+---------+--------+--+

| Michael | DB | 80 |

| Will | Perl | 85 |

| Shelley | Python | 80 |

| Lucy | Sales | 89 |

| Lucy | HR | 94 |

+----------+---------+--------+--+

-- sex_age holds a struct with the fields sex and age.
SELECT
    sex_age
FROM employee;

+----------------------------+--+

| sex_age |

+----------------------------+--+

| {"sex":"Male","age":30} |

| {"sex":"Male","age":35} |

| {"sex":"Female","age":27} |

| {"sex":"Female","age":57} |

+----------------------------+--+

-- Deliberately invalid: sex_age is neither an array nor a map, which is all
-- explode() accepts. Kept as written to reproduce the error quoted below.
SELECT explode(sex_age) FROM employee;

-- Fails with:

FAILED: UDFArgumentException explode() takes an array or a map as a parameter
Error: Error while compiling statement: FAILED: UDFArgumentException explode() takes an array or a map as a parameter (state=42000,code=40000)

-- Fix, step 1: wrap the struct in a one-element array.
SELECT
    array(sex_age)
FROM employee;

+------------------------------+--+

| _c0 |

+------------------------------+--+

| [{"sex":"Male","age":30}] |

| [{"sex":"Male","age":35}] |

| [{"sex":"Female","age":27}] |

| [{"sex":"Female","age":57}] |

+------------------------------+--+

-- Fix, step 2: inline() expands an array of structs into one row per
-- struct, with one column per struct field.
SELECT
    inline(array(sex_age))
FROM employee;

+---------+------+--+

| sex | age |

+---------+------+--+

| Male | 30 |

| Male | 35 |

| Female | 27 |

| Female | 57 |

+---------+------+--+

-- Deliberately invalid: inline() is a UDTF and cannot be selected alongside
-- an ordinary column. Kept as written to reproduce the error quoted below.
SELECT name,inline(array(sex_age)) FROM employee;

-- Fails with:

FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions
Error: Error while compiling statement: FAILED: SemanticException [Error 10081]: UDTF's are not supported outside the SELECT clause, nor nested in expressions (state=42000,code=10081)

-- Fix: expand the struct through a LATERAL VIEW so that its fields can be
-- selected together with the employee name.
SELECT
    name,
    sex,
    age
FROM employee
LATERAL VIEW inline(array(sex_age)) persons AS sex, age;

+----------+---------+------+--+

| name | sex | age |

+----------+---------+------+--+

| Michael | Male | 30 |

| Will | Male | 35 |

| Shelley | Female | 27 |

| Lucy | Female | 57 |

+----------+---------+------+--+

-- Full contents of employee, showing all five columns before they are
-- expanded together.
SELECT *
FROM employee;

+----------------+-------------------------+----------------------------+------------------------+--------------------------------------------------+--+
| employee.name | employee.work_place | employee.sex_age | employee.skills_score | employee.dept_title |
+----------------+-------------------------+----------------------------+------------------------+--------------------------------------------------+--+
| Michael | ["Montreal","Toronto"] | {"sex":"Male","age":30} | {"DB":80} | {"Product":"Developer","Administration":"Lead"} |
| Will | ["Montreal"] | {"sex":"Male","age":35} | {"Perl":85} | {"Product":"Lead","Test":"Lead"} |
| Shelley | ["New York"] | {"sex":"Female","age":27} | {"Python":80} | {"Test":"Lead","COE":"Architect"} |
| Lucy | ["Vancouver"] | {"sex":"Female","age":57} | {"Sales":89,"HR":94} | {"Sales":"Lead"} |
+----------------+-------------------------+----------------------------+------------------------+--------------------------------------------------+--+

-- Chain one lateral view per complex column. Each view is applied to the
-- rows produced by the previous one, so the row count per employee is the
-- product of the element counts. Every view gets its own alias.
SELECT
    name,
    wp,
    sex,
    age,
    skill,
    score,
    dept,
    title
FROM employee
LATERAL VIEW explode(work_place)    places  AS wp
LATERAL VIEW inline(array(sex_age)) persons AS sex, age
LATERAL VIEW explode(skills_score)  skills  AS skill, score
LATERAL VIEW explode(dept_title)    depts   AS dept, title;

+----------+------------+---------+------+---------+--------+-----------------+------------+--+

| name | wp | sex | age | skill | score | dept | title |

+----------+------------+---------+------+---------+--------+-----------------+------------+--+

| Michael | Montreal | Male | 30 | DB | 80 | Product | Developer |

| Michael | Montreal | Male | 30 | DB | 80 | Administration | Lead |

| Michael | Toronto | Male | 30 | DB | 80 | Product | Developer |

| Michael | Toronto | Male | 30 | DB | 80 | Administration | Lead |

| Will | Montreal | Male | 35 | Perl | 85 | Product | Lead |

| Will | Montreal | Male | 35 | Perl | 85 | Test | Lead |

| Shelley | New York | Female | 27 | Python | 80 | Test | Lead |

| Shelley | New York | Female | 27 | Python | 80 | COE | Architect |

| Lucy | Vancouver | Female | 57 | Sales | 89 | Sales | Lead |

| Lucy | Vancouver | Female | 57 | HR | 94 | Sales | Lead |

+----------+------------+---------+------+---------+--------+-----------------+------------+--+

-- split() turns a delimited string into an array of strings.
SELECT
    split('aa,bb,cc', ',');

+-------------------+--+

| _c0 |

+-------------------+--+

| ["aa","bb","cc"] |

+-------------------+--+

-- Exploding the split result emits one row per token.
SELECT
    explode(split('aa,bb,cc', ','));

+------+--+

| col |

+------+--+

| aa |

| bb |

| cc |

+------+--+

-- With a NULL input, explode() emits no rows at all.
SELECT
    explode(split(NULL, ','));

+------+--+

| col |

+------+--+

+------+--+

-- A plain LATERAL VIEW drops every source row for which the UDTF emits
-- nothing, so this query returns no rows.
SELECT
    name,
    wp
FROM employee
LATERAL VIEW explode(split(NULL, ',')) places AS wp;

+-------+-----+--+

| name | wp |

+-------+-----+--+

+-------+-----+--+

-- LATERAL VIEW OUTER keeps those source rows and fills the UDTF columns
-- with NULL instead of dropping them.
SELECT
    name,
    wp
FROM employee
LATERAL VIEW OUTER explode(split(NULL, ',')) places AS wp;

+----------+-------+--+

| name | wp |

+----------+-------+--+

| Michael | NULL |

| Will | NULL |

| Shelley | NULL |

| Lucy | NULL |

+----------+-------+--+

-- 重点:

建库

建表(内,外)

加载数据

(1)load data ...

(2)INSERT INTO TABLE ...

分区(静,动)

侧视图(数组,映射,结构)

相关推荐
码小凡1 小时前
优雅!用了这两款插件,我成了整个公司代码写得最规范的码农
java·后端
RestCloud1 小时前
4中常见的数据集成方式
数据库
Databend2 小时前
超 10 倍查询加速,N-Gram Index 设计与优化全解析
数据库
掉鱼的猫3 小时前
Solon AI 五步构建 RAG 服务:2025 最新 AI + 向量数据库实战
java·redis·后端
爱可生开源社区3 小时前
SCALE:一个面向专业级任务的大语言模型 SQL 能力开源评测框架
数据库
java金融3 小时前
FactoryBean 和BeanFactory的傻傻的总是分不清?
java·后端
独立开阀者_FwtCoder3 小时前
Nginx 通过匹配 Cookie 将请求定向到特定服务器
java·vue.js·后端
名曰大神3 小时前
AEM6.5集成Redis详细步骤(附代码)
java·redis·demo·aem
带刺的坐椅4 小时前
Solon AI 五步构建 RAG 服务:2025 最新 AI + 向量数据库实战
java·redis·ai·solon·rag
星环科技TDH社区版4 小时前
星环科技产品可存储的表格式功能介绍以及创建示例
大数据·数据库