Hive常用命令参考

一、数据库操作

1. 创建数据库

sql

复制代码
CREATE DATABASE [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];

示例 :创建名为test_db的数据库

sql

复制代码
CREATE DATABASE IF NOT EXISTS test_db 
COMMENT 'This is a test database'
LOCATION '/user/hive/databases/test_db'
WITH DBPROPERTIES ('creator'='admin', 'date'='2023-01-01');

2. 查看数据库

sql

复制代码
SHOW DATABASES;
SHOW DATABASES LIKE 'hive_*'; -- 模糊查询

3. 切换数据库

sql

复制代码
USE database_name;

4. 查看数据库详情

sql

复制代码
DESCRIBE DATABASE [EXTENDED] database_name;

5. 删除数据库

sql

复制代码
DROP DATABASE [IF EXISTS] database_name [CASCADE]; -- CASCADE表示级联删除表

二、表操作

1. 创建内部表

sql

复制代码
CREATE TABLE [IF NOT EXISTS] table_name (
  column_name data_type [COMMENT column_comment],
  ...
)
[COMMENT table_comment]
[ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
 LINES TERMINATED BY '\n']
[STORED AS TEXTFILE]
[LOCATION hdfs_path];

示例:创建员工表

sql

复制代码
CREATE TABLE IF NOT EXISTS employees (
  id INT COMMENT 'Employee ID',
  name STRING COMMENT 'Employee Name',
  salary FLOAT COMMENT 'Employee Salary'
)
COMMENT 'Employee Information Table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE;

2. 创建外部表

sql

复制代码
CREATE EXTERNAL TABLE [IF NOT EXISTS] table_name (
  column_name data_type [COMMENT column_comment],
  ...
)
[COMMENT table_comment]
[ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
 LINES TERMINATED BY '\n']
[STORED AS TEXTFILE]
LOCATION hdfs_path;

3. 创建分区表

sql

复制代码
CREATE TABLE [IF NOT EXISTS] table_name (
  column_name data_type [COMMENT column_comment],
  ...
)
PARTITIONED BY (partition_column data_type)
[ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' 
 LINES TERMINATED BY '\n'];

示例:按日期分区的日志表

sql

复制代码
CREATE TABLE IF NOT EXISTS logs (
  id INT,
  content STRING
)
PARTITIONED BY (log_date STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

4. 查看表列表

sql

复制代码
SHOW TABLES;
SHOW TABLES LIKE 'emp_*'; -- 模糊查询

5. 查看表结构

sql

复制代码
DESCRIBE [EXTENDED] table_name; -- EXTENDED显示详细信息

6. 修改表名

sql

复制代码
ALTER TABLE old_table_name RENAME TO new_table_name;

7. 删除表

sql

复制代码
DROP TABLE [IF EXISTS] table_name; -- 内部表数据会被删除,外部表仅删除元数据

三、数据加载与导出

1. 从本地文件系统加载数据

sql

复制代码
LOAD DATA LOCAL INPATH 'local_file_path' 
[OVERWRITE] INTO TABLE table_name 
[PARTITION (partition_column=value)];

示例:加载本地数据到员工表

sql

复制代码
LOAD DATA LOCAL INPATH '/home/user/employees.csv' 
OVERWRITE INTO TABLE employees;

2. 从 HDFS 加载数据

sql

复制代码
LOAD DATA INPATH 'hdfs_file_path' 
[OVERWRITE] INTO TABLE table_name 
[PARTITION (partition_column=value)];

3. 插入数据到表

sql

复制代码
INSERT INTO TABLE table_name [PARTITION (partition_column=value)]
SELECT column1, column2, ... FROM source_table;

4. 导出数据到本地

sql

复制代码
INSERT OVERWRITE LOCAL DIRECTORY 'local_output_path'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
SELECT column1, column2, ... FROM table_name;

四、查询操作

1. 基本查询

sql

复制代码
SELECT column1, column2, ... 
FROM table_name 
[WHERE condition]
[GROUP BY column]
[HAVING condition]
[ORDER BY column [ASC|DESC]]
[LIMIT number];

2. 关联查询

sql

复制代码
SELECT a.column, b.column 
FROM table_a a
JOIN table_b b ON a.key = b.key; -- INNER JOIN
-- LEFT JOIN, RIGHT JOIN, FULL OUTER JOIN

3. 子查询

sql

复制代码
SELECT * FROM table_name 
WHERE column IN (SELECT column FROM another_table WHERE condition);

五、分区与分桶

1. 添加分区

sql

复制代码
ALTER TABLE table_name ADD [IF NOT EXISTS]
PARTITION (partition_column=value) [LOCATION hdfs_path];

2. 删除分区

sql

复制代码
ALTER TABLE table_name DROP [IF EXISTS] PARTITION (partition_column=value);

3. 创建分桶表

sql

复制代码
CREATE TABLE [IF NOT EXISTS] table_name (
  column_name data_type,
  ...
)
CLUSTERED BY (column) INTO num_buckets BUCKETS;

六、常用函数

1. 字符串函数

sql

复制代码
SELECT CONCAT('Hello', ' ', 'World'); -- 拼接字符串
SELECT SUBSTR('Hive', 1, 2); -- 截取子串,返回'HI'
SELECT LENGTH('Hive'); -- 返回长度4

2. 日期函数

sql

复制代码
SELECT CURRENT_DATE(); -- 获取当前日期
SELECT DATE_FORMAT('2023-01-01', 'yyyy-MM-dd'); -- 格式化日期

3. 聚合函数

sql

复制代码
SELECT COUNT(*), AVG(salary), MAX(salary) FROM employees;

七、其他常用命令

1. 查看 Hive 版本

sql

复制代码
SELECT VERSION();

2. 执行 HQL 脚本

bash

复制代码
hive -f /path/to/script.hql

3. 查看作业历史

sql

复制代码
SHOW HISTORY;

4. 设置 Hive 参数

sql

复制代码
SET hive.exec.dynamic.partition.mode=nonstrict; -- 动态分区模式