Hive高频SQL及典型应用场景总结
一、基础操作类高频SQL
1. **创建表(含分区/分桶)**
sql
-- Sales fact table, partitioned by year/month so that filters on those
-- columns only scan the matching partitions, and bucketed by employee_id
-- to illustrate the bucketing syntax.
-- ROW FORMAT DELIMITED is intentionally omitted: ORC is a self-describing
-- binary format, so a text field delimiter has no effect on it.
CREATE TABLE IF NOT EXISTS sales (
    employee_id STRING,
    sale_amount DOUBLE,
    trans_date  DATE
)
PARTITIONED BY (year INT, month INT)
CLUSTERED BY (employee_id) INTO 8 BUCKETS  -- bucket count is illustrative; size it to the data volume
STORED AS ORC;  -- columnar formats (ORC/Parquet) are the usual choice for optimized storage
场景
销售、日志等明细表按年、月分区;查询条件中直接使用分区字段(year、month)过滤时,仅扫描对应分区,从而提升查询效率。注意:只按 trans_date 等普通列过滤不会触发分区裁剪。
2. 数据加载
sql
-- Load a CSV file from HDFS into the partitioned table `sales`.
-- LOAD DATA only moves files; it does not convert formats or route rows to
-- partitions. Because `sales` is a partitioned ORC table, the CSV is first
-- moved into a text-format staging table and then rewritten into the target
-- partitions with INSERT ... SELECT.
-- Assumes a comma-delimited TEXTFILE table `sales_staging` with columns
-- (employee_id, sale_amount, trans_date) already exists -- TODO confirm.
LOAD DATA INPATH '/hdfs/path/sales.csv' INTO TABLE sales_staging;

-- Allow every partition value to be derived from the data (dynamic partitioning).
SET hive.exec.dynamic.partition = true;
SET hive.exec.dynamic.partition.mode = nonstrict;

-- Partition columns must come last in the SELECT list, in PARTITION (...) order.
-- NOTE(review): presumes year/month are meant to be derived from trans_date -- confirm.
INSERT INTO TABLE sales PARTITION (year, month)
SELECT
    employee_id,
    sale_amount,
    trans_date,
    YEAR(trans_date)  AS year,
    MONTH(trans_date) AS month
FROM sales_staging;
二、分析类高频SQL
1. 窗口函数
- Top N场景(如部门销售额Top 3员工)
sql
-- Top 3 sellers per department.
-- A window-function alias cannot be referenced in the WHERE clause of the
-- same SELECT (WHERE is evaluated before window functions), so the row number
-- is computed in a subquery and filtered in the outer query.
-- ROW_NUMBER() assigns a distinct position to every row, so at most 3 rows
-- per department are kept even when amounts tie (tie order is arbitrary).
-- The alias is `rn` rather than `rank` to avoid confusion with the RANK() function.
-- NOTE(review): assumes `sales` exposes a `dept` column -- confirm against the table definition.
SELECT
    employee_id,
    sale_amount,
    rn
FROM (
    SELECT
        employee_id,
        sale_amount,
        ROW_NUMBER() OVER (PARTITION BY dept ORDER BY sale_amount DESC) AS rn
    FROM sales
) ranked
WHERE rn <= 3;
- 累计计算(如用户月度累计访问次数)
sql
-- Running total of visit_cnt for each user, accumulated in month order.
-- The frame is spelled out explicitly; it is the same frame Hive applies by
-- default when ORDER BY is given without a window clause, so rows that share
-- the same month receive the same cumulative value.
SELECT
    user_id,
    month,
    SUM(visit_cnt) OVER (
        PARTITION BY user_id
        ORDER BY month
        RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS total_visits
FROM user_logs;
2. LATERAL VIEW + EXPLODE(列转行)
sql
-- Emit one output row per element of product_list.
-- EXPLODE operates on an ARRAY (or MAP) column.
-- NOTE(review): if product_list is stored as a JSON string it has to be
-- parsed into an array first -- confirm the column type.
-- With a plain LATERAL VIEW (not OUTER), rows whose product_list is NULL or
-- empty produce no output.
SELECT
    user_id,
    product
FROM orders
LATERAL VIEW EXPLODE(product_list) exploded AS product;
3. 行转列(Pivot)
sql
-- Collapse each customer's purchase rows into one comma-separated string.
-- COLLECT_SET drops duplicate product names; the order of the names inside
-- the resulting string is not guaranteed.
SELECT
    customer_id,
    CONCAT_WS(
        ',',
        COLLECT_SET(product_name)
    ) AS all_products
FROM purchases
GROUP BY
    customer_id;
三、高频聚合与条件操作
1. 聚合函数+分组
sql
-- Per-department statistics: average salary and number of distinct employees.
-- COUNT(DISTINCT ...) counts each employee_id once even if it appears on
-- several rows of the same department.
SELECT
    dept,
    AVG(salary),
    COUNT(DISTINCT employee_id)
FROM employees
GROUP BY
    dept;
2. CASE WHEN条件分支
sql
-- User tiering by spend. Branches are evaluated top-down, so the second
-- branch effectively covers 5000 < total_spend <= 10000; everything else
-- (including a NULL total_spend) falls through to the ELSE tier.
SELECT
    user_id,
    CASE
        WHEN total_spend > 10000 THEN 'VIP'
        WHEN total_spend > 5000  THEN '中级'
        ELSE '普通'
    END AS user_level
FROM orders;
四、高频面试SQL题示例
1. Top N问题
sql
-- Rows whose sale_amount is among the 5 highest distinct amounts.
-- DENSE_RANK gives tied amounts the same rank without gaps, so more than
-- 5 rows can be returned when amounts tie.
WITH ranked_sales AS (
    SELECT
        *,
        DENSE_RANK() OVER (ORDER BY sale_amount DESC) AS rank
    FROM sales
)
SELECT *
FROM ranked_sales
WHERE rank <= 5;
2. 时间区间统计
sql
-- First-quarter sales per user via conditional aggregation: rows whose month
-- is 1, 2 or 3 contribute their amount, every other row contributes 0.
-- NOTE(review): there is no year predicate, so Q1 of every year present in
-- `orders` is summed together -- confirm this is intended.
SELECT
    user_id,
    SUM(
        CASE
            WHEN month BETWEEN 1 AND 3 THEN amount
            ELSE 0
        END
    ) AS Q1_sales
FROM orders
GROUP BY
    user_id;
3. 连续登录用户
sql
-- Users who logged in on at least 3 consecutive days.
-- Approach: for each login day, look 2 rows ahead within the same user; if
-- that row is exactly 2 days later, the three rows are consecutive days.
-- NOTE(review): assumes login_date has day granularity (DATE or 'yyyy-MM-dd'
-- string); if it is a timestamp, wrap it in TO_DATE() first -- confirm.
WITH daily_logins AS (
    -- One row per user per day; repeated logins on the same day would
    -- otherwise break the "2 rows ahead = 2 days later" reasoning.
    SELECT DISTINCT
        user_id,
        login_date
    FROM logins
),
login_windows AS (
    -- login_date must be projected here so the outer filter can compare it
    -- with date_plus2.
    SELECT
        user_id,
        login_date,
        LEAD(login_date, 2) OVER (PARTITION BY user_id ORDER BY login_date) AS date_plus2
    FROM daily_logins
)
-- DISTINCT: a user with several qualifying streaks is reported once.
-- Rows without a login 2 rows ahead have a NULL date_plus2 and are filtered out.
SELECT DISTINCT
    user_id
FROM login_windows
WHERE DATEDIFF(date_plus2, login_date) = 2;
五、优化类高频操作
1. 分区过滤
sql
-- Read a single month of sales. Both predicates are on columns declared in
-- the table's PARTITIONED BY clause, which lets Hive prune partitions and
-- scan only the matching one.
SELECT *
FROM sales
WHERE month = 3
  AND year = 2025;
2. 避免笛卡尔积
sql
-- Join the two tables on an explicit equality condition; a join without an
-- ON condition would produce a Cartesian product.
SELECT
    a.id,
    b.name
FROM table_a AS a
INNER JOIN table_b AS b
    ON a.key = b.key;
核心总结
高频操作:窗口函数、行列转换、条件聚合
典型场景:日志分析、用户分层、报表生成
优化重点:分区/分桶设计、避免全表扫描、合理使用存储格式(ORC/Parquet)