进入虚拟机,启动 HDFS 和 YARN


1.创建表
# Launch the Hive CLI; the HiveQL statements that follow are presumably entered at its prompt.
hive
-- List the available databases, then make `test` the current database
-- so the unqualified table names used afterwards resolve inside it.
show databases;
use test;
销售订单表:
-- Sales order detail table; each row is read as comma-separated fields.
-- Later queries in this file sum `cnt` as units sold and `amt` as sales amount.
-- NOTE(review): `amt` is a double; consider decimal if exact monetary totals matter.
create table t_dml (
    detail_id   bigint,
    sale_date   date,
    province    string,
    city        string,
    product_id  bigint,
    cnt         bigint,
    amt         double
)
row format delimited
    fields terminated by ',';
商品详细表:
-- Product detail table; each row is read as comma-separated fields.
-- Joined to t_dml on product_id to obtain product and category names.
-- Presumably one row per product_id; the DDL itself does not enforce this.
create table t_product (
    product_id     bigint,
    product_name   string,
    category_id    bigint,
    category_name  string,
    price          double
)
row format delimited
    fields terminated by ',';
2.加载数据
创建目录存放数据,并将 t_dml.csv、t_product.csv 两个数据文件放入该目录
# -p creates any missing parent directories and does not fail when /opt/data already exists,
# so this step can be repeated safely.
mkdir -p /opt/data
加载数据
-- Load the two CSV files from the local filesystem into their tables.
-- `into table` (without `overwrite`) appends to whatever the table already holds.
load data local inpath '/opt/data/t_dml.csv'
    into table t_dml;

load data local inpath '/opt/data/t_product.csv'
    into table t_product;

3. 销售数据分析
1.查询t_dml中的销售记录的时间段:
-- Time span covered by the sales records: latest sale_date first, then earliest.
select
    max(sale_date),
    min(sale_date)
from t_dml;

2.查询各商品类别的总销售额
-- Total sales amount per product category.
-- sale_category attaches each sale's category name via an inner join on product_id,
-- so sales whose product_id has no match in t_product are not counted.
with sale_category as (
    select
        s.product_id,
        s.amt,
        p.category_name
    from t_dml s
    inner join t_product p
        on s.product_id = p.product_id
)
select
    category_name,
    sum(amt) as total_money
from sale_category
group by category_name;

3.店主想知道哪个商品最畅销以及销量排行榜,请查询销量前10的商品,显示商品名称,销量,排名。
-- Top-10 products by units sold, with product name, total units and rank.
-- The inner query totals cnt per product and keeps the 10 largest totals;
-- the outer query attaches the product name and ranks those rows.
-- rank() gives tied totals the same rank and leaves a gap after them.
-- The final ORDER BY is required: without it the leaderboard rows come back in no
-- guaranteed order. product_name breaks ties so the output is deterministic.
select
    p.product_name as product_name,
    t.cnt_total    as cnt_total,
    rank() over (order by t.cnt_total desc) as rk
from (
    select
        product_id,
        sum(cnt) as cnt_total
    from t_dml
    group by product_id
    order by cnt_total desc
    limit 10
) t
inner join t_product p
    on t.product_id = p.product_id
order by rk, product_name;
4.店主想知道各个市县的购买力,同时也想知道自己的哪个商品在该地区最热卖,通过创建中间表,优化查询。
1. 创建结果存放表:
-- Intermediate result table: total sales amount per (province, city).
create table t_city_amt (
    province     string,
    city         string,
    total_money  double
);
-- Intermediate result table: units sold per (province, city, product).
create table t_city_prod (
    province      string,
    city          string,
    product_id    bigint,
    product_name  string,
    cnt           bigint
);
2.插入数据
-- Rebuild the per-city sales totals from t_dml.
-- insert overwrite replaces the table contents, so the statement can be re-run;
-- a plain insert into would append a second copy of every (province, city) row.
insert overwrite table t_city_amt
select
    province,
    city,
    sum(amt) as total_money
from t_dml
group by province, city;

-- Inspect the result.
select
    province,
    city,
    total_money
from t_city_amt;

-- Rebuild the per-city, per-product unit totals.
-- Sales are inner-joined to t_product to pick up product_name, so sales whose
-- product_id has no match in t_product are left out.
-- insert overwrite replaces the table contents, so the statement can be re-run;
-- a plain insert into would append a second copy of every row.
insert overwrite table t_city_prod
select
    t.province,
    t.city,
    t.product_id,
    t.product_name,
    sum(t.cnt) as cnt
from (
    select
        s.product_id,
        p.product_name,
        s.cnt,
        s.province,
        s.city
    from t_dml s
    inner join t_product p
        on s.product_id = p.product_id
) t
group by t.province, t.city, t.product_id, t.product_name;

-- Inspect the result.
select
    province,
    city,
    product_id,
    product_name,
    cnt
from t_city_prod;

3. 优化:使用多表插入(multi-insert),关联结果只计算一次,同时覆盖写入 t_city_amt 和 t_city_prod 两张结果表
-- Multi-insert: the sales/product join is written once as the shared source,
-- and both result tables are overwritten from it in a single statement.
-- Only the columns the two inserts read are projected from the join.
from (
    select
        s.province,
        s.city,
        s.product_id,
        s.cnt,
        s.amt,
        p.product_name
    from t_dml s
    inner join t_product p
        on s.product_id = p.product_id
) t
insert overwrite table t_city_amt
    select
        province,
        city,
        sum(amt)
    group by province, city
insert overwrite table t_city_prod
    select
        province,
        city,
        product_id,
        product_name,
        sum(cnt)
    group by province, city, product_id, product_name;
5.统计各省最强购买力地区:
-- City with the highest total_money in each province, largest total first.
-- dense_rank keeps every city tied for first place within a province.
select
    province,
    city,
    total_money
from (
    select
        province,
        city,
        total_money,
        dense_rank() over (
            partition by province
            order by total_money desc
        ) as rk
    from t_city_amt
) ranked
where ranked.rk = 1
order by total_money desc;
6.统计各地区的最畅销商品
-- Best-selling product in each city.
-- The ranking window is partitioned by (province, city) so every city gets its own
-- rank 1; partitioning by province alone would return only the single top
-- city/product pair of each province.
-- dense_rank keeps every product tied for first place within a city.
select
    province,
    city,
    product_id,
    product_name
from (
    select
        province,
        city,
        product_id,
        product_name,
        dense_rank() over (
            partition by province, city
            order by cnt desc
        ) as rk
    from t_city_prod
) t
where t.rk = 1
order by province, city;