hive表小练习 - 技术栈

-- 将对应的命令写在每个步骤中

-- 1.在hive中创建数据库school

sql 复制代码

-- Create the `school` database; `if not exists` makes the statement safe to re-run.
create database if not exists school;

-- 2.在数据库school中创建如下的表，每张表的列分隔符都是英文逗号(',')，存储格式是textfile

创建表名为student_info,

字段为stu_id 类型为string,注释为学生id

字段为stu_name 类型为string,注释为学生姓名

字段为birthday 类型为string,注释为出生日期

字段为sex 类型为string,注释为性别

sql 复制代码

-- Student table: one row per student, stored as comma-delimited text.
-- All four columns are declared as string, including birthday.
create table if not exists school.student_info (
    stu_id   string comment "学生id",
    stu_name string comment "学生姓名",
    birthday string comment "出生日期",
    sex      string comment "性别"
)
comment "学生内部表"
row format delimited
    fields terminated by ','
stored as textfile;

写入 /opt/sql/school/create_student.sql

beeline中执行

sql 复制代码

-- Execute the DDL script stored at the path below in the current session.
-- NOTE(review): `source` is documented as a Hive CLI command; confirm the Beeline
-- version in use accepts it (Beeline's own script command is `!run <file>`).
source /opt/sql/school/create_student.sql;

创建表名为course_info,

字段为course_id 类型为string,注释为课程id

字段为course_name 类型为string,注释为课程名

字段为tea_id 类型为string,注释为教师id

sql 复制代码

-- Course table: one row per course, with the id of the teacher (tea_id) for it.
-- Stored as comma-delimited text.
create table if not exists school.course_info (
    course_id   string comment "课程id",
    course_name string comment "课程名",
    tea_id      string comment "教师id"
)
comment "课程内部表"
row format delimited
    fields terminated by ','
stored as textfile;

创建表名为teacher_info,

字段为tea_id 类型为string,注释为教师id

字段为tea_name 类型为string,注释为教师姓名

sql 复制代码

-- Teacher table: teacher id and name, stored as comma-delimited text.
create table if not exists school.teacher_info (
    tea_id   string comment "教师id",
    tea_name string comment "教师姓名"
)
comment "教师内部表"
row format delimited
    fields terminated by ','
stored as textfile;

创建表名为score_info,

字段为stu_id 类型为string,注释为学生id

字段为course_id 类型为string,注释为课程id

字段为score 类型为int,注释为成绩

sql 复制代码

-- Score table: one row per (student, course) result; score is the only int column.
-- Stored as comma-delimited text.
create table if not exists school.score_info (
    stu_id    string comment "学生id",
    course_id string comment "课程id",
    score     int    comment "成绩"
)
comment "分数内部表"
row format delimited
    fields terminated by ','
stored as textfile;

-- 3. 将对应的数据文件加载到对应的表中

数据

文件存放在/opt/file/school_file目录下

学生表数据

-- Load the student file from the local filesystem; `overwrite` replaces any
-- rows already in the table, so re-running does not duplicate data.
load data local inpath '/opt/file/school_file/student_info.txt'
overwrite into table school.student_info;

课程表数据

-- Load the course file from the local filesystem; `overwrite` replaces any
-- rows already in the table, so re-running does not duplicate data.
load data local inpath '/opt/file/school_file/course_info.txt'
overwrite into table school.course_info;

教师表数据

-- Load the teacher file from the local filesystem; `overwrite` replaces any
-- rows already in the table, so re-running does not duplicate data.
load data local inpath '/opt/file/school_file/teacher_info.txt'
overwrite into table school.teacher_info;

分数表数据

-- Load the score file from the local filesystem; `overwrite` replaces any
-- rows already in the table, so re-running does not duplicate data.
load data local inpath '/opt/file/school_file/score_info.txt'
overwrite into table school.score_info;

sql语句练习

---1.查询学生表中姓名带冰的学生信息

---模糊查询 %:表示匹配0-n个字符

-- `%` on both sides lets '冰' appear anywhere in the name.
select *
from school.student_info
where stu_name like '%冰%';

---2.查询学生表中以周为姓的学生信息
-- Only a trailing `%`: the name must start with '周'.
select *
from school.student_info
where stu_name like '周%';

---3.查询数学成绩不及格的学生及其对应的数学成绩，按照学号进行升序排序

--1.在课程表中查询数学的课程id

课程名与分数不在同一个表中，首先在course_info获取课程名的id

-- Resolve the id of the course named '数学'; later steps filter scores by it.
select course_id
from school.course_info
where course_name = '数学';

--2.通过查询到的课程id去成绩表中查询学生id和数学成绩的分数

-- Student id and score for every result in the course named '数学'.
-- The subquery supplies the course id; the statement terminator goes after the
-- closing parenthesis (a `;` inside the subquery ends the statement early).
-- NOTE(review): a scalar `= (subquery)` in WHERE needs a reasonably recent Hive;
-- on older versions `in (subquery)` is the supported form - confirm target version.
select stu_id, score
from school.score_info
where course_id = (
    select course_id
    from school.course_info
    where course_name = '数学'
);

--3.通过查询到的学生id去学生表中查询对应的学生信息，根据学生id升序

目前查询到的仅仅是学生id和数学分数，获取学生的其他信息在学生表中

将1.2步的结果集与学生表连接

-- Students whose '数学' score is below 60, with that score, ordered by student id.
-- t2 holds (stu_id, score) for the math course only; the inner join keeps just
-- the students that have a math result.
-- NOTE(review): a scalar `= (subquery)` in WHERE needs a reasonably recent Hive;
-- on older versions `in (subquery)` is the supported form - confirm target version.
select t1.*, t2.score
from school.student_info t1
inner join (
    select stu_id, score
    from school.score_info
    where course_id = (
        select course_id
        from school.course_info
        where course_name = '数学'
    )
) t2
    on t1.stu_id = t2.stu_id
where t2.score < 60
order by t1.stu_id;

inner join 是SQL中的一种连接操作，用于根据两个或多个表中的相关列来组合行。内连接只返回两个表中相互匹配的行。换句话说，它生成的结果集仅包含满足连接条件的记录，即两个表中指定列的值相等的行会被连接在一起。

-- 4.查询课程编号03的总成绩

-- Sum of all scores recorded for course '03'; the filter leaves a single group.
select
    course_id,
    sum(score) as total_score
from school.score_info
where course_id = '03'
group by course_id;

-- 5.查询参加考试的学生人数

--需要对记录数进行去重

-- Number of distinct students with at least one score row; `distinct` is needed
-- because a student has one row per course taken.
select count(distinct stu_id) as count
from school.score_info;

--6.查询各科成绩的最高分和最低分最后显示课程编号最高分最低分

-- Highest and lowest score per course.
select
    course_id,
    max(score) as max_score,
    min(score) as min_score
from school.score_info
group by course_id;

--7.查询平均成绩大于60分的学生编号和对应的平均成绩

-- Per-student average score; `having` filters on the aggregate after grouping,
-- which a `where` clause cannot do.
select
    stu_id,
    avg(score) as avg_score
from school.score_info
group by stu_id
having avg(score) > 60;

--8.查询平均成绩大于60分的学生姓名和对应的平均成绩

--1.在成绩表中计算每个学生的平均成绩

--2.关联学生表，获取学生姓名筛选平均成绩 > 60的记录

-- Student name and average score for students averaging above 60.
-- t2 is a derived table of per-student averages; a derived table must be wrapped
-- in parentheses and given an alias before it can be joined.
select t1.stu_name, t2.avg_score
from school.student_info t1
inner join (
    select stu_id, avg(score) as avg_score
    from school.score_info
    group by stu_id
) t2
    on t1.stu_id = t2.stu_id
where t2.avg_score > 60;

--9.按照学生id，语文成绩，数学成绩，英语成绩，平均成绩，有效课程数，进行查询

--1.计算每个学生的语文成绩，数学成绩，英语成绩

--2.计算每个学生的平均成绩

--3.将上述的两个结果集进行关联

-- One row per student: scores for 语文 / 数学 / 英语 pivoted into columns, plus the
-- average score and the number of joined score rows.
-- Each sum(if(...)) adds the score only for rows of the named course and 0
-- otherwise, so a course with no row for the student shows as 0.
select
    t1.stu_id,
    sum(if(t2.course_name = '语文', t1.score, 0)) as `语文成绩`,
    sum(if(t2.course_name = '数学', t1.score, 0)) as `数学成绩`,
    sum(if(t2.course_name = '英语', t1.score, 0)) as `英语成绩`,
    avg(t1.score) as `平均成绩`,
    count(*) as `有效课程数`
from school.score_info t1
inner join school.course_info t2
    on t1.course_id = t2.course_id
group by t1.stu_id;