hadoop学习:mapreduce入门案例二:统计学生成绩

这里相较于 wordcount,新的知识点在于学生实体类的编写以及使用

数据信息:

  1. Student 实体类
java 复制代码
import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Writable record describing one student's score entry.
 *
 * <p>The serialized layout is {@code stuid} (long), {@code stuName} (modified UTF-8 string) and
 * {@code score} (int), in that order. {@link #write(DataOutput)} and
 * {@link #readFields(DataInput)} must always agree on this order.
 *
 * <p>Instances are ordered by {@code score} only (ascending).
 */
public class Student implements WritableComparable<Student> {
    private long stuid;
    private String stuName;
    private int score;

    /** Creates an empty instance whose fields are populated later via setters or readFields. */
    public Student() {
    }

    /**
     * Creates a fully populated record.
     *
     * @param stuid   student id
     * @param stuName student name
     * @param score   score value
     */
    public Student(long stuid, String stuName, int score) {
        this.stuid = stuid;
        this.stuName = stuName;
        this.score = score;
    }

    @Override
    public String toString() {
        return "Student{" +
                "stuid=" + stuid +
                ", stuName='" + stuName + '\'' +
                ", score=" + score +
                '}';
    }

    public long getStuid() {
        return stuid;
    }

    public void setStuid(long stuid) {
        this.stuid = stuid;
    }

    public String getStuName() {
        return stuName;
    }

    public void setStuName(String stuName) {
        this.stuName = stuName;
    }

    public int getScore() {
        return score;
    }

    public void setScore(int score) {
        this.score = score;
    }

    /**
     * Orders students by ascending score.
     *
     * <p>Uses {@link Integer#compare(int, int)} so that a lower score yields a negative result;
     * returning only 1 or 0 would violate the {@link Comparable} contract and break sorting.
     *
     * @param o the student to compare against
     * @return a negative value, zero, or a positive value when this score is lower than, equal
     *         to, or higher than {@code o}'s score
     */
    @Override
    public int compareTo(Student o) {
        return Integer.compare(this.score, o.score);
    }

    /**
     * Serializes the fields in the order id, name, score.
     *
     * @param dataOutput sink to write to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(stuid);
        dataOutput.writeUTF(stuName);
        dataOutput.writeInt(score);
    }

    /**
     * Restores the fields in the same order {@link #write(DataOutput)} emitted them.
     *
     * @param dataInput source to read from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        this.stuid = dataInput.readLong();
        this.stuName = dataInput.readUTF();
        this.score = dataInput.readInt();
    }
}
  2. mapper 阶段,StudentMapper 类
java 复制代码
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Parses one comma-separated input line ({@code id,name,score}) into a student record.
 *
 * <p>Output key: student id. Output value: {@link Student} built from the line.
 *
 * <p>Lines with fewer than three fields, or whose id/score are not valid numbers (for example a
 * blank line or a header row), are skipped and counted in the
 * {@code StudentMapper/MALFORMED_RECORDS} counter instead of failing the whole task.
 */
public class StudentMapper extends Mapper<LongWritable, Text, LongWritable, Student> {
    /** Minimum number of comma-separated fields a usable line must have. */
    private static final int MIN_FIELDS = 3;
    private static final String COUNTER_GROUP = "StudentMapper";
    private static final String MALFORMED_RECORDS = "MALFORMED_RECORDS";

    /**
     * Emits {@code (stuid, Student)} for one well-formed input line.
     *
     * @param key     input key supplied by the framework; not used
     * @param value   one line of text in the form {@code id,name,score}
     * @param context task context used to emit output and update counters
     * @throws IOException          if emitting the record fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        if (fields.length < MIN_FIELDS) {
            context.getCounter(COUNTER_GROUP, MALFORMED_RECORDS).increment(1);
            return;
        }

        long stuid;
        int score;
        try {
            // Parse the id once and reuse it for both the key and the value.
            stuid = Long.parseLong(fields[0]);
            score = Integer.parseInt(fields[2]);
        } catch (NumberFormatException ignored) {
            // Non-numeric id or score: the record is unusable, so count it and move on.
            context.getCounter(COUNTER_GROUP, MALFORMED_RECORDS).increment(1);
            return;
        }

        context.write(new LongWritable(stuid), new Student(stuid, fields[1], score));
    }
}
  3. reduce 阶段,StudentReducer 类
java 复制代码
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Aggregates all records that share a student id into a single {@link Student}.
 *
 * <p>The emitted record carries the id from the key, the sum of all scores, and the name of the
 * last record iterated. The value slot of the output is {@link NullWritable}.
 */
public class StudentReducer extends Reducer<LongWritable, Student, Student, NullWritable> {
    /**
     * Sums the scores for one student id and writes the aggregated record.
     *
     * @param key     student id shared by every element of {@code values}
     * @param values  records grouped under {@code key}
     * @param context task context used to emit the result
     * @throws IOException          if emitting the record fails
     * @throws InterruptedException if the task is interrupted while emitting
     */
    @Override
    protected void reduce(LongWritable key, Iterable<Student> values, Context context)
            throws IOException, InterruptedException {
        int total = 0;
        String name = "";
        for (Student record : values) {
            total += record.getScore();
            // Each iteration overwrites the name, so the last record's name wins.
            name = record.getStuName();
        }

        Student aggregated = new Student(key.get(), name, total);
        System.out.println(aggregated.toString());
        context.write(aggregated, NullWritable.get());
    }
}
  4. 驱动类,StudentDriver 类
java 复制代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Entry point that configures and submits the student-score job.
 *
 * <p>Usage: {@code StudentDriver <input path> <output path>}. The process exits with status 0
 * when the job succeeds, 1 when it fails, and 2 when the arguments are missing.
 */
public class StudentDriver {
    /**
     * Builds the job from {@link StudentMapper} and {@link StudentReducer}, runs it, and exits
     * with a status reflecting the job result.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws IOException            if the file system or job submission fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        if (args.length < 2) {
            System.err.println("Usage: StudentDriver <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        job.setJarByClass(StudentDriver.class);

        // Map phase: handler class and its output key/value types.
        job.setMapperClass(StudentMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Student.class);

        // Reduce phase: handler class and its output key/value types.
        job.setReducerClass(StudentReducer.class);
        job.setOutputKeyClass(Student.class);
        job.setOutputValueClass(NullWritable.class);

        // Input path, e.g. "hdfs://kb131:9000/kb23/hadoopstu/stuscore.csv"
        Path inpath = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inpath);

        // Output path, e.g. "hdfs://kb131:9000/kb23/hadoopstu/out2"
        Path path = new Path(args[1]);
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        // Remove output left over from a previous run so the job can write to the same path.
        if (fs.exists(path)) {
            fs.delete(path, true);
        }
        FileOutputFormat.setOutputPath(job, path);

        // Propagate the job result to the caller instead of always exiting with 0.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
相关推荐
wdfk_prog1 小时前
[Linux]学习笔记系列 -- [drivers][input]serio
linux·笔记·学习
十月南城1 小时前
Hive与离线数仓方法论——分层建模、分区与桶的取舍与查询代价
数据仓库·hive·hadoop
ZH15455891313 小时前
Flutter for OpenHarmony Python学习助手实战:GUI桌面应用开发的实现
python·学习·flutter
B站计算机毕业设计超人3 小时前
计算机毕业设计Hadoop+Spark+Hive招聘推荐系统 招聘大数据分析 大数据毕业设计(源码+文档+PPT+ 讲解)
大数据·hive·hadoop·python·spark·毕业设计·课程设计
B站计算机毕业设计超人3 小时前
计算机毕业设计hadoop+spark+hive交通拥堵预测 交通流量预测 智慧城市交通大数据 交通客流量分析(源码+LW文档+PPT+讲解视频)
大数据·hive·hadoop·python·spark·毕业设计·课程设计
AI架构师小马3 小时前
Hive调优手册:从入门到精通的完整指南
数据仓库·hive·hadoop·ai
数据架构师的AI之路3 小时前
深入了解大数据领域Hive的HQL语言特性
大数据·hive·hadoop·ai
编程小白20264 小时前
从 C++ 基础到效率翻倍:Qt 开发环境搭建与Windows 神级快捷键指南
开发语言·c++·windows·qt·学习
学历真的很重要4 小时前
【系统架构师】第二章 操作系统知识 - 第二部分:进程与线程(补充版)
学习·职场和发展·系统架构·系统架构师
深蓝海拓4 小时前
PySide6,QCoreApplication::aboutToQuit与QtQore.qAddPostRoutine:退出前后的清理工作
笔记·python·qt·学习·pyqt