Hadoop case study: sum each customer's spending across the Jingdong, Taobao, and Duodian platforms, then sort by Jingdong amount first and Taobao amount second

1. Raw purchase data, buy.txt (each row: customer name, Jingdong amount, Taobao amount, Duodian amount)

zhangsan 5676 2765 887
lisi 6754 3234 1232
wangwu 3214 6654 388
lisi 1123 4534 2121
zhangsan 982 3421 5566
zhangsan 1219 36 45

2. Approach: one MapReduce job sums each customer's purchases per platform, then a second MapReduce job sorts the totals.
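Summing the columns of buy.txt by hand shows what the first job should produce (TextOutputFormat separates key and value with a tab, and Text keys sort alphabetically):

lisi	7877.0	7768.0	3353.0
wangwu	3214.0	6654.0	388.0
zhangsan	7877.0	6222.0	6498.0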

3. Define an entity class (its compareTo method implements the sort order: Jingdong amount first, then Taobao amount):

package cn.edu.tju;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

public class Buy implements WritableComparable<Buy> {
    private double jingdong;
    private double taobao;
    private double duodian;

    public Buy() {
    }

    public Buy(double jingdong, double taobao, double duodian) {
        this.jingdong = jingdong;
        this.taobao = taobao;
        this.duodian = duodian;
    }

    public double getJingdong() {
        return jingdong;
    }

    public void setJingdong(double jingdong) {
        this.jingdong = jingdong;
    }

    public double getTaobao() {
        return taobao;
    }

    public void setTaobao(double taobao) {
        this.taobao = taobao;
    }

    public double getDuodian() {
        return duodian;
    }

    public void setDuodian(double duodian) {
        this.duodian = duodian;
    }

    @Override
    public String toString() {
        return jingdong + "\t" + taobao + "\t" + duodian;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeDouble(jingdong);
        out.writeDouble(taobao);
        out.writeDouble(duodian);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // fields must be read in the same order write() emits them
        this.jingdong = in.readDouble();
        this.taobao = in.readDouble();
        this.duodian = in.readDouble();
    }

    @Override
    public int compareTo(Buy o) {
        // ascending by Jingdong amount, then Taobao; Duodian never affects the order
        int byJingdong = Double.compare(this.jingdong, o.jingdong);
        if (byJingdong != 0) {
            return byJingdong;
        }
        return Double.compare(this.taobao, o.taobao);
    }
}
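To sanity-check the ordering locally, a throwaway snippet along these lines (hypothetical, not part of the project) sorts the three customers' totals with the same compareTo the shuffle phase will use:

package cn.edu.tju;

import java.util.Arrays;

public class BuyOrderCheck {
    public static void main(String[] args) {
        // zhangsan and lisi tie on Jingdong (7877), so Taobao breaks the tie
        Buy[] totals = {
                new Buy(7877, 7768, 3353),  // lisi
                new Buy(3214, 6654, 388),   // wangwu
                new Buy(7877, 6222, 6498)   // zhangsan
        };
        Arrays.sort(totals);  // delegates to Buy.compareTo
        System.out.println(Arrays.toString(totals));  // order: wangwu, zhangsan, lisi
    }
}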

4. The first Mapper and Reducer

package cn.edu.tju;

import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


import java.io.IOException;

public class MyBuyMapper1 extends Mapper<LongWritable, Text, Text, Buy> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // each input line: "name jingdong taobao duodian", separated by single spaces
        String str = value.toString();
        String[] fieldList = str.split(" ");
        String person = fieldList[0];
        double jingdong = Double.parseDouble(fieldList[1]);
        double taobao = Double.parseDouble(fieldList[2]);
        double duodian = Double.parseDouble(fieldList[3]);
        context.write(new Text(person), new Buy(jingdong, taobao, duodian));
    }
}

package cn.edu.tju;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class MyBuyReducer1 extends Reducer<Text, Buy, Text, Buy> {
    @Override
    protected void reduce(Text key, Iterable<Buy> values, Context context) throws IOException, InterruptedException {
        // sum each platform's spending across all of this customer's records
        double sum1 = 0;
        double sum2 = 0;
        double sum3 = 0;
        for (Buy next : values) {
            sum1 += next.getJingdong();
            sum2 += next.getTaobao();
            sum3 += next.getDuodian();
        }
        context.write(key, new Buy(sum1, sum2, sum3));
    }
}
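Because the reduce step only sums values, and addition is associative and commutative, the same class could safely double as a combiner to shrink shuffle traffic; an optional one-line addition to the first job's setup in the driver below:

job.setCombinerClass(MyBuyReducer1.class);  // pre-aggregate on the map side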

5. The second Mapper and Reducer (the Buy total becomes the map output key, so the shuffle phase sorts the records with Buy.compareTo)

package cn.edu.tju;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class MyBuyMapper2 extends Mapper<LongWritable, Text, Buy, Text> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // each input line is a record from the first job's output:
        // "name<TAB>jingdong<TAB>taobao<TAB>duodian"
        String str = value.toString();
        String[] fieldList = str.split("\t");
        String person = fieldList[0];
        double jingdong = Double.parseDouble(fieldList[1]);
        double taobao = Double.parseDouble(fieldList[2]);
        double duodian = Double.parseDouble(fieldList[3]);
        // emit the totals as the key so the framework sorts by them
        context.write(new Buy(jingdong, taobao, duodian), new Text(person));
    }
}

package cn.edu.tju;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class MyBuyReducer2 extends Reducer<Buy, Text, Text, Buy> {
    @Override
    protected void reduce(Buy key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        // keys arrive sorted by Buy.compareTo; flip key and value back so the
        // output reads "name<TAB>totals", one line per customer
        for (Text next : values) {
            context.write(next, key);
        }
    }
}

6. The driver class: it defines two Jobs, and the second starts only after the first has finished

package cn.edu.tju;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyBuyMain2 {
    public static void main(String[] args) throws Exception {
        Configuration configuration = new Configuration(true);
        // run both jobs with the local runner instead of on a cluster
        configuration.set("mapreduce.framework.name", "local");

        Job job = Job.getInstance(configuration);
        job.setJarByClass(MyBuyMain2.class);
        //job name
        job.setJobName("buy-" + System.currentTimeMillis());
        //number of reducers (left at the default of 1)
        //job.setNumReduceTasks(3);

        //input path
        FileInputFormat.setInputPaths(job, new Path("D:\\tool\\TestHadoop3\\buy.txt"));
        //output path; it must not exist yet
        FileOutputFormat.setOutputPath(job, new Path("count_1"));
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Buy.class);
        job.setMapperClass(MyBuyMapper1.class);
        job.setReducerClass(MyBuyReducer1.class);
        //wait for the first job; stop if it failed
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }

        Job job2 = Job.getInstance(configuration);
        job2.setJarByClass(MyBuyMain2.class);
        job2.setJobName("buy2-" + System.currentTimeMillis());
        //the first job's output (written relative to the working directory) is the second job's input
        FileInputFormat.setInputPaths(job2, new Path("D:\\tool\\TestHadoop3\\count_1\\part-r-00000"));
        //output path; it must not exist yet
        FileOutputFormat.setOutputPath(job2, new Path("count_2"));

        job2.setMapOutputKeyClass(Buy.class);
        job2.setMapOutputValueClass(Text.class);

        job2.setMapperClass(MyBuyMapper2.class);
        job2.setReducerClass(MyBuyReducer2.class);
        //wait for the second job to finish
        job2.waitForCompletion(true);
    }
}
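With mapreduce.framework.name set to local, the driver runs straight from the IDE against the local filesystem. To run the same driver on a cluster, the hard-coded Windows paths would become HDFS paths, the local-mode setting would be dropped, and the packaged jar would be submitted roughly like this (the jar name is hypothetical):

hadoop jar TestHadoop3.jar cn.edu.tju.MyBuyMain2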

7. Result:
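Based on the hand-computed totals above, count_2/part-r-00000 should list the customers in ascending order of Jingdong spending, with the zhangsan/lisi tie broken by Taobao spending:

wangwu	3214.0	6654.0	388.0
zhangsan	7877.0	6222.0	6498.0
lisi	7877.0	7768.0	3353.0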

大数据·人工智能·物联网·源码·二次开发·项目·智慧工地