mr-----topn的用法

复制代码
package org.one;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.*;

public class CleanMusicDriver {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conn = new Configuration();
        Job job = Job.getInstance(conn);
        job.setJarByClass(CleanMusicDriver.class);
        job.setMapperClass(CleanMusicMapper.class);
        job.setReducerClass(CleanMusicReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job,new Path(args[0]));
        FileOutputFormat.setOutputPath(job,new Path(args[1]));
        System.exit(job.waitForCompletion(true)?0:1);
    }

    private static class CleanMusicMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            if(key.get()==0){
                return;
            }
            String[] strs = value.toString().split(",");
            String genre=strs[3];
            context.write(new Text(genre),new IntWritable(1));
        }
    }

    private static class CleanMusicReduce extends Reducer<Text,IntWritable,Text,IntWritable> {
        HashMap<String,Integer> hm = new HashMap<>();
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            int sum=0;
            for (IntWritable value : values) {
                sum++;
            }
            hm.put(key.toString(),sum);
        }

        @Override
        protected void cleanup(Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
            ArrayList<Map.Entry<String, Integer>> ety = new ArrayList<>(hm.entrySet());
            Collections.sort(ety, new Comparator<Map.Entry<String, Integer>>() {
                @Override
                public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
                    return o2.getValue()-o1.getValue();
                }
            });
            for (int i = 0; i <5;i++) {
                context.write(new Text(ety.get(i).getKey()),new IntWritable(ety.get(i).getValue()));
            }

        }
    }
}
相关推荐
Re_zero6 小时前
线上日志被清空?这段仅10行的 IO 代码里竟然藏着3个毒瘤
java·后端
洋洋技术笔记6 小时前
Spring Boot条件注解详解
java·spring boot
程序员清风1 天前
程序员兼职必看:靠谱软件外包平台挑选指南与避坑清单!
java·后端·面试
皮皮林5511 天前
利用闲置 Mac 从零部署 OpenClaw 教程 !
java
华仔啊1 天前
挖到了 1 个 Java 小特性:var,用完就回不去了
java·后端
SimonKing1 天前
SpringBoot整合秘笈:让Mybatis用上Calcite,实现统一SQL查询
java·后端·程序员
日月云棠2 天前
各版本JDK对比:JDK 25 特性详解
java
用户8307196840822 天前
Spring Boot 项目中日期处理的最佳实践
java·spring boot
JavaGuide2 天前
Claude Opus 4.6 真的用不起了!我换成了国产 M2.5,实测真香!!
java·spring·ai·claude code
IT探险家2 天前
Java 基本数据类型:8 种原始类型 + 数组 + 6 个新手必踩的坑
java