wordcount在mapreduce的例子

1.启动集群

2.创建项目

项目结构为:

3.pom.xml文件为

bash 复制代码
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.example</groupId>
  <artifactId>mapReduceTest</artifactId>
  <packaging>war</packaging>
  <version>1.0-SNAPSHOT</version>
  <name>mapReduceTest Maven Webapp</name>
  <url>http://maven.apache.org</url>
  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-slf4j-impl</artifactId>
      <version>2.12.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>3.1.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>3.1.3</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-mapreduce-client-core</artifactId>
      <version>3.1.3</version>
    </dependency>

    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>3.1.3</version>
      <exclusions>
        <!-- ’d Log4j 1.x -->
        <exclusion>
          <groupId>log4j</groupId>
          <artifactId>log4j</artifactId>
        </exclusion>
        <!-- ’d SLF4J ’ Log4j 1.x „e¥ -->
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
  </dependencies>
  <build>
    <finalName>mapReduceTest</finalName>
  </build>
</project>

4.WordCountMapper代码为

java 复制代码
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
    @Override
    protected void map(LongWritable key1,Text value1,Context context) throws IOException, InterruptedException {
        String data=value1.toString();
        String[] words=data.split(" ");
        for(String w:words){
            context.write(new Text(w),new IntWritable(1));
        }
    }

}

5.WordCountReduce代码为:

java 复制代码
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReduce extends Reducer<Text,IntWritable,Text,IntWritable> {
    @Override
    protected void reduce(Text k3,Iterable<IntWritable> v3,Context context) throws IOException, InterruptedException {
        int total=0;
        for(IntWritable v:v3){
            total+=v.get();
        }
        context.write(k3,new IntWritable(total));
    }
}

6.WordCountMain代码为:

java 复制代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.Job;

public class WordCountMain {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(WordCountMain.class);

        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(Text.class);

        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path("hdfs://172.18.0.2:9000/input"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://172.18.0.2:9000/WordCountOutput"));

        job.waitForCompletion(true);
    }
}

7.测试结果

运行这个main,可以看到

用shell脚本可以查看

相关推荐
Lx3523 小时前
Hadoop容错机制深度解析:保障作业稳定运行
大数据·hadoop
T06205148 小时前
工具变量-5G试点城市DID数据(2014-2025年
大数据
向往鹰的翱翔8 小时前
BKY莱德因:5大黑科技逆转时光
大数据·人工智能·科技·生活·健康医疗
鸿乃江边鸟9 小时前
向量化和列式存储
大数据·sql·向量化
IT毕设梦工厂10 小时前
大数据毕业设计选题推荐-基于大数据的客户购物订单数据分析与可视化系统-Hadoop-Spark-数据可视化-BigData
大数据·hadoop·数据分析·spark·毕业设计·源码·bigdata
java水泥工10 小时前
基于Echarts+HTML5可视化数据大屏展示-白茶大数据溯源平台V2
大数据·echarts·html5
广州腾科助你拿下华为认证12 小时前
华为考试:HCIE数通考试难度分析
大数据·华为
在未来等你14 小时前
Elasticsearch面试精讲 Day 17:查询性能调优实践
大数据·分布式·elasticsearch·搜索引擎·面试
大数据CLUB17 小时前
基于spark的澳洲光伏发电站选址预测
大数据·hadoop·分布式·数据分析·spark·数据开发
ratbag67201317 小时前
当环保遇上大数据:生态环境大数据技术专业的课程侧重哪些领域?
大数据