Flink 运行 WordCount——读写 HDFS

java 复制代码
package com.test;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

/**
 * Streaming word-count job.
 *
 * <p>Reads a text file from HDFS, splits every line into lower-cased, whitespace-separated
 * words, counts the occurrences of each word, prints the counts to stdout and writes them
 * back to HDFS as text.
 */
public class ReadHDFS {
    /** HDFS location of the text file that is read. */
    private static final String INPUT_PATH = "hdfs://10.9.13.171:54310/testdir/abc.txt";

    /** HDFS location the word counts are written to. */
    private static final String OUTPUT_PATH = "hdfs://10.9.13.171:54310/testdir/wordcountoutput";

    /**
     * Builds the pipeline and submits it under the job name "WordCount Job".
     *
     * @param args command-line arguments (unused)
     * @throws RuntimeException wrapping whatever exception job execution raised
     */
    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<String> text = env.readTextFile(INPUT_PATH);

        // Group the (word, 1) pairs by the word (field f0) and sum the count (field 1).
        // Without this step every word would be emitted with a count of 1 and nothing
        // would actually be counted.
        // NOTE(review): in the default streaming execution mode the sum is a running
        // total, so one updated record is emitted per word occurrence - confirm this is
        // the desired output format.
        DataStream<Tuple2<String, Integer>> wordCounts = text
                .flatMap(new Tokenizer())
                .keyBy(pair -> pair.f0)
                .sum(1);

        wordCounts.print();
        wordCounts.writeAsText(OUTPUT_PATH);

        try {
            env.execute("WordCount Job");
        } catch (Exception e) {
            // Keep the original cause so the failure reason is not lost.
            throw new RuntimeException("WordCount Job failed", e);
        }
    }

    /** Splits a line into lower-cased words and emits one {@code (word, 1)} pair per word. */
    private static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
            String[] words = value.toLowerCase().split("\\s+");
            for (String word : words) {
                // A line with leading whitespace yields an empty first token; skip it.
                if (!word.isEmpty()) {
                    out.collect(new Tuple2<>(word, 1));
                }
            }
        }
    }
}

pom.xml文件

xml 复制代码
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the flink-test project: declares the Flink and Hadoop dependencies
     and assembles a single runnable jar whose main class is com.test.ReadHDFS. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>flink-test</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.14.5</flink.version>
        <hadoop.version>3.1.2</hadoop.version>
    </properties>

    <!-- NOTE(review): every dependency below uses the default (compile) scope, so all of
         them end up inside the assembled jar. Flink's project configuration guide
         recommends the "provided" scope for core Flink artifacts when the jar is
         submitted to a cluster; confirm whether that applies here. -->
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- Other dependencies -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version> <!-- resolved from the flink.version property; set that property to the Flink version you actually use -->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>${flink.version}</version> <!-- resolved from the flink.version property; set that property to the Flink version you actually use -->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink.version}</version> <!-- resolved from the flink.version property; set that property to the Flink version you actually use -->
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version> <!-- resolved from the hadoop.version property; set that property to the Hadoop version you actually use -->
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version> <!-- resolved from the hadoop.version property; set that property to the Hadoop version you actually use -->
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <!-- Bundle the dependencies together with the project classes into one jar -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.6.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.test.ReadHDFS</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id> <!-- this is used for inheritance merges -->
                        <phase>package</phase> <!-- run the jar-merge step during the package phase -->
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

打包后提交到yarn集群命令

bash 复制代码
# Submit the assembled jar-with-dependencies to YARN with the Flink CLI; the jar path is relative to the current directory.
[root@node171 lib]# flink run -m yarn-cluster  flink-test-1.0-SNAPSHOT-jar-with-dependencies.jar 

报错

相关推荐
瓦中空花9 分钟前
大数据工具-Flink
大数据·flink
Lab_AI12 分钟前
iLabPower LES与SDH科学数据基因组平台赋能光电材料研发与生产,鼎材科技与创腾科技进一步深化合作
大数据·人工智能·oled·材料设计·光电材料研发·材料创新·材料研发
渣渣盟12 分钟前
Flink实现TopN URL访问量统计
大数据·flink·scala
无你想你12 分钟前
Datawhale之春晚机器人跳舞复刻
大数据·elasticsearch·机器人
添柴少年yyds15 分钟前
Flink的Checkpoint原理和流程
flink
wAIxiSeu16 分钟前
万字长文解析Apache Paimon
大数据
网络工程小王23 分钟前
【大数据技术详解】——HIVE技术(学习笔记)
大数据·hive·hadoop
刘一说24 分钟前
Git 工具知识全景图:从核心概念到高效协作实践
大数据·git·elasticsearch
MarsLord24 分钟前
ElasticSearch快速入门实战(1)-索引、别名、建模最佳实践
大数据·elasticsearch·搜索引擎
徐礼昭|商派软件市场负责人27 分钟前
“80%应用将消亡”?后App时代:AI智能体重构人机交互与数字商业新秩
大数据·人工智能·人机交互·零售·智能搜索·ai推荐