Flink: setting savepoints and restoring from them

This version adds HDFS as the checkpoint storage.

java
package com.qyt;

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.storage.FileSystemCheckpointStorage;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;

/**
 * Streaming word count that stores its checkpoints in HDFS.
 */
public class StreamWordCount {

    public static void main(String[] args) throws Exception {


        //TODO 1. Get the stream execution environment
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Access HDFS as the root user (this user needs write permission on the checkpoint path)
        System.setProperty("HADOOP_USER_NAME", "root");
        // Take a checkpoint every 3 seconds
        env.enableCheckpointing(3000);
        // Store checkpoints in a file system, here HDFS
        env.getCheckpointConfig().setCheckpointStorage(new FileSystemCheckpointStorage("hdfs://hadoop01:9000/flink"));
        // Fail a checkpoint if it takes longer than 2 seconds
        env.getCheckpointConfig().setCheckpointTimeout(2000L);
        // Retain externalized checkpoints on cancellation, so the job can be restored from them later
        env.getCheckpointConfig().setExternalizedCheckpointCleanup(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        //TODO 2. Read an unbounded stream from a socket
        DataStreamSource<String> stringDataStreamSource = env.socketTextStream("192.168.1.10", 9000, "\n");

        //TODO 3. ETL
        //TODO 3.1 Split each line into (word, 1) tuples -- a simple ETL step
        SingleOutputStreamOperator<Tuple2<String, Integer>> process = stringDataStreamSource.process(new ProcessFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void processElement(String value, ProcessFunction<String, Tuple2<String, Integer>>.Context ctx, Collector<Tuple2<String, Integer>> out) throws Exception {
                String[] words = value.split(" ");
                for (String word : words) {
                    Tuple2<String, Integer> tuple2 = Tuple2.of(word, 1);
                    out.collect(tuple2);
                }
            }
        }).uid("etl");

        //TODO 3.2 Key by word
        KeyedStream<Tuple2<String, Integer>, String> tuple2StringKeyedStream = process.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {

            @Override
            public String getKey(Tuple2<String, Integer> value) throws Exception {
                return value.f0;
            }
        });

        //TODO 3.3 Aggregate; a stable uid lets this operator's state be matched on restore
        SingleOutputStreamOperator<Tuple2<String, Integer>> sum = tuple2StringKeyedStream.sum(1).uid("sum");

        //TODO 4. Print
        sum.print();

        //TODO 5. execute() keeps the unbounded job running
        env.execute();
    }
}
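
The job above only takes automatic checkpoints; savepoints are triggered manually through the Flink CLI. A minimal sketch, assuming the JobManager address used later in this post and a hypothetical target directory hdfs://hadoop01:9000/flink-savepoints:

bash
# Trigger a savepoint for a running job (replace <jobId> with the real job ID)
./flink savepoint -m 192.168.1.161:8081 <jobId> hdfs://hadoop01:9000/flink-savepoints
# Or stop the job gracefully while taking a final savepoint
./flink stop -m 192.168.1.161:8081 --savepointPath hdfs://hadoop01:9000/flink-savepoints <jobId>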

Writing checkpoints to HDFS also requires the Hadoop client dependency:

xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xmlns="http://maven.apache.org/POM/4.0.0"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>flink-demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.17.0</flink.version>
    </properties>


    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.3.4</version>
        </dependency>
    </dependencies>


    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.2.4</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <!-- Do not copy the signatures in the META-INF folder.
                                         Otherwise, this might cause SecurityExceptions when using the JAR. -->
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers combine.children="append">
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
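
Before submitting, package the job into a fat jar; with the shade plugin above, a plain Maven build is enough (a sketch, run from the project root):

bash
mvn clean package
# the shaded jar is written to target/flink-demo-1.0-SNAPSHOT.jar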

Submitting to the Flink cluster

bash
# Submit the job
./flink run -m 192.168.1.161:8081 -c com.qyt.StreamWordCount /root/soft/flink-demo-1.0-SNAPSHOT.jar
# Restore from the latest retained checkpoint; bc5fae2e282247486003ed259f2f37a7 is the job ID
./flink run -s hdfs://hadoop01:9000/flink/bc5fae2e282247486003ed259f2f37a7/chk-33 -m 192.168.1.161:8081 -c com.qyt.StreamWordCount /root/soft/flink-demo-1.0-SNAPSHOT.jar
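
The -s flag accepts either a savepoint path or, as above, a retained checkpoint path (chk-33). If the new jar no longer carries some of the stored state, for example after an operator uid changes, the restore fails unless skipping is explicitly allowed; a sketch using the --allowNonRestoredState (-n) flag:

bash
./flink run -s hdfs://hadoop01:9000/flink/bc5fae2e282247486003ed259f2f37a7/chk-33 -n -m 192.168.1.161:8081 -c com.qyt.StreamWordCount /root/soft/flink-demo-1.0-SNAPSHOT.jar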

Checking the corresponding jobId
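
The job ID is shown in the Flink web UI (on the JobManager address, here 192.168.1.161:8081), or it can be listed from the CLI, e.g.:

bash
# list running and scheduled jobs together with their IDs
./flink list -m 192.168.1.161:8081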
