4、Flink执行模式(流/批)详解(下)

1、执行模式设置

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Shows how to select the Flink runtime execution mode from code.
 *
 * <p>The mode can also be chosen at submission time instead of being hard-coded:
 * {@code bin/flink run -Dexecution.runtime-mode=BATCH <jarFile>}
 */
public class _01_RuntimeMode {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Other available values: RuntimeExecutionMode.AUTOMATIC and RuntimeExecutionMode.BATCH.
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);

        // execute() refuses to run an empty topology ("No operators defined in streaming
        // topology"), so attach a trivial bounded pipeline to make the example runnable.
        env.fromElements("a", "b", "c").print();

        env.execute();
    }
}

2、流执行模式事件时间定时器案例

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * 命令行输入的数据
 * 1,a,1714028400000
 * 1,b,1714028410000
 * 1,c,1714028410001
 * 1,d,1714028420000
 * 1,e,1714028420001
 * 2,f,1714028400000
 */

/**
 * 元素=>1,a,1714028400000的事件时间为=>1714028400000
 * 元素=>1,b,1714028410000的事件时间为=>1714028410000
 * 元素=>1,c,1714028410001的事件时间为=>1714028410001
 * 元素=>1,d,1714028420000的事件时间为=>1714028420000
 * 元素=>1,e,1714028420001的事件时间为=>1714028420001
 * 元素=>2,f,1714028400000的事件时间为=>1714028400000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:10.000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:20.000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420001
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:20.001
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:30.000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430001
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:30.001
 * 1=>的输出结果为,[a, b, c, d, e],当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:10.000
 * 2=>的输出结果为,[f],当前系统时间戳为=>2024-04-25 15:41:22.419
 */
/**
 * STREAMING-mode example of event-time timers on a keyed stream.
 *
 * <p>Reads text lines of the form {@code key,value,timestampMillis} from a socket on
 * localhost:8888, uses the third field as the event timestamp, keys the stream by the first
 * field, buffers the second field of every record in keyed state and registers an event-time
 * timer 10 seconds after each record's timestamp. When a timer fires, the buffered values of
 * that key are printed and the state is cleared.
 *
 * <p>A line with fewer than three comma-separated fields, or with non-numeric first/third
 * fields, makes the parsing code throw.
 */
public class _02_StreamingEventTimeService {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Event time is already the default time characteristic in the Flink versions that
        // provide RuntimeExecutionMode, so the deprecated setStreamTimeCharacteristic call
        // is not needed.
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);

        DataStreamSource<String> source = env.socketTextStream("localhost", 8888);

        SingleOutputStreamOperator<String> sourceWithWaterMark = source.assignTimestampsAndWatermarks(WatermarkStrategy
                .<String>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner(new SerializableTimestampAssigner<String>() {
                    @Override
                    public long extractTimestamp(String element, long recordTimestamp) {
                        // Third field carries the event time in epoch milliseconds.
                        String eventTime = element.split(",")[2];
                        System.out.println("元素=>" + element + "的事件时间为=>" + eventTime);
                        return Long.parseLong(eventTime);
                    }
                })
        );

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = sourceWithWaterMark.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                // Split once and reuse the fields: (key, value, timestampMillis).
                String[] fields = element.split(",");
                return new Tuple3<>(Integer.parseInt(fields[0]), fields[1], Long.parseLong(fields[2]));
            }
        });

        SingleOutputStreamOperator<Tuple2<Integer, List<String>>> res = map.keyBy(e -> e.f0)
                .process(new KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>() {

                    /** Offset, in milliseconds, between a record's event time and its timer. */
                    private static final long DELAY_MS = 10000L;

                    /** Values buffered for the current key since the last timer fired. */
                    private ValueState<List<String>> state;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        state = getRuntimeContext().getState(new ValueStateDescriptor<>("valueState"
                                , TypeInformation.of(new TypeHint<List<String>>() {
                        })));
                    }

                    /**
                     * Prints the values buffered for the timer's key and clears them.
                     *
                     * <p>Every record registers its own timer, so after the first timer of a key
                     * has cleared the state, later timers of the same key print {@code null}.
                     * NOTE: nothing is emitted through {@code out}; the result is only written to
                     * stdout, so the downstream {@code print()} sink receives no records.
                     */
                    @Override
                    public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        Integer currentKey = ctx.getCurrentKey();
                        List<String> value = state.value();
                        System.out.println("当前水位线为=>" + ctx.timerService().currentWatermark() + "," + currentKey + "=>的输出结果为," + value + ",当前系统时间戳为=>" + time(System.currentTimeMillis()));
                        state.clear();
                    }

                    /**
                     * Appends the record's value to the per-key buffer and registers an
                     * event-time timer at {@code eventTime + DELAY_MS}.
                     */
                    @Override
                    public void processElement(Tuple3<Integer, String, Long> input, Context ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        List<String> buffered = state.value();
                        if (buffered == null) {
                            buffered = new ArrayList<>();
                        }
                        buffered.add(input.f1);
                        state.update(buffered);

                        long currentWatermark = ctx.timerService().currentWatermark();
                        long timer = input.f2 + DELAY_MS;

                        System.out.println("当前水位线=>" + currentWatermark + ",当前定时器的触发时间=>" + timer);
                        System.out.println("----------时间格式化-----------");
                        System.out.println("当前水位线=>" + time(currentWatermark) + ",当前定时器的触发时间=>" + time(timer));

                        ctx.timerService().registerEventTimeTimer(timer);
                    }

                    // No close() override: keyed state is scoped to the key of the record or
                    // timer being processed, and close() is not invoked for a particular key,
                    // so clearing the state there is not meaningful.

                    /** Formats epoch milliseconds as {@code yyyy-MM-dd HH:mm:ss.SSS} in the default time zone. */
                    private String time(long timeStamp) {
                        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(timeStamp));
                    }
                });

        res.print();

        env.execute();
    }
}

3、批执行模式事件时间定时器案例

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;
import org.apache.flink.util.Collector;

import java.io.File;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * 文件中原始数据
 *
 * 1,a,1714028400000
 * 1,b,1714028410000
 * 1,c,1714028410001
 * 1,d,1714028420000
 * 1,e,1714028420001
 * 2,f,1714028400000
 */
/**
 * 元素=>1,a,1714028400000的事件时间为=>1714028400000
 * 元素=>1,b,1714028410000的事件时间为=>1714028410000
 * 元素=>1,c,1714028410001的事件时间为=>1714028410001
 * 元素=>1,d,1714028420000的事件时间为=>1714028420000
 * 元素=>1,e,1714028420001的事件时间为=>1714028420001
 * 元素=>2,f,1714028400000的事件时间为=>1714028400000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420001
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430001
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,[a, b, c, d, e],当前系统时间戳为=>2024-04-25 15:45:09.309
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * 当前水位线为=>9223372036854775807,2=>的输出结果为,[f],当前系统时间戳为=>2024-04-25 15:45:09.310
 */
/**
 * BATCH-mode example of event-time timers on a keyed stream.
 *
 * <p>Reads a text file whose lines have the form {@code key,value,timestampMillis}, uses the
 * third field as the event timestamp, keys the stream by the first field, buffers the second
 * field of every record in keyed state and registers an event-time timer 10 seconds after each
 * record's timestamp. When a timer fires, the buffered values of that key are printed and the
 * state is cleared.
 *
 * <p>The input file defaults to {@code /Users/***&#47;Desktop/event.txt}; pass a different
 * path as the first program argument to override it.
 */
public class _03_BatchEventTimeService {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Event time is already the default time characteristic in the Flink versions that
        // provide RuntimeExecutionMode, so the deprecated setStreamTimeCharacteristic call
        // is not needed.
        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        env.setParallelism(1);

        // Use one path for both the input format and readFile: readFile applies its filePath
        // argument to the format, so a different path given to the format would be ignored.
        String inputPath = args.length > 0 ? args[0] : "/Users/***/Desktop/event.txt";

        DataStreamSource<String> source = env.readFile(
                new TextInputFormat(new Path(inputPath))
                , inputPath
                , FileProcessingMode.PROCESS_ONCE
                , 10000);

        SingleOutputStreamOperator<String> sourceWithWaterMark = source.assignTimestampsAndWatermarks(WatermarkStrategy
                .<String>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner(new SerializableTimestampAssigner<String>() {
                    @Override
                    public long extractTimestamp(String element, long recordTimestamp) {
                        // Third field carries the event time in epoch milliseconds.
                        String eventTime = element.split(",")[2];
                        System.out.println("元素=>" + element + "的事件时间为=>" + eventTime);
                        return Long.parseLong(eventTime);
                    }
                })
        );

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = sourceWithWaterMark.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                // Split once and reuse the fields: (key, value, timestampMillis).
                String[] fields = element.split(",");
                return new Tuple3<>(Integer.parseInt(fields[0]), fields[1], Long.parseLong(fields[2]));
            }
        });

        SingleOutputStreamOperator<Tuple2<Integer, List<String>>> res = map.keyBy(e -> e.f0)
                .process(new KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>() {

                    /** Offset, in milliseconds, between a record's event time and its timer. */
                    private static final long DELAY_MS = 10000L;

                    /** Values buffered for the current key since the last timer fired. */
                    private ValueState<List<String>> state;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        state = getRuntimeContext().getState(new ValueStateDescriptor<>("valueState"
                                , TypeInformation.of(new TypeHint<List<String>>() {
                        })));
                    }

                    /**
                     * Prints the values buffered for the timer's key and clears them.
                     *
                     * <p>Every record registers its own timer, so after the first timer of a key
                     * has cleared the state, later timers of the same key print {@code null}.
                     * NOTE: nothing is emitted through {@code out}; the result is only written to
                     * stdout, so the downstream {@code print()} sink receives no records.
                     */
                    @Override
                    public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        Integer currentKey = ctx.getCurrentKey();
                        List<String> value = state.value();
                        System.out.println("当前水位线为=>" + ctx.timerService().currentWatermark() + "," + currentKey + "=>的输出结果为," + value + ",当前系统时间戳为=>" + time(System.currentTimeMillis()));
                        state.clear();
                    }

                    /**
                     * Appends the record's value to the per-key buffer and registers an
                     * event-time timer at {@code eventTime + DELAY_MS}.
                     */
                    @Override
                    public void processElement(Tuple3<Integer, String, Long> input, Context ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        List<String> buffered = state.value();
                        if (buffered == null) {
                            buffered = new ArrayList<>();
                        }
                        buffered.add(input.f1);
                        state.update(buffered);

                        long currentWatermark = ctx.timerService().currentWatermark();
                        long timer = input.f2 + DELAY_MS;

                        System.out.println("当前水位线=>" + currentWatermark + ",当前定时器的触发时间=>" + timer);

                        ctx.timerService().registerEventTimeTimer(timer);
                    }

                    // No close() override: keyed state is scoped to the key of the record or
                    // timer being processed, and close() is not invoked for a particular key,
                    // so clearing the state there is not meaningful.

                    /** Formats epoch milliseconds as {@code yyyy-MM-dd HH:mm:ss.SSS} in the default time zone. */
                    private String time(long timeStamp) {
                        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(timeStamp));
                    }
                });

        res.print();

        env.execute();
    }
}

4、批执行模式重启策略配置

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;

import java.io.File;

/**
 * BATCH-mode example that configures a fixed-delay restart strategy.
 *
 * <p>Reads a text file whose lines have the form {@code key,value,timestampMillis}, parses each
 * line into a {@code Tuple3<Integer, String, Long>} and prints it. A line that cannot be parsed
 * makes the map function throw, which is what triggers the configured restart attempts.
 *
 * <p>The input file defaults to {@code /Users/hhx/Desktop/no_order.txt}; pass a different path
 * as the first program argument to override it.
 */
public class _05_BatchJobFailRestart {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        env.setParallelism(1);

        // Restart strategy: fixedDelayRestart(3, 5000), i.e. 3 attempts with a delay value of 5000.
        RestartStrategies.RestartStrategyConfiguration restartStrategy = RestartStrategies.fixedDelayRestart(3, 5000);
        env.getConfig().setRestartStrategy(restartStrategy);

        // Use one path for both the input format and readFile: readFile applies its filePath
        // argument to the format, so a different path given to the format would be ignored.
        String inputPath = args.length > 0 ? args[0] : "/Users/hhx/Desktop/no_order.txt";

        DataStreamSource<String> source = env.readFile(
                new TextInputFormat(new Path(inputPath))
                , inputPath
                , FileProcessingMode.PROCESS_ONCE
                , 10000);

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = source.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                // Split once and reuse the fields: (key, value, timestampMillis).
                String[] fields = element.split(",");
                return new Tuple3<>(Integer.parseInt(fields[0]), fields[1], Long.parseLong(fields[2]));
            }
        });

        map.print();
        env.execute();
    }
}

5、流执行模式重启策略和检查点配置

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * STREAMING-mode example that enables checkpointing together with a fixed-delay restart
 * strategy.
 *
 * <p>Reads text lines of the form {@code key,value,timestampMillis} from a socket on
 * localhost:8888, converts each line into a {@code Tuple3<Integer, String, Long>} and prints it.
 */
public class _06_StreamJobFailRestartAndCheckpoint {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
        env.setParallelism(1);

        // Checkpointing: interval argument 5000, AT_LEAST_ONCE mode.
        env.enableCheckpointing(5000L, CheckpointingMode.AT_LEAST_ONCE);

        // Restart strategy: fixedDelayRestart(3, 5000).
        env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000));

        DataStreamSource<String> lines = env.socketTextStream("localhost", 8888);

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> parsed = lines.map(
                new MapFunction<String, Tuple3<Integer, String, Long>>() {
                    @Override
                    public Tuple3<Integer, String, Long> map(String element) throws Exception {
                        // Fields are evaluated in the order key, value, timestamp.
                        String[] fields = element.split(",");
                        Integer key = Integer.parseInt(fields[0]);
                        String value = fields[1];
                        Long timestamp = Long.parseLong(fields[2]);
                        return Tuple3.of(key, value, timestamp);
                    }
                });

        parsed.print();
        env.execute();
    }
}
相关推荐
一只专注api接口开发的技术猿5 小时前
微服务架构下集成淘宝商品 API 的实践与思考
java·大数据·开发语言·数据库·微服务·架构
AC赳赳老秦5 小时前
Dify工作流+DeepSeek:运维自动化闭环(数据采集→报告生成)
android·大数据·运维·数据库·人工智能·golang·deepseek
明洞日记5 小时前
【软考每日一练009】计算机系统性能评价:基准程序分类与 TPC 实战案例详解
大数据·数据库
李慕婉学姐5 小时前
【开题答辩过程】以《基于Spring Boot和大数据的医院挂号系统的设计与实现》为例,不知道这个选题怎么做的,不知道这个选题怎么开题答辩的可以进来看看
大数据·spring boot·后端
汽车仪器仪表相关领域5 小时前
全程高温伴热,NOx瞬态精准捕捉:MEXA-1170HCLD加热型NOx测定装置项目实战全解
大数据·服务器·网络·人工智能·功能测试·单元测试·可用性测试
橙露5 小时前
嵌入式实时操作系统 FreeRTOS:任务调度与信号量的核心应用
java·大数据·服务器
DO_Community5 小时前
DigitalOcean携手Persistent达成战略合作,让 AI 更亲民、更易扩展
大数据·人工智能·ai·llm·区块链
乾元6 小时前
数据为王——安全数据集的清洗与特征工程
大数据·网络·人工智能·安全·web安全·机器学习·架构
2501_942158436 小时前
服务设计从成本到利润引擎的重构
大数据·python·重构
萤丰信息6 小时前
智慧园区:科技赋能的未来产业生态新载体
大数据·运维·人工智能·科技·智慧园区