4、Flink执行模式(流/批)详解(下)

1、执行模式设置

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * bin/flink run -Dexecution.runtime-mode=BATCH <jarFile>
 */
public class _01_RuntimeMode {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

//        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);
//        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);

        env.execute();
    }
}

2、流执行模式事件时间定时器案例

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * 命令行输入的数据
 * 1,a,1714028400000
 * 1,b,1714028410000
 * 1,c,1714028410001
 * 1,d,1714028420000
 * 1,e,1714028420001
 */

/**
 * 元素=>1,a,1714028400000的事件时间为=>1714028400000
 * 元素=>1,b,1714028410000的事件时间为=>1714028410000
 * 元素=>1,c,1714028410001的事件时间为=>1714028410001
 * 元素=>1,d,1714028420000的事件时间为=>1714028420000
 * 元素=>1,e,1714028420001的事件时间为=>1714028420001
 * 元素=>2,f,1714028400000的事件时间为=>1714028400000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:10.000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:20.000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420001
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:20.001
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:30.000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430001
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:30.001
 * 1=>的输出结果为,[a, b, c, d, e],当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:10.000
 * 2=>的输出结果为,[f],当前系统时间戳为=>2024-04-25 15:41:22.419
 */
public class _02_StreamingEventTimeService {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        DataStreamSource<String> source = env.socketTextStream("localhost", 8888);

        SingleOutputStreamOperator<String> sourceWithWaterMark = source.assignTimestampsAndWatermarks(WatermarkStrategy
                .<String>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner(new SerializableTimestampAssigner<String>() {
                    @Override
                    public long extractTimestamp(String element, long recordTimestamp) {
                        System.out.println("元素=>" + element + "的事件时间为=>" + element.split(",")[2]);
                        return Long.parseLong(element.split(",")[2]);
                    }
                })
        );

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = sourceWithWaterMark.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                return new Tuple3<>(Integer.parseInt(element.split(",")[0])
                        , element.split(",")[1]
                        , Long.parseLong(element.split(",")[2])
                );
            }
        });

        SingleOutputStreamOperator<Tuple2<Integer, List<String>>> res = map.keyBy(e -> e.f0)
                .process(new KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>() {

                    private ValueState<List<String>> state;
                    private final Long DELAY = 10000L;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        state = getRuntimeContext().getState(new ValueStateDescriptor<>("valueState"
                                , TypeInformation.of(new TypeHint<List<String>>() {
                        })));
                    }

                    @Override
                    public void onTimer(long timestamp, KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>.OnTimerContext ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        Integer currentKey = ctx.getCurrentKey();
                        List<String> value = state.value();
                        System.out.println("当前水位线为=>"+ctx.timerService().currentWatermark()+","+currentKey+"=>的输出结果为,"+value+",当前系统时间戳为=>"+time(System.currentTimeMillis()));
                        state.clear();
                    }

                    @Override
                    public void processElement(Tuple3<Integer, String, Long> input, KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>.Context ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        List<String> value = state.value();

                        if (value == null) {
                            value = new ArrayList<String>();
                            value.add(input.f1);
                        } else {
                            value.add(input.f1);
                        }

                        state.update(value);

                        long currentWatermark = ctx.timerService().currentWatermark();
                        long timer = input.f2 + DELAY;

                        System.out.println("当前水位线=>" + currentWatermark + ",当前定时器的触发时间=>" + timer);
                        System.out.println("----------时间格式化-----------");
                        System.out.println("当前水位线=>" + time(currentWatermark) + ",当前定时器的触发时间=>" + time(timer));

                        ctx.timerService().registerEventTimeTimer(timer);
                    }

                    @Override
                    public void close() throws Exception {
                        state.clear();
                    }

                    public String time(long timeStamp) {
                        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(timeStamp));
                    }
                });

        res.print();

        env.execute();
    }
}

3、批执行模式事件时间定时器案例

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;
import org.apache.flink.util.Collector;

import java.io.File;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * 文件中原始数据
 *
 * 1,a,1714028400000
 * 1,b,1714028410000
 * 1,c,1714028410001
 * 1,d,1714028420000
 * 1,e,1714028420001
 * 2,f,1714028400000
 */
/**
 * 元素=>1,a,1714028400000的事件时间为=>1714028400000
 * 元素=>1,b,1714028410000的事件时间为=>1714028410000
 * 元素=>1,c,1714028410001的事件时间为=>1714028410001
 * 元素=>1,d,1714028420000的事件时间为=>1714028420000
 * 元素=>1,e,1714028420001的事件时间为=>1714028420001
 * 元素=>2,f,1714028400000的事件时间为=>1714028400000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420001
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430001
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,[a, b, c, d, e],当前系统时间戳为=>2024-04-25 15:45:09.309
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * 当前水位线为=>9223372036854775807,2=>的输出结果为,[f],当前系统时间戳为=>2024-04-25 15:45:09.310
 */
public class _03_BatchEventTimeService {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(1);

        DataStreamSource<String> source = env.readFile(new TextInputFormat(
                        Path.fromLocalFile(new File("event.txt")))
                , "/Users/***/Desktop/event.txt"
                , FileProcessingMode.PROCESS_ONCE
                , 10000);

        SingleOutputStreamOperator<String> sourceWithWaterMark = source.assignTimestampsAndWatermarks(WatermarkStrategy
                .<String>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner(new SerializableTimestampAssigner<String>() {
                    @Override
                    public long extractTimestamp(String element, long recordTimestamp) {
                        System.out.println("元素=>" + element + "的事件时间为=>" + element.split(",")[2]);
                        return Long.parseLong(element.split(",")[2]);
                    }
                })
        );

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = sourceWithWaterMark.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                return new Tuple3<>(Integer.parseInt(element.split(",")[0])
                        , element.split(",")[1]
                        , Long.parseLong(element.split(",")[2])
                );
            }
        });

        SingleOutputStreamOperator<Tuple2<Integer, List<String>>> res = map.keyBy(e -> e.f0)
                .process(new KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>() {

                    private ValueState<List<String>> state;
                    private final Long DELAY = 10000L;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        state = getRuntimeContext().getState(new ValueStateDescriptor<>("valueState"
                                , TypeInformation.of(new TypeHint<List<String>>() {
                        })));
                    }

                    @Override
                    public void onTimer(long timestamp, KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>.OnTimerContext ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        Integer currentKey = ctx.getCurrentKey();
                        List<String> value = state.value();
                        System.out.println("当前水位线为=>"+ctx.timerService().currentWatermark()+","+currentKey + "=>的输出结果为," + value + ",当前系统时间戳为=>" + time(System.currentTimeMillis()));
                        state.clear();
                    }

                    @Override
                    public void processElement(Tuple3<Integer, String, Long> input, KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>.Context ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        List<String> value = state.value();

                        if (value == null) {
                            value = new ArrayList<String>();
                            value.add(input.f1);
                        } else {
                            value.add(input.f1);
                        }

                        state.update(value);

                        long currentWatermark = ctx.timerService().currentWatermark();
                        long timer = input.f2 + DELAY;

                        System.out.println("当前水位线=>" + currentWatermark + ",当前定时器的触发时间=>" + timer);
//                        System.out.println("----------时间格式化-----------");
//                        System.out.println("当前水位线=>" + time(currentWatermark) + ",当前定时器的触发时间=>" + time(timer));

                        ctx.timerService().registerEventTimeTimer(timer);
                    }

                    @Override
                    public void close() throws Exception {
                        state.clear();
                    }

                    public String time(long timeStamp) {
                        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(timeStamp));
                    }
                });

        res.print();

        env.execute();
    }
}

4、批执行模式重启策略配置

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;

import java.io.File;

public class _05_BatchJobFailRestart {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        env.setParallelism(1);

        // 设置重试策略
        RestartStrategies.RestartStrategyConfiguration restartStrategy = RestartStrategies.fixedDelayRestart(3, 5000);
        env.getConfig().setRestartStrategy(restartStrategy);

        DataStreamSource<String> source = env.readFile(new TextInputFormat(
                        Path.fromLocalFile(new File("no_order.txt")))
                , "/Users/hhx/Desktop/no_order.txt"
                , FileProcessingMode.PROCESS_ONCE
                , 10000);

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = source.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                return new Tuple3<>(Integer.parseInt(element.split(",")[0])
                        , element.split(",")[1]
                        , Long.parseLong(element.split(",")[2])
                );
            }
        });

        map.print();
        env.execute();
    }
}

5、流执行模式重启策略和检查点配置

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class _06_StreamJobFailRestartAndCheckpoint {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
        env.setParallelism(1);

        // 设置 检查点(Checkpoint)
        env.enableCheckpointing(5000L, CheckpointingMode.AT_LEAST_ONCE);
        // 设置 重试策略(RestartStrategies)
        RestartStrategies.RestartStrategyConfiguration restartStrategy = RestartStrategies.fixedDelayRestart(3, 5000);
        env.getConfig().setRestartStrategy(restartStrategy);

        DataStreamSource<String> source = env.socketTextStream("localhost",8888);

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = source.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                return new Tuple3<>(Integer.parseInt(element.split(",")[0])
                        , element.split(",")[1]
                        , Long.parseLong(element.split(",")[2])
                );
            }
        });

        map.print();
        env.execute();
    }
}
相关推荐
V搜xhliang02465 小时前
机器人建模(URDF)与仿真配置
大数据·人工智能·深度学习·机器学习·自然语言处理·机器人
房产中介行业研习社5 小时前
2026年3月哪些房源管理系统功能全
大数据·运维·人工智能
玄微云6 小时前
2026年通用软件难适配,垂直店务系统反而更省心
大数据·云计算·软件需求
Elastic 中国社区官方博客7 小时前
Elastic 为什么捐赠其 OpenTelemetry PHP 发行版
大数据·开发语言·elasticsearch·搜索引擎·信息可视化·全文检索·php
方向研究7 小时前
ABS生产
大数据
TDengine (老段)7 小时前
TDengine 视图功能使用
大数据·数据库·servlet·时序数据库·tdengine·涛思数据
TDengine (老段)7 小时前
TDengine IDMP 运维指南 —— 部署架构
大数据·运维·数据库·架构·时序数据库·tdengine·涛思数据
utmhikari8 小时前
【测试人生】变更规则校验Agent研发的一些思路
大数据·人工智能·llm·agent·变更风险·openclaw
AC赳赳老秦8 小时前
DeepSeek优化多智能体指令:避免协同冲突,提升自动化流程稳定性
android·大数据·运维·人工智能·自然语言处理·自动化·deepseek
成长之路5149 小时前
【数据集】A股上市公司数字投资数据集-含代码(2000-2024年)
大数据