4、Flink执行模式(流/批)详解(下)

1、执行模式设置

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * Shows how to select Flink's runtime execution mode.
 *
 * <p>The mode can be set programmatically (as done here) or supplied at submission time,
 * which keeps the jar itself mode-agnostic:
 *
 * <pre>bin/flink run -Dexecution.runtime-mode=BATCH &lt;jarFile&gt;</pre>
 */
public class _01_RuntimeMode {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Other accepted values are RuntimeExecutionMode.BATCH and
        // RuntimeExecutionMode.AUTOMATIC (Flink decides from the boundedness of the sources).
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);

        // execute() refuses to run an empty topology ("No operators defined in streaming
        // topology"), so give the job a minimal bounded pipeline to actually run.
        env.fromElements("a", "b", "c").print();

        env.execute();
    }
}

2、流执行模式事件时间定时器案例

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * 命令行输入的数据
 * 1,a,1714028400000
 * 1,b,1714028410000
 * 1,c,1714028410001
 * 1,d,1714028420000
 * 1,e,1714028420001
 * 2,f,1714028400000
 */

/**
 * 元素=>1,a,1714028400000的事件时间为=>1714028400000
 * 元素=>1,b,1714028410000的事件时间为=>1714028410000
 * 元素=>1,c,1714028410001的事件时间为=>1714028410001
 * 元素=>1,d,1714028420000的事件时间为=>1714028420000
 * 元素=>1,e,1714028420001的事件时间为=>1714028420001
 * 元素=>2,f,1714028400000的事件时间为=>1714028400000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:10.000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:20.000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420001
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:20.001
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:30.000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430001
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:30.001
 * 1=>的输出结果为,[a, b, c, d, e],当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:41:22.418
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * ----------时间格式化-----------
 * 当前水位线=>292269055-12-03 00:47:04.192,当前定时器的触发时间=>2024-04-25 15:00:10.000
 * 2=>的输出结果为,[f],当前系统时间戳为=>2024-04-25 15:41:22.419
 */
/**
 * STREAMING-mode demo of event-time timers on a keyed stream.
 *
 * <p>Reads lines of the form {@code key,value,timestampMillis} from a socket on
 * {@code localhost:8888}, buffers the values per key in keyed state and registers an
 * event-time timer 10 seconds after each record's timestamp. When a timer fires, the buffered
 * values of that key are printed and the state is cleared, so later timers of the same key
 * print {@code null}.
 */
public class _02_StreamingEventTimeService {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Event time is the default time characteristic since Flink 1.12 (the release that
        // introduced RuntimeExecutionMode), so the deprecated setStreamTimeCharacteristic
        // call is not needed.
        env.setRuntimeMode(RuntimeExecutionMode.STREAMING);

        DataStreamSource<String> source = env.socketTextStream("localhost", 8888);

        SingleOutputStreamOperator<String> sourceWithWaterMark = source.assignTimestampsAndWatermarks(WatermarkStrategy
                .<String>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner(new SerializableTimestampAssigner<String>() {
                    @Override
                    public long extractTimestamp(String element, long recordTimestamp) {
                        // Split once; the third field carries the event time in epoch millis.
                        String eventTime = element.split(",")[2];
                        System.out.println("元素=>" + element + "的事件时间为=>" + eventTime);
                        return Long.parseLong(eventTime);
                    }
                })
        );

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = sourceWithWaterMark.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                // key, value, event timestamp
                String[] fields = element.split(",");
                return new Tuple3<>(Integer.parseInt(fields[0]), fields[1], Long.parseLong(fields[2]));
            }
        });

        SingleOutputStreamOperator<Tuple2<Integer, List<String>>> res = map.keyBy(e -> e.f0)
                .process(new KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>() {

                    /** Offset added to a record's event time to obtain its timer timestamp. */
                    private static final long DELAY_MS = 10_000L;

                    /** Values buffered for the current key since the last timer fired. */
                    private transient ValueState<List<String>> state;

                    // Deliberately no close() override: keyed state may only be accessed while
                    // a key is in context (processElement/onTimer). Clearing it from close()
                    // touches at most the last processed key and can fail when no key was set.

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        state = getRuntimeContext().getState(new ValueStateDescriptor<>("valueState"
                                , TypeInformation.of(new TypeHint<List<String>>() {
                        })));
                    }

                    /** Prints the values buffered for the firing key, then clears them. */
                    @Override
                    public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        Integer currentKey = ctx.getCurrentKey();
                        List<String> value = state.value();
                        System.out.println("当前水位线为=>" + ctx.timerService().currentWatermark() + "," + currentKey + "=>的输出结果为," + value + ",当前系统时间戳为=>" + time(System.currentTimeMillis()));
                        state.clear();
                    }

                    /** Buffers the record's value and registers a timer DELAY_MS after its event time. */
                    @Override
                    public void processElement(Tuple3<Integer, String, Long> input, Context ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        List<String> value = state.value();
                        if (value == null) {
                            value = new ArrayList<>();
                        }
                        value.add(input.f1);
                        state.update(value);

                        long currentWatermark = ctx.timerService().currentWatermark();
                        long timer = input.f2 + DELAY_MS;

                        System.out.println("当前水位线=>" + currentWatermark + ",当前定时器的触发时间=>" + timer);
                        System.out.println("----------时间格式化-----------");
                        System.out.println("当前水位线=>" + time(currentWatermark) + ",当前定时器的触发时间=>" + time(timer));

                        ctx.timerService().registerEventTimeTimer(timer);
                    }

                    /** Formats epoch millis in the JVM's default time zone. */
                    private String time(long timeStamp) {
                        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(timeStamp));
                    }
                });

        // NOTE: the process function never calls out.collect(...), so this sink prints nothing;
        // all visible output of the demo comes from the System.out.println calls above.
        res.print();

        env.execute();
    }
}

3、批执行模式事件时间定时器案例

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;
import org.apache.flink.util.Collector;

import java.io.File;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * 文件中原始数据
 *
 * 1,a,1714028400000
 * 1,b,1714028410000
 * 1,c,1714028410001
 * 1,d,1714028420000
 * 1,e,1714028420001
 * 2,f,1714028400000
 */
/**
 * 元素=>1,a,1714028400000的事件时间为=>1714028400000
 * 元素=>1,b,1714028410000的事件时间为=>1714028410000
 * 元素=>1,c,1714028410001的事件时间为=>1714028410001
 * 元素=>1,d,1714028420000的事件时间为=>1714028420000
 * 元素=>1,e,1714028420001的事件时间为=>1714028420001
 * 元素=>2,f,1714028400000的事件时间为=>1714028400000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028420001
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430000
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028430001
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,[a, b, c, d, e],当前系统时间戳为=>2024-04-25 15:45:09.309
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线为=>9223372036854775807,1=>的输出结果为,null,当前系统时间戳为=>2024-04-25 15:45:09.310
 * 当前水位线=>-9223372036854775808,当前定时器的触发时间=>1714028410000
 * 当前水位线为=>9223372036854775807,2=>的输出结果为,[f],当前系统时间戳为=>2024-04-25 15:45:09.310
 */
/**
 * BATCH-mode counterpart of the event-time timer demo.
 *
 * <p>Reads lines of the form {@code key,value,timestampMillis} from a text file, buffers the
 * values per key in keyed state and registers an event-time timer 10 seconds after each
 * record's timestamp. When a timer fires, the buffered values of that key are printed and
 * the state is cleared, so later timers of the same key print {@code null}.
 *
 * <p>Usage: {@code _03_BatchEventTimeService [inputFile]}. Without an argument the original
 * hard-coded location is used.
 */
public class _03_BatchEventTimeService {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Event time is the default time characteristic since Flink 1.12 (the release that
        // introduced RuntimeExecutionMode), so the deprecated setStreamTimeCharacteristic
        // call is not needed.
        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        env.setParallelism(1);

        // One path for both the input format and readFile. readFile overrides the format's
        // path with its own filePath argument, so passing two different paths is misleading.
        String inputPath = args.length > 0 ? args[0] : "/Users/***/Desktop/event.txt";
        DataStreamSource<String> source = env.readFile(new TextInputFormat(new Path(inputPath))
                , inputPath
                , FileProcessingMode.PROCESS_ONCE
                , 10000);

        SingleOutputStreamOperator<String> sourceWithWaterMark = source.assignTimestampsAndWatermarks(WatermarkStrategy
                .<String>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner(new SerializableTimestampAssigner<String>() {
                    @Override
                    public long extractTimestamp(String element, long recordTimestamp) {
                        // Split once; the third field carries the event time in epoch millis.
                        String eventTime = element.split(",")[2];
                        System.out.println("元素=>" + element + "的事件时间为=>" + eventTime);
                        return Long.parseLong(eventTime);
                    }
                })
        );

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = sourceWithWaterMark.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                // key, value, event timestamp
                String[] fields = element.split(",");
                return new Tuple3<>(Integer.parseInt(fields[0]), fields[1], Long.parseLong(fields[2]));
            }
        });

        SingleOutputStreamOperator<Tuple2<Integer, List<String>>> res = map.keyBy(e -> e.f0)
                .process(new KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>() {

                    /** Offset added to a record's event time to obtain its timer timestamp. */
                    private static final long DELAY_MS = 10_000L;

                    /** Values buffered for the current key since the last timer fired. */
                    private transient ValueState<List<String>> state;

                    // Deliberately no close() override: keyed state may only be accessed while
                    // a key is in context (processElement/onTimer), which close() is not.

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        state = getRuntimeContext().getState(new ValueStateDescriptor<>("valueState"
                                , TypeInformation.of(new TypeHint<List<String>>() {
                        })));
                    }

                    /** Prints the values buffered for the firing key, then clears them. */
                    @Override
                    public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        Integer currentKey = ctx.getCurrentKey();
                        List<String> value = state.value();
                        System.out.println("当前水位线为=>" + ctx.timerService().currentWatermark() + "," + currentKey + "=>的输出结果为," + value + ",当前系统时间戳为=>" + time(System.currentTimeMillis()));
                        state.clear();
                    }

                    /** Buffers the record's value and registers a timer DELAY_MS after its event time. */
                    @Override
                    public void processElement(Tuple3<Integer, String, Long> input, Context ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
                        List<String> value = state.value();
                        if (value == null) {
                            value = new ArrayList<>();
                        }
                        value.add(input.f1);
                        state.update(value);

                        long currentWatermark = ctx.timerService().currentWatermark();
                        long timer = input.f2 + DELAY_MS;

                        System.out.println("当前水位线=>" + currentWatermark + ",当前定时器的触发时间=>" + timer);

                        ctx.timerService().registerEventTimeTimer(timer);
                    }

                    /** Formats epoch millis in the JVM's default time zone. */
                    private String time(long timeStamp) {
                        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(timeStamp));
                    }
                });

        // NOTE: the process function never calls out.collect(...), so this sink prints nothing;
        // all visible output of the demo comes from the System.out.println calls above.
        res.print();

        env.execute();
    }
}

4、批执行模式重启策略配置

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;

import java.io.File;

/**
 * BATCH-mode demo of a fixed-delay restart strategy.
 *
 * <p>Reads lines of the form {@code key,value,timestampMillis} from a text file, parses them
 * into tuples and prints them. A malformed line makes the map function throw, which is what
 * exercises the restart strategy; the parsing is therefore intentionally not lenient.
 *
 * <p>Usage: {@code _05_BatchJobFailRestart [inputFile]}. Without an argument the original
 * hard-coded location is used.
 */
public class _05_BatchJobFailRestart {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        env.setParallelism(1);

        // Restart strategy: at most 3 restart attempts, 5000 ms apart.
        RestartStrategies.RestartStrategyConfiguration restartStrategy = RestartStrategies.fixedDelayRestart(3, 5000);
        env.getConfig().setRestartStrategy(restartStrategy);

        // One path for both the input format and readFile. readFile overrides the format's
        // path with its own filePath argument, so passing two different paths is misleading.
        String inputPath = args.length > 0 ? args[0] : "/Users/hhx/Desktop/no_order.txt";
        DataStreamSource<String> source = env.readFile(new TextInputFormat(new Path(inputPath))
                , inputPath
                , FileProcessingMode.PROCESS_ONCE
                , 10000);

        SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = source.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
            @Override
            public Tuple3<Integer, String, Long> map(String element) throws Exception {
                // key, value, event timestamp; split once instead of once per field.
                String[] fields = element.split(",");
                return new Tuple3<>(Integer.parseInt(fields[0]), fields[1], Long.parseLong(fields[2]));
            }
        });

        map.print();
        env.execute();
    }
}

5、流执行模式重启策略和检查点配置

java 复制代码
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

/**
 * STREAMING-mode demo combining periodic checkpoints with a fixed-delay restart strategy.
 *
 * <p>Lines of the form {@code key,value,timestampMillis} are read from a socket on
 * {@code localhost:8888}, parsed into tuples and printed.
 */
public class _06_StreamJobFailRestartAndCheckpoint {

    /** Turns one {@code key,value,timestampMillis} line into a typed tuple. */
    private static final class CsvLineParser implements MapFunction<String, Tuple3<Integer, String, Long>> {
        @Override
        public Tuple3<Integer, String, Long> map(String line) throws Exception {
            final String[] columns = line.split(",");
            final Integer key = Integer.parseInt(columns[0]);
            final String payload = columns[1];
            final Long eventTime = Long.parseLong(columns[2]);
            return new Tuple3<>(key, payload, eventTime);
        }
    }

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setRuntimeMode(RuntimeExecutionMode.STREAMING);
        environment.setParallelism(1);

        // Checkpoint every 5000 ms with at-least-once guarantees.
        environment.enableCheckpointing(5000L, CheckpointingMode.AT_LEAST_ONCE);

        // Restart strategy: at most 3 restart attempts, 5000 ms apart.
        environment.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000));

        final DataStreamSource<String> lines = environment.socketTextStream("localhost", 8888);
        final SingleOutputStreamOperator<Tuple3<Integer, String, Long>> parsed = lines.map(new CsvLineParser());

        parsed.print();
        environment.execute();
    }
}
相关推荐
Json_181790144802 小时前
An In-depth Look into the 1688 Product Details Data API Interface
大数据·json
Qspace丨轻空间4 小时前
气膜场馆:推动体育文化旅游创新发展的关键力量—轻空间
大数据·人工智能·安全·生活·娱乐
Elastic 中国社区官方博客5 小时前
如何将数据从 AWS S3 导入到 Elastic Cloud - 第 3 部分:Elastic S3 连接器
大数据·elasticsearch·搜索引擎·云计算·全文检索·可用性测试·aws
Aloudata6 小时前
从Apache Atlas到Aloudata BIG,数据血缘解析有何改变?
大数据·apache·数据血缘·主动元数据·数据链路
水豚AI课代表6 小时前
分析报告、调研报告、工作方案等的提示词
大数据·人工智能·学习·chatgpt·aigc
拓端研究室TRL9 小时前
【梯度提升专题】XGBoost、Adaboost、CatBoost预测合集:抗乳腺癌药物优化、信贷风控、比特币应用|附数据代码...
大数据
黄焖鸡能干四碗9 小时前
信息化运维方案,实施方案,开发方案,信息中心安全运维资料(软件资料word)
大数据·人工智能·软件需求·设计规范·规格说明书
编码小袁9 小时前
探索数据科学与大数据技术专业本科生的广阔就业前景
大数据
WeeJot嵌入式10 小时前
大数据治理:确保数据的可持续性和价值
大数据
zmd-zk11 小时前
kafka+zookeeper的搭建
大数据·分布式·zookeeper·中间件·kafka