1. Configuring the execution mode
```java
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
* The runtime mode can also be chosen at submission time:
* bin/flink run -Dexecution.runtime-mode=BATCH <jarFile>
*/
public class _01_RuntimeMode {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// AUTOMATIC: let Flink pick BATCH or STREAMING based on whether all sources are bounded
// env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);
// BATCH: staged execution for bounded input; only a final watermark is emitted
// env.setRuntimeMode(RuntimeExecutionMode.BATCH);
// STREAMING: the default, pipelined execution with incremental watermarks
env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
// execute() fails with "No operators defined" unless the job has at least one source/sink,
// so a trivial pipeline is added here for demonstration.
env.fromElements(1, 2, 3).print();
env.execute();
}
}
```
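Besides `setRuntimeMode()` and the `-Dexecution.runtime-mode` flag shown in the comment, the mode can also be supplied through a `Configuration` when the environment is created. A minimal sketch (the class name is hypothetical; assumes Flink 1.12+, where `ExecutionOptions.RUNTIME_MODE` exists):

```java
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.ExecutionOptions;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class RuntimeModeFromConfigSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // equivalent to -Dexecution.runtime-mode=BATCH on the command line
        conf.set(ExecutionOptions.RUNTIME_MODE, RuntimeExecutionMode.BATCH);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.fromElements("a", "b", "c").print();
        env.execute();
    }
}
```

In general, leaving the mode out of the program and choosing it at submission time keeps the same jar usable in both modes.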
2. Event-time timer example in STREAMING execution mode
```java
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
* Input lines typed into the socket (e.g. via netcat on port 8888):
* 1,a,1714028400000
* 1,b,1714028410000
* 1,c,1714028410001
* 1,d,1714028420000
* 1,e,1714028420001
* 2,f,1714028400000
*/
/**
* Sample output:
* element => 1,a,1714028400000, event time => 1714028400000
* element => 1,b,1714028410000, event time => 1714028410000
* element => 1,c,1714028410001, event time => 1714028410001
* element => 1,d,1714028420000, event time => 1714028420000
* element => 1,e,1714028420001, event time => 1714028420001
* element => 2,f,1714028400000, event time => 1714028400000
* current watermark => -9223372036854775808, timer fires at => 1714028410000
* ---------- formatted ----------
* current watermark => 292269055-12-03 00:47:04.192, timer fires at => 2024-04-25 15:00:10.000
* current watermark => -9223372036854775808, timer fires at => 1714028420000
* ---------- formatted ----------
* current watermark => 292269055-12-03 00:47:04.192, timer fires at => 2024-04-25 15:00:20.000
* current watermark => -9223372036854775808, timer fires at => 1714028420001
* ---------- formatted ----------
* current watermark => 292269055-12-03 00:47:04.192, timer fires at => 2024-04-25 15:00:20.001
* current watermark => -9223372036854775808, timer fires at => 1714028430000
* ---------- formatted ----------
* current watermark => 292269055-12-03 00:47:04.192, timer fires at => 2024-04-25 15:00:30.000
* current watermark => -9223372036854775808, timer fires at => 1714028430001
* ---------- formatted ----------
* current watermark => 292269055-12-03 00:47:04.192, timer fires at => 2024-04-25 15:00:30.001
* 1 => collected: [a, b, c, d, e], system time => 2024-04-25 15:41:22.418
* 1 => collected: null, system time => 2024-04-25 15:41:22.418
* 1 => collected: null, system time => 2024-04-25 15:41:22.418
* 1 => collected: null, system time => 2024-04-25 15:41:22.418
* 1 => collected: null, system time => 2024-04-25 15:41:22.418
* current watermark => -9223372036854775808, timer fires at => 1714028410000
* ---------- formatted ----------
* current watermark => 292269055-12-03 00:47:04.192, timer fires at => 2024-04-25 15:00:10.000
* 2 => collected: [f], system time => 2024-04-25 15:41:22.419
*/
public class _02_StreamingEventTimeService {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
// Deprecated and unnecessary since Flink 1.12: event time is already the default.
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
DataStreamSource<String> source = env.socketTextStream("localhost", 8888);
SingleOutputStreamOperator<String> sourceWithWaterMark = source.assignTimestampsAndWatermarks(WatermarkStrategy
.<String>forBoundedOutOfOrderness(Duration.ofSeconds(0))
.withTimestampAssigner(new SerializableTimestampAssigner<String>() {
@Override
public long extractTimestamp(String element, long recordTimestamp) {
System.out.println("元素=>" + element + "的事件时间为=>" + element.split(",")[2]);
return Long.parseLong(element.split(",")[2]);
}
})
);
SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = sourceWithWaterMark.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
@Override
public Tuple3<Integer, String, Long> map(String element) throws Exception {
return new Tuple3<>(Integer.parseInt(element.split(",")[0])
, element.split(",")[1]
, Long.parseLong(element.split(",")[2])
);
}
});
SingleOutputStreamOperator<Tuple2<Integer, List<String>>> res = map.keyBy(e -> e.f0)
.process(new KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>() {
private ValueState<List<String>> state;
private final Long DELAY = 10000L;
@Override
public void open(Configuration parameters) throws Exception {
state = getRuntimeContext().getState(new ValueStateDescriptor<>("valueState"
, TypeInformation.of(new TypeHint<List<String>>() {
})));
}
@Override
public void onTimer(long timestamp, KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>.OnTimerContext ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
Integer currentKey = ctx.getCurrentKey();
// After the first timer for a key fires, the state has already been cleared,
// so later timers for the same key print null here.
List<String> value = state.value();
System.out.println("current watermark => " + ctx.timerService().currentWatermark() + "," + currentKey + " => collected: " + value + ", system time => " + time(System.currentTimeMillis()));
state.clear();
}
@Override
public void processElement(Tuple3<Integer, String, Long> input, KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>.Context ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
List<String> value = state.value();
if (value == null) {
value = new ArrayList<>();
}
value.add(input.f1);
state.update(value);
long currentWatermark = ctx.timerService().currentWatermark();
// register an event-time timer DELAY (10 s) after this element's event time
long timer = input.f2 + DELAY;
System.out.println("current watermark => " + currentWatermark + ", timer fires at => " + timer);
System.out.println("---------- formatted ----------");
System.out.println("current watermark => " + time(currentWatermark) + ", timer fires at => " + time(timer));
ctx.timerService().registerEventTimeTimer(timer);
}
@Override
public void close() throws Exception {
state.clear();
}
public String time(long timeStamp) {
return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(timeStamp));
}
});
res.print();
env.execute();
}
}
```
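One detail the output makes visible: with `forBoundedOutOfOrderness`, watermarks are generated periodically (every 200 ms by default), not once per record, so `processElement` can still observe `Long.MIN_VALUE` as the current watermark right after the first elements arrive; each timer then fires only once a later watermark passes its timestamp. A minimal sketch of tuning that interval (the class name and value are illustrative, not taken from the job above):

```java
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class WatermarkIntervalSketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Interval at which periodic watermark generators emit; the Flink default is 200 ms.
        // A smaller value makes the watermark seen in processElement advance sooner,
        // at the cost of slightly more watermark traffic.
        env.getConfig().setAutoWatermarkInterval(100L);
    }
}
```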
3. Event-time timer example in BATCH execution mode
```java
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;
import org.apache.flink.util.Collector;
import java.io.File;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
* Raw data in the input file:
*
* 1,a,1714028400000
* 1,b,1714028410000
* 1,c,1714028410001
* 1,d,1714028420000
* 1,e,1714028420001
* 2,f,1714028400000
*/
/**
* Sample output:
* element => 1,a,1714028400000, event time => 1714028400000
* element => 1,b,1714028410000, event time => 1714028410000
* element => 1,c,1714028410001, event time => 1714028410001
* element => 1,d,1714028420000, event time => 1714028420000
* element => 1,e,1714028420001, event time => 1714028420001
* element => 2,f,1714028400000, event time => 1714028400000
* current watermark => -9223372036854775808, timer fires at => 1714028410000
* current watermark => -9223372036854775808, timer fires at => 1714028420000
* current watermark => -9223372036854775808, timer fires at => 1714028420001
* current watermark => -9223372036854775808, timer fires at => 1714028430000
* current watermark => -9223372036854775808, timer fires at => 1714028430001
* current watermark => 9223372036854775807,1 => collected: [a, b, c, d, e], system time => 2024-04-25 15:45:09.309
* current watermark => 9223372036854775807,1 => collected: null, system time => 2024-04-25 15:45:09.310
* current watermark => 9223372036854775807,1 => collected: null, system time => 2024-04-25 15:45:09.310
* current watermark => 9223372036854775807,1 => collected: null, system time => 2024-04-25 15:45:09.310
* current watermark => 9223372036854775807,1 => collected: null, system time => 2024-04-25 15:45:09.310
* current watermark => -9223372036854775808, timer fires at => 1714028410000
* current watermark => 9223372036854775807,2 => collected: [f], system time => 2024-04-25 15:45:09.310
*/
public class _03_BatchEventTimeService {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRuntimeMode(RuntimeExecutionMode.BATCH);
// Deprecated and unnecessary since Flink 1.12: event time is already the default.
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.setParallelism(1);
DataStreamSource<String> source = env.readFile(new TextInputFormat(
Path.fromLocalFile(new File("event.txt")))
, "/Users/***/Desktop/event.txt"
, FileProcessingMode.PROCESS_ONCE
, 10000);
SingleOutputStreamOperator<String> sourceWithWaterMark = source.assignTimestampsAndWatermarks(WatermarkStrategy
.<String>forBoundedOutOfOrderness(Duration.ofSeconds(0))
.withTimestampAssigner(new SerializableTimestampAssigner<String>() {
@Override
public long extractTimestamp(String element, long recordTimestamp) {
System.out.println("元素=>" + element + "的事件时间为=>" + element.split(",")[2]);
return Long.parseLong(element.split(",")[2]);
}
})
);
SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = sourceWithWaterMark.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
@Override
public Tuple3<Integer, String, Long> map(String element) throws Exception {
return new Tuple3<>(Integer.parseInt(element.split(",")[0])
, element.split(",")[1]
, Long.parseLong(element.split(",")[2])
);
}
});
SingleOutputStreamOperator<Tuple2<Integer, List<String>>> res = map.keyBy(e -> e.f0)
.process(new KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>() {
private ValueState<List<String>> state;
private final Long DELAY = 10000L;
@Override
public void open(Configuration parameters) throws Exception {
state = getRuntimeContext().getState(new ValueStateDescriptor<>("valueState"
, TypeInformation.of(new TypeHint<List<String>>() {
})));
}
@Override
public void onTimer(long timestamp, KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>.OnTimerContext ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
Integer currentKey = ctx.getCurrentKey();
// After the first timer for a key fires, the state has already been cleared,
// so later timers for the same key print null here.
List<String> value = state.value();
System.out.println("current watermark => " + ctx.timerService().currentWatermark() + "," + currentKey + " => collected: " + value + ", system time => " + time(System.currentTimeMillis()));
state.clear();
}
@Override
public void processElement(Tuple3<Integer, String, Long> input, KeyedProcessFunction<Integer, Tuple3<Integer, String, Long>, Tuple2<Integer, List<String>>>.Context ctx, Collector<Tuple2<Integer, List<String>>> out) throws Exception {
List<String> value = state.value();
if (value == null) {
value = new ArrayList<>();
}
value.add(input.f1);
state.update(value);
long currentWatermark = ctx.timerService().currentWatermark();
// register an event-time timer DELAY (10 s) after this element's event time
long timer = input.f2 + DELAY;
System.out.println("current watermark => " + currentWatermark + ", timer fires at => " + timer);
// System.out.println("---------- formatted ----------");
// System.out.println("current watermark => " + time(currentWatermark) + ", timer fires at => " + time(timer));
ctx.timerService().registerEventTimeTimer(timer);
}
@Override
public void close() throws Exception {
state.clear();
}
public String time(long timeStamp) {
return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS").format(new Date(timeStamp));
}
});
res.print();
env.execute();
}
}
```
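The contrast with section 2 is the point of this example: in BATCH mode no incremental watermarks are produced while records are processed (the printed watermark stays at `Long.MIN_VALUE`), and a single `Long.MAX_VALUE` watermark is injected once the input for a key is exhausted, so every registered event-time timer fires in one burst at the end and every firing after the first one per key sees `null` state. As a side note, `readFile` with `PROCESS_ONCE` can also be written with the unified `FileSource`, whose bounded nature lets `RuntimeExecutionMode.AUTOMATIC` choose BATCH by itself. A minimal sketch (class name and path are hypothetical; assumes Flink 1.15+ with the `flink-connector-files` dependency):

```java
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.connector.file.src.FileSource;
import org.apache.flink.connector.file.src.reader.TextLineInputFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class FileSourceSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // AUTOMATIC resolves to BATCH here because a non-monitoring FileSource is bounded.
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        FileSource<String> fileSource = FileSource
                .forRecordStreamFormat(new TextLineInputFormat(), new Path("/tmp/event.txt")) // hypothetical path
                .build();

        env.fromSource(fileSource, WatermarkStrategy.noWatermarks(), "event-file")
                .print();

        env.execute();
    }
}
```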
4. Restart strategy configuration in BATCH execution mode
```java
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;
import java.io.File;
public class _05_BatchJobFailRestart {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRuntimeMode(RuntimeExecutionMode.BATCH);
env.setParallelism(1);
// Configure the restart strategy: at most 3 restart attempts with a 5000 ms delay between them
RestartStrategies.RestartStrategyConfiguration restartStrategy = RestartStrategies.fixedDelayRestart(3, 5000);
env.getConfig().setRestartStrategy(restartStrategy);
DataStreamSource<String> source = env.readFile(new TextInputFormat(
Path.fromLocalFile(new File("no_order.txt")))
, "/Users/hhx/Desktop/no_order.txt"
, FileProcessingMode.PROCESS_ONCE
, 10000);
SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = source.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
@Override
public Tuple3<Integer, String, Long> map(String element) throws Exception {
return new Tuple3<>(Integer.parseInt(element.split(",")[0])
, element.split(",")[1]
, Long.parseLong(element.split(",")[2])
);
}
});
map.print();
env.execute();
}
}
```
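`fixedDelayRestart` is only one of the available configurations; the same setter accepts the other strategies as well. A minimal sketch of two alternatives (the class name and thresholds are illustrative):

```java
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.concurrent.TimeUnit;

public class RestartStrategySketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Failure-rate strategy: keep restarting as long as there are at most 3 failures
        // within any 5-minute window, waiting 10 s between restart attempts.
        env.getConfig().setRestartStrategy(RestartStrategies.failureRateRestart(
                3,
                Time.of(5, TimeUnit.MINUTES),
                Time.of(10, TimeUnit.SECONDS)));

        // Or disable restarts so a failing batch job surfaces its error immediately:
        // env.getConfig().setRestartStrategy(RestartStrategies.noRestart());
    }
}
```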
5. Restart strategy and checkpoint configuration in STREAMING execution mode
```java
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
public class _06_StreamJobFailRestartAndCheckpoint {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setRuntimeMode(RuntimeExecutionMode.STREAMING);
env.setParallelism(1);
// Enable checkpointing: one checkpoint every 5000 ms with at-least-once semantics
env.enableCheckpointing(5000L, CheckpointingMode.AT_LEAST_ONCE);
// Configure the restart strategy: at most 3 restart attempts with a 5000 ms delay between them
RestartStrategies.RestartStrategyConfiguration restartStrategy = RestartStrategies.fixedDelayRestart(3, 5000);
env.getConfig().setRestartStrategy(restartStrategy);
DataStreamSource<String> source = env.socketTextStream("localhost",8888);
SingleOutputStreamOperator<Tuple3<Integer, String, Long>> map = source.map(new MapFunction<String, Tuple3<Integer, String, Long>>() {
@Override
public Tuple3<Integer, String, Long> map(String element) throws Exception {
return new Tuple3<>(Integer.parseInt(element.split(",")[0])
, element.split(",")[1]
, Long.parseLong(element.split(",")[2])
);
}
});
map.print();
env.execute();
}
}
```
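Beyond the interval and the checkpointing mode, `CheckpointConfig` exposes the usual tuning knobs. A minimal sketch (class name, values, and the storage path are illustrative; `setCheckpointStorage(String)` assumes Flink 1.13+):

```java
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CheckpointTuningSketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000L, CheckpointingMode.EXACTLY_ONCE);

        CheckpointConfig config = env.getCheckpointConfig();
        // Where completed checkpoints are written (hypothetical local path).
        config.setCheckpointStorage("file:///tmp/flink-checkpoints");
        // Abort any checkpoint that has not completed within one minute.
        config.setCheckpointTimeout(60_000L);
        // Leave at least 2 s between the end of one checkpoint and the start of the next.
        config.setMinPauseBetweenCheckpoints(2_000L);
        // Never run more than one checkpoint at a time.
        config.setMaxConcurrentCheckpoints(1);
        // Keep the latest completed checkpoint when the job is cancelled so it can be restored from.
        config.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    }
}
```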