背景
算子列表状态是日常开发中比较常见的一种状态。本文通过官方示例来看一下如何使用算子列表状态。
算子列表状态
算子列表状态支持应用并行度的扩缩容:从检查点恢复且并行度发生变化时,各并行子任务保存的状态列表会在逻辑上合并成一个整体列表,再平均分配给新的各个并行子任务,如下所示:
使用方法参见官方示例,我加了几条注释:
java
/**
 * Sink that buffers incoming elements in memory and hands them off in batches once
 * {@code threshold} elements have accumulated.
 *
 * <p>The class implements {@code CheckpointedFunction} so that the elements still sitting in the
 * buffer are copied into operator list state on every snapshot and copied back into the buffer
 * when the function is initialized from restored state.
 */
public class BufferingSink
        implements SinkFunction<Tuple2<String, Integer>>, CheckpointedFunction {

    /** Buffer size at which the buffered elements are emitted and the buffer is cleared. */
    private final int threshold;

    /** Operator-level list state handle; assigned in {@link #initializeState}. */
    private transient ListState<Tuple2<String, Integer>> checkpointedState;

    /** Local, per-instance buffer of elements that have not been emitted yet. */
    private final List<Tuple2<String, Integer>> bufferedElements;

    /**
     * Creates a sink that flushes its buffer once it holds {@code threshold} elements.
     *
     * @param threshold number of buffered elements that triggers a flush
     */
    public BufferingSink(int threshold) {
        this.threshold = threshold;
        this.bufferedElements = new ArrayList<>();
    }

    /**
     * Adds {@code value} to the local buffer and flushes the buffer when it reaches the threshold.
     *
     * <p>This method only touches the local buffer; the operator list state is written in
     * {@link #snapshotState} instead.
     *
     * @param value the incoming element
     * @param contex the sink context (unused)
     */
    @Override
    public void invoke(Tuple2<String, Integer> value, Context contex) throws Exception {
        bufferedElements.add(value);
        if (bufferedElements.size() >= threshold) {
            for (Tuple2<String, Integer> element : bufferedElements) {
                // send it to the sink
            }
            bufferedElements.clear();
        }
    }

    /**
     * Copies the current contents of the local buffer into the operator list state.
     *
     * @param context the snapshot context (unused)
     */
    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        // Replace whatever the previous snapshot stored with the current buffer in a single call,
        // instead of clearing the state and re-adding the elements one by one.
        checkpointedState.update(bufferedElements);
    }

    /**
     * Registers the operator list state and, when restoring, refills the local buffer from it.
     *
     * @param context gives access to the operator state store and the restore flag
     */
    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        ListStateDescriptor<Tuple2<String, Integer>> descriptor =
                new ListStateDescriptor<>(
                        "buffered-elements",
                        TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));

        // Define (look up or create) the operator list state under the descriptor's name.
        checkpointedState = context.getOperatorStateStore().getListState(descriptor);

        if (context.isRestored()) {
            // Copy the restored state elements back into the local buffer.
            for (Tuple2<String, Integer> element : checkpointedState.get()) {
                bufferedElements.add(element);
            }
        }
    }
}