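// Scala source (copied from a web page; the page's "copy code" header was turned into this comment so the file compiles)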
package com.yy.state.operatorStateDemo
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeHint, TypeInformation}
import org.apache.flink.api.java.functions.KeySelector
import org.apache.flink.api.java.typeutils.ListTypeInfo
import org.apache.flink.streaming.api.datastream.KeyedStream
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
import org.apache.flink.util.Collector
import java.time.ZoneId
import scala.collection.JavaConverters.iterableAsScalaIterableConverter
import scala.collection.mutable.ListBuffer
import scala.collection.JavaConverters._
/**
 * Scala port of the official Flink "Broadcast State" example.
 *
 * See https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/
 *
 * Event stream (socket `localhost:9999`), one `color,shape` pair per line:
 * {{{
 * red,4side
 * red,5side
 * red,4side
 * red,1side
 * red,4side
 * }}}
 *
 * Rule stream (socket `localhost:9998`), one `name,firstShape,secondShape` triple per line:
 * {{{
 * rule1,4side,1side
 * }}}
 *
 * The rule stream is broadcast and kept in broadcast state, so several rules can be active at
 * once. For every key (colour) a `MapState` stores, per rule name, the items whose shape equals
 * the rule's first shape, e.g. `{rule1 -> [4side, 4side]}`. When an item with the rule's second
 * shape (`1side` above) arrives for that key, every buffered item is emitted as a match and the
 * buffer for that rule is cleared.
 */
object BroadcastStateV1 {

  /** An event of the keyed stream; the stream is keyed by `color`. */
  case class Item(color: Color, shape: Shape) {
    /** Java-style accessor for `shape`. */
    def getShape(): Shape = shape
  }

  /** A pattern: an item of shape `first` followed, within the same key, by one of shape `second`. */
  case class Rule(name: String, first: Shape, second: Shape)
  case class Color(color: String)
  case class Shape(shape: String)

  /**
   * Builds the descriptor of the broadcast state holding the rules (rule name -> rule).
   *
   * The descriptor used to create the broadcast stream and the one used inside the process
   * function must agree, so both are obtained from this single factory.
   */
  private def newRuleStateDescriptor(): MapStateDescriptor[String, Rule] =
    new MapStateDescriptor[String, Rule](
      "RulesBroadcastState",
      BasicTypeInfo.STRING_TYPE_INFO,
      TypeInformation.of(new TypeHint[Rule]() {}))

  /** Splits a comma-separated input line into trimmed fields. */
  private def splitFields(line: String): Array[String] = line.split(",").map(_.trim)

  /**
   * Builds and runs the job: items read from port 9999 are keyed by colour and connected with
   * the broadcast rules read from port 9998; matches are printed with the prefix `sink --> `.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Lines with fewer than two fields are dropped instead of failing the job
    // with an ArrayIndexOutOfBoundsException.
    val itemStream = env.socketTextStream("localhost", 9999)
      .map(line => splitFields(line))
      .filter(fields => fields.length >= 2)
      .map(fields => Item(Color(fields(0)), Shape(fields(1))))

    // Each line is split once; lines with fewer than three fields are dropped.
    // The stream is broadcast only once, below, together with its state descriptor.
    val ruleStream = env.socketTextStream("localhost", 9998)
      .map(line => splitFields(line))
      .filter(fields => fields.length >= 3)
      .map(fields => Rule(fields(0), Shape(fields(1)), Shape(fields(2))))

    val ruleBroadcastStream = ruleStream.broadcast(newRuleStateDescriptor())

    val colorPartitionedStream: KeyedStream[Item, Color] = itemStream
      .keyBy(new KeySelector[Item, Color] {
        override def getKey(value: Item): Color = value.color
      })

    colorPartitionedStream
      .connect(ruleBroadcastStream)
      .process(
        // type arguments in our KeyedBroadcastProcessFunction represent:
        //   1. the key of the keyed stream
        //   2. the type of elements in the non-broadcast side
        //   3. the type of elements in the broadcast side
        //   4. the type of the result, here a string
        new KeyedBroadcastProcessFunction[Color, Item, Rule, String]() {

          // Keyed state: rule name -> items already seen that match the rule's first shape.
          private val itemsStateDescriptor =
            new MapStateDescriptor[String, java.util.List[Item]](
              "items",
              BasicTypeInfo.STRING_TYPE_INFO,
              new ListTypeInfo[Item](classOf[Item]))

          // Same descriptor as the one the rule stream was broadcast with.
          private val ruleStateDescriptor = newRuleStateDescriptor()

          /**
           * Checks the incoming item against every broadcast rule: emits one `MATCH` line per
           * buffered item when the item completes a rule, and buffers the item when it starts one.
           */
          override def processElement(
              value: Item,
              ctx: KeyedBroadcastProcessFunction[Color, Item, Rule, String]#ReadOnlyContext,
              out: Collector[String]): Unit = {
            val state = getRuntimeContext().getMapState(itemsStateDescriptor)
            val shape = value.getShape()

            // Iterate over all broadcast rules.
            ctx.getBroadcastState(ruleStateDescriptor).immutableEntries().asScala.foreach { entry =>
              val ruleName = entry.getKey()
              val rule = entry.getValue()

              // Copy the buffered items (if any) into a local buffer; a single state lookup,
              // treating a null result as "nothing stored" like the official example does.
              val stored = ListBuffer.empty[Item]
              Option(state.get(ruleName)).foreach(items => stored ++= items.asScala)

              if (shape == rule.second && stored.nonEmpty) {
                stored.foreach(first => out.collect(s"MATCH: $first - $value"))
                stored.clear()
              }

              // there is no else{} to cover if rule.first == rule.second
              if (shape == rule.first) {
                stored += value
              }

              if (stored.isEmpty) {
                // The rule matched and was emitted (or nothing is buffered): clean up the state.
                state.remove(ruleName)
              } else {
                // Nothing emitted: persist the updated buffer.
                state.put(ruleName, stored.asJava)
              }
            }
          }

          /** Stores (or replaces) the rule in broadcast state under its name. */
          override def processBroadcastElement(
              value: Rule,
              ctx: KeyedBroadcastProcessFunction[Color, Item, Rule, String]#Context,
              out: Collector[String]): Unit = {
            ctx.getBroadcastState(ruleStateDescriptor).put(value.name, value)
          }
        }
      ).print("sink --> ")

    env.execute("flink-broadcast-state")
  }
}