flink1.18 广播流 The Broadcast State Pattern 官方案例scala版本

对应官网

复制代码
https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/

测试数据

复制代码
 * 广播流 官方案例 scala版本
 * 广播状态
 *    https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/
 * 事件流:
 *    red,4side
 *    red,5side
 *    red,1side
 *    red,4side
 *
 * 规则流:
 *    rule1,4side,1side
 * 广播规则流,使用mapstate存储每种规则和对应的事件流数据 eg: {rule1 -> [4side,4side]} 遇到1side到来,则全部输出.
 * map可存储多个规则

完整scala版本代码

Scala 复制代码
package com.yy.state.operatorStateDemo

import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeHint, TypeInformation}
import org.apache.flink.api.java.functions.KeySelector
import org.apache.flink.api.java.typeutils.ListTypeInfo
import org.apache.flink.streaming.api.datastream.KeyedStream
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
import org.apache.flink.util.Collector

import java.time.ZoneId
import scala.collection.JavaConverters.iterableAsScalaIterableConverter
import scala.collection.mutable.ListBuffer
import scala.collection.JavaConverters._

/**
 * 广播流 官方案例 scala版本
 * 广播状态
 *    https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/
 * 事件流:
 *    red,4side
 *    red,5side
 *    red,4side
 *    red,1side
 *    red,4side
 *
 * 规则流:
 *    rule1,4side,1side
 * 广播规则流,使用mapstate存储每种规则和对应的事件流数据 eg: {rule1 -> [4side,4side]} 遇到1side到来,则全部输出.
 * map可存储多个规则
 */
object BroadcastStateV1 {

  case class Item(color:Color,shape: Shape){
    def getShape()={
      shape
    }

  }
  case class Rule(name:String,first:Shape,second:Shape)
  case class Color(color:String)
  case class Shape(shape:String)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val tEnv = StreamTableEnvironment.create(env)


    // 指定国内时区
    tEnv.getConfig.setLocalTimeZone(ZoneId.of("Asia/Shanghai"))


    val itemStream = env.socketTextStream("localhost", 9999)
      .map(_.split(","))
      .map(arr => Item(Color(arr(0)), Shape(arr(1))))

    val ruleStream = env.socketTextStream("localhost", 9998).broadcast()
      .map(s => Rule(s.split(",")(0), Shape(s.split(",")(1)), Shape(s.split(",")(2))))


    val ruleStateDescriptor = new MapStateDescriptor(
      "RulesBroadcastState",
      BasicTypeInfo.STRING_TYPE_INFO,
      TypeInformation.of(new TypeHint[Rule](){}));
    val ruleBroadcastStream = ruleStream.broadcast(ruleStateDescriptor)


    val colorPartitionedStream: KeyedStream[Item, Color] = itemStream
      .keyBy(new KeySelector[Item, Color] {
        override def getKey(value: Item): Color = value.color
      })


    colorPartitionedStream
      .connect(ruleBroadcastStream)
      .process(

        // type arguments in our KeyedBroadcastProcessFunction represent:
        //   1. the key of the keyed stream
        //   2. the type of elements in the non-broadcast side
        //   3. the type of elements in the broadcast side
        //   4. the type of the result, here a string

        new KeyedBroadcastProcessFunction[Color, Item, Rule, String]() {
          val  mapStateDesc =
            new MapStateDescriptor(
              "items",
              BasicTypeInfo.STRING_TYPE_INFO,
              new ListTypeInfo(classOf[Item]))

          val ruleStateDescriptor =
            new MapStateDescriptor(
              "RulesBroadcastState",
              BasicTypeInfo.STRING_TYPE_INFO,
              TypeInformation.of(new TypeHint[Rule]() {}))

          override def processElement(value: Item, ctx: KeyedBroadcastProcessFunction[Color, Item, Rule, String]#ReadOnlyContext, out: Collector[String]): Unit = {
            val state = getRuntimeContext().getMapState(mapStateDesc)
            val shape = value.getShape()
            // 遍历广播的 rule
            ctx.getBroadcastState(ruleStateDescriptor).immutableEntries().asScala.foreach{
              entry =>
                val ruleName = entry.getKey()
                val rule = entry.getValue()
                val stored: ListBuffer[Item] = {
                  if (state.contains(ruleName)) {
                    state.get(ruleName).asScala.to[ListBuffer]
                  } else {
                    new ListBuffer[Item]()
                  }
                }

                //
                if (shape == rule.second && stored.nonEmpty) {
                  stored.foreach { i =>
                    out.collect("MATCH: " + i + " - " + value);
                  }
                  stored.clear();
                }

                // there is no else{} to cover if rule.first == rule.second
                if (shape.equals(rule.first)) {
                  stored.append(value);
                }

                if (stored.isEmpty) {
                  // 规则已经匹配输出 清理状态
                  state.remove(ruleName)
                } else {
                  // 没输出则更新状态
                  state.put(ruleName, stored.asJava)
                }


            }
          }

          override def processBroadcastElement(value: Rule, ctx: KeyedBroadcastProcessFunction[Color, Item, Rule, String]#Context, out: Collector[String]): Unit = {
            ctx.getBroadcastState(ruleStateDescriptor).put(value.name, value);
          }
        }
      ).print("sink --> ")




    env.execute("flink-broadcast-state")
  }
}
相关推荐
Java水解1 小时前
Spring Boot 4 升级指南:告别RestTemplate,拥抱现代HTTP客户端
spring boot·后端
宫水三叶的刷题日记1 小时前
工商银行今年的年终奖。。
后端
大黄评测1 小时前
双库协同,各取所长:.NET Core 中 PostgreSQL 与 SQLite 的优雅融合实战
后端
Java编程爱好者1 小时前
Java 后端定时任务怎么选:@Scheduled、Quartz 还是 XXL-Job?
后端
Java编程爱好者1 小时前
线程池用完不Shutdown,CPU和内存都快哭了
后端
像风一样的男人@1 小时前
python --读取psd文件
开发语言·python·深度学习
输出输入2 小时前
前端核心技术
开发语言·前端
加油,小猿猿2 小时前
Java开发日志-双数据库事务问题
java·开发语言·数据库
神奇小汤圆2 小时前
Unsafe魔法类深度解析:Java底层操作的终极指南
后端
薛定谔的猫喵喵2 小时前
天然气压力能利用系统综合性评价平台:基于Python和PyQt5的AHP与模糊综合评价集成应用
开发语言·python·qt