flink1.18 广播流 The Broadcast State Pattern 官方案例scala版本

对应官网

复制代码
https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/

测试数据

复制代码
 * 广播流 官方案例 scala版本
 * 广播状态
 *    https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/
 * 事件流:
 *    red,4side
 *    red,5side
 *    red,1side
 *    red,4side
 *
 * 规则流:
 *    rule1,4side,1side
 * 广播规则流,使用mapstate存储每种规则和对应的事件流数据 eg: {rule1 -> [4side,4side]} 遇到1side到来,则全部输出.
 * map可存储多个规则

完整scala版本代码

Scala 复制代码
package com.yy.state.operatorStateDemo

import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.common.typeinfo.{BasicTypeInfo, TypeHint, TypeInformation}
import org.apache.flink.api.java.functions.KeySelector
import org.apache.flink.api.java.typeutils.ListTypeInfo
import org.apache.flink.streaming.api.datastream.KeyedStream
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
import org.apache.flink.util.Collector

import java.time.ZoneId
import scala.collection.JavaConverters.iterableAsScalaIterableConverter
import scala.collection.mutable.ListBuffer
import scala.collection.JavaConverters._

/**
 * 广播流 官方案例 scala版本
 * 广播状态
 *    https://nightlies.apache.org/flink/flink-docs-master/docs/dev/datastream/fault-tolerance/broadcast_state/
 * 事件流:
 *    red,4side
 *    red,5side
 *    red,4side
 *    red,1side
 *    red,4side
 *
 * 规则流:
 *    rule1,4side,1side
 * 广播规则流,使用mapstate存储每种规则和对应的事件流数据 eg: {rule1 -> [4side,4side]} 遇到1side到来,则全部输出.
 * map可存储多个规则
 */
object BroadcastStateV1 {

  case class Item(color:Color,shape: Shape){
    def getShape()={
      shape
    }

  }
  case class Rule(name:String,first:Shape,second:Shape)
  case class Color(color:String)
  case class Shape(shape:String)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    val tEnv = StreamTableEnvironment.create(env)


    // 指定国内时区
    tEnv.getConfig.setLocalTimeZone(ZoneId.of("Asia/Shanghai"))


    val itemStream = env.socketTextStream("localhost", 9999)
      .map(_.split(","))
      .map(arr => Item(Color(arr(0)), Shape(arr(1))))

    val ruleStream = env.socketTextStream("localhost", 9998).broadcast()
      .map(s => Rule(s.split(",")(0), Shape(s.split(",")(1)), Shape(s.split(",")(2))))


    val ruleStateDescriptor = new MapStateDescriptor(
      "RulesBroadcastState",
      BasicTypeInfo.STRING_TYPE_INFO,
      TypeInformation.of(new TypeHint[Rule](){}));
    val ruleBroadcastStream = ruleStream.broadcast(ruleStateDescriptor)


    val colorPartitionedStream: KeyedStream[Item, Color] = itemStream
      .keyBy(new KeySelector[Item, Color] {
        override def getKey(value: Item): Color = value.color
      })


    colorPartitionedStream
      .connect(ruleBroadcastStream)
      .process(

        // type arguments in our KeyedBroadcastProcessFunction represent:
        //   1. the key of the keyed stream
        //   2. the type of elements in the non-broadcast side
        //   3. the type of elements in the broadcast side
        //   4. the type of the result, here a string

        new KeyedBroadcastProcessFunction[Color, Item, Rule, String]() {
          val  mapStateDesc =
            new MapStateDescriptor(
              "items",
              BasicTypeInfo.STRING_TYPE_INFO,
              new ListTypeInfo(classOf[Item]))

          val ruleStateDescriptor =
            new MapStateDescriptor(
              "RulesBroadcastState",
              BasicTypeInfo.STRING_TYPE_INFO,
              TypeInformation.of(new TypeHint[Rule]() {}))

          override def processElement(value: Item, ctx: KeyedBroadcastProcessFunction[Color, Item, Rule, String]#ReadOnlyContext, out: Collector[String]): Unit = {
            val state = getRuntimeContext().getMapState(mapStateDesc)
            val shape = value.getShape()
            // 遍历广播的 rule
            ctx.getBroadcastState(ruleStateDescriptor).immutableEntries().asScala.foreach{
              entry =>
                val ruleName = entry.getKey()
                val rule = entry.getValue()
                val stored: ListBuffer[Item] = {
                  if (state.contains(ruleName)) {
                    state.get(ruleName).asScala.to[ListBuffer]
                  } else {
                    new ListBuffer[Item]()
                  }
                }

                //
                if (shape == rule.second && stored.nonEmpty) {
                  stored.foreach { i =>
                    out.collect("MATCH: " + i + " - " + value);
                  }
                  stored.clear();
                }

                // there is no else{} to cover if rule.first == rule.second
                if (shape.equals(rule.first)) {
                  stored.append(value);
                }

                if (stored.isEmpty) {
                  // 规则已经匹配输出 清理状态
                  state.remove(ruleName)
                } else {
                  // 没输出则更新状态
                  state.put(ruleName, stored.asJava)
                }


            }
          }

          override def processBroadcastElement(value: Rule, ctx: KeyedBroadcastProcessFunction[Color, Item, Rule, String]#Context, out: Collector[String]): Unit = {
            ctx.getBroadcastState(ruleStateDescriptor).put(value.name, value);
          }
        }
      ).print("sink --> ")




    env.execute("flink-broadcast-state")
  }
}
相关推荐
为思念酝酿的痛6 小时前
POSIX信号量
linux·运维·服务器·后端
小羊在睡觉6 小时前
力扣84. 柱状图中最大的矩形
后端·算法·leetcode·golang·go
AI玫瑰助手7 小时前
Python函数:默认参数的定义与注意事项
开发语言·python·信息可视化
油炸自行车7 小时前
Claude Code 错误:API Error: 400 Failed to deserialize the JSON body into the
开发语言·javascript·json·trae·claude code·api error 400
肩上风骋7 小时前
C++14特性
开发语言·c++·c++14特性
swipe7 小时前
Neo4j + Graph RAG 医疗知识图谱工程实践:患者教育问答真正需要的是“关系可追溯”
后端·langchain·llm
源码宝8 小时前
MES系统源码:Java8 + SpringBoot2.7 + MySQL8 + Redis,后端源码清爽易扩展
java·后端·源码·springboot·mes系统·源码二开·mes源码
JAVA社区8 小时前
Java高级全套教程(十)—— SpringCloudAlibaba超详细实战详解
java·开发语言·spring cloud·面试·职场和发展
弥树子8 小时前
踩坑记录:服务器内网调用接口,真实请求URL与官方公开URL不一致问题排查
开发语言·php
金銀銅鐵8 小时前
[Java] 如何理解 class 文件中方法的 descriptor?
java·后端