pyflink过滤kafka数据

py 复制代码
from pyflink.table import (TableEnvironment, EnvironmentSettings)

# 输入、输出、过滤条件
columns_in = [
...
]

columns_out = [
...
]
filter_condition = "name = '蒋介石' and sex = '男'"


# 创建执行环境

t_env = TableEnvironment.create(EnvironmentSettings.in_streaming_mode())
t_env.get_config().get_configuration().set_string("pipeline.jars", "file:///work/flink-sql-connector-kafka-3.2.0-1.19.jar")

source_topic = "foo"
sink_topic = "baa"
kafka_servers = "kafka:9092"
kafka_consumer_group_id = "flink consumer"

columnstr = ','.join([f"`{col}` VARCHAR"  for col in columns_in])
source_ddl = f"""
CREATE TABLE kafka_source({columnstr}) WITH (
              'connector' = 'kafka',
              'topic' = '{source_topic}',
              'properties.bootstrap.servers' = '{kafka_servers}',
              'properties.group.id' = '{kafka_consumer_group_id}',
              'scan.startup.mode' = 'latest-offset',
              'format' = 'json'
            )
"""

columnstr2 = ','.join([f"`{col}` VARCHAR"  for col in columns_out])
sink_ddl = f"""
CREATE TABLE kafka_sink ({columnstr2}
    ) with (
      'connector' = 'kafka',
      'topic' = '{sink_topic}',
      'properties.bootstrap.servers' = '{kafka_servers}',
      'properties.group.id' = '{kafka_consumer_group_id}',
      'scan.startup.mode' = 'latest-offset',
      'format' = 'json'
    )
"""
# 过滤字段
filtersql = f"""
insert into kafka_sink
select {
','.join([f"`{col}`"  for col in columns_out])
}
from kafka_source
where {filter_condition}
"""
t_env.execute_sql(filtersql)
t_env.execute_sql(source_ddl)
t_env.execute_sql(sink_ddl)
相关推荐
老歌老听老掉牙7 分钟前
平面旋转与交线投影夹角计算
python·线性代数·平面·sympy
满怀101512 分钟前
Python入门(7):模块
python
无名之逆12 分钟前
Rust 开发提效神器:lombok-macros 宏库
服务器·开发语言·前端·数据库·后端·python·rust
你觉得20516 分钟前
哈尔滨工业大学DeepSeek公开课:探索大模型原理、技术与应用从GPT到DeepSeek|附视频与讲义下载方法
大数据·人工智能·python·gpt·学习·机器学习·aigc
啊喜拔牙25 分钟前
1. hadoop 集群的常用命令
java·大数据·开发语言·python·scala
IT成长日记25 分钟前
【Kafka基础】Kafka工作原理解析
分布式·kafka
__lost2 小时前
Pysides6 Python3.10 Qt 画一个时钟
python·qt
誉鏐2 小时前
PyTorch复现线性模型
人工智能·pytorch·python
州周2 小时前
kafka副本同步时HW和LEO
分布式·kafka