```py
from pyflink.table import EnvironmentSettings, TableEnvironment

# Input columns, output columns, and the filter condition
columns_in = [
    ...
]
columns_out = [
    ...
]
filter_condition = "name = '蒋介石' and sex = '男'"  # i.e. name = 'Chiang Kai-shek' and sex = 'male'
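# The condition string is spliced verbatim into the WHERE clause below,
# so it must be valid Flink SQL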
# Create the execution environment
t_env = TableEnvironment.create(EnvironmentSettings.in_streaming_mode())
t_env.get_config().get_configuration().set_string(
    "pipeline.jars",
    "file:///work/flink-sql-connector-kafka-3.2.0-1.19.jar",
)
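# Note: flink-sql-connector-kafka-3.2.0-1.19 is connector 3.2.0 built for
# Flink 1.19; the connector build must match the running Flink version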
source_topic = "foo"
sink_topic = "baa"
kafka_servers = "kafka:9092"
kafka_consumer_group_id = "flink-consumer"
# Source table: one VARCHAR column per input field
columnstr = ",".join(f"`{col}` VARCHAR" for col in columns_in)
source_ddl = f"""
    CREATE TABLE kafka_source ({columnstr}) WITH (
        'connector' = 'kafka',
        'topic' = '{source_topic}',
        'properties.bootstrap.servers' = '{kafka_servers}',
        'properties.group.id' = '{kafka_consumer_group_id}',
        'scan.startup.mode' = 'latest-offset',
        'format' = 'json'
    )
"""
# Sink table: 'scan.startup.mode' and 'properties.group.id' are source-side
# options, so they are omitted here
columnstr2 = ",".join(f"`{col}` VARCHAR" for col in columns_out)
sink_ddl = f"""
    CREATE TABLE kafka_sink ({columnstr2}) WITH (
        'connector' = 'kafka',
        'topic' = '{sink_topic}',
        'properties.bootstrap.servers' = '{kafka_servers}',
        'format' = 'json'
    )
"""
# Project the output columns and apply the filter
select_list = ",".join(f"`{col}`" for col in columns_out)
filtersql = f"""
    INSERT INTO kafka_sink
    SELECT {select_list}
    FROM kafka_source
    WHERE {filter_condition}
"""
# Create the source and sink tables first; the INSERT references both
t_env.execute_sql(source_ddl)
t_env.execute_sql(sink_ddl)
# execute_sql submits the INSERT job asynchronously; wait() blocks so the
# client does not exit while the streaming job runs
t_env.execute_sql(filtersql).wait()
```
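To make the generated statements concrete, here is a minimal sketch with hypothetical column lists (`name`, `sex`, and `addr` are placeholders, not fixed by the job above):

```py
# Hypothetical column lists, purely to illustrate the generated SQL
columns_in = ["name", "sex", "addr"]
columns_out = ["name", "addr"]
filter_condition = "name = '蒋介石' and sex = '男'"

columnstr = ",".join(f"`{col}` VARCHAR" for col in columns_in)
print(columnstr)
# `name` VARCHAR,`sex` VARCHAR,`addr` VARCHAR

select_list = ",".join(f"`{col}`" for col in columns_out)
print(f"INSERT INTO kafka_sink SELECT {select_list} FROM kafka_source WHERE {filter_condition}")
# INSERT INTO kafka_sink SELECT `name`,`addr` FROM kafka_source WHERE name = '蒋介石' and sex = '男'
```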