python sink to kafka

from pyflink.datastream import StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

import json

import re

import logging

import sys

from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor

from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer

from pyflink.common.typeinfo import Types

from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType

from pyflink.datastream.connectors import DeliveryGuarantee

from pyflink.common.serialization import SimpleStringSchema

from datetime import datetime

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")

logger = logging.getLogger(name)

创建 StreamExecutionEnvironment 对象

env = StreamExecutionEnvironment.get_execution_environment()

env.set_parallelism(1)

env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")

from pyflink.datastream import DataStream, StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

from pyflink.common.typeinfo import Types

env = StreamExecutionEnvironment.get_execution_environment()

data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))

TEST_KAFKA_SERVERS = "192.168.137.201:9092"

TEST_KAFKA_TOPIC = "test_topic_elink"

TEST_GROUP_ID = "pyflink_elink_midsys"

def get_kafka_customer_properties(kafka_servers: str, group_id: str):

properties = {

"bootstrap.servers": kafka_servers,

"fetch.max.bytes": "67108864",

"key.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",

"value.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",

"enable.auto.commit": "false", # 关闭kafka 自动提交,此处不能传bool 类型会报错

"group.id": group_id,

}

return properties

properties = get_kafka_customer_properties(TEST_KAFKA_SERVERS, TEST_GROUP_ID)

producer_properties = {

'bootstrap.servers': '192.168.137.201:9092'

}

producer = FlinkKafkaProducer(

topic=TEST_KAFKA_TOPIC,

producer_config=properties,

serialization_schema=SimpleStringSchema()

)

data.add_sink(producer)

data.print()

env.execute()

相关推荐
Polar__Star10 小时前
如何结合计划任务实现自动定时备份任务配置_全自动化运维管理
jvm·数据库·python
weixin_5806140015 小时前
如何提取SQL日期中的年份_使用YEAR或EXTRACT函数
jvm·数据库·python
2301_8135995515 小时前
SQL生产环境规范_数据库使用最佳实践
jvm·数据库·python
李可以量化15 小时前
QMT 量化实战:用 Python 实现线性回归通道,精准识别趋势中的支撑与压力(下)
python·qmt·量化 qmt ptrade
a95114164216 小时前
Go 中通过 channel 传递切片时的数据竞争与深拷贝解决方案
jvm·数据库·python
Dxy123931021616 小时前
Python 使用正则表达式将多个空格替换为一个空格
开发语言·python·正则表达式
qq_1898070316 小时前
如何修改RAC数据库名_NID工具在集群环境下的改名步骤
jvm·数据库·python
zhangchaoxies16 小时前
如何检测SQL注入风险_利用模糊测试技术发现漏洞
jvm·数据库·python
Luca_kill16 小时前
MCP数据采集革命:从传统爬虫到智能代理的技术进化
爬虫·python·ai·数据采集·mcp·webscraping·集蜂云
zhangchaoxies17 小时前
CSS如何实现响应式弹性网格布局_配合media query修改flex-wrap属性
jvm·数据库·python