Python sink to Kafka

from pyflink.datastream import StreamExecutionEnvironment, DataStream
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, FlinkKafkaProducer
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from pyflink.datastream.connectors import DeliveryGuarantee
from pyflink.common.typeinfo import Types
from pyflink.common.serialization import SimpleStringSchema
import json
import re
import logging
import sys
from datetime import datetime

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")
logger = logging.getLogger(__name__)

# Create the StreamExecutionEnvironment and load the Kafka connector jar
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")


# PyFlink does not expose a socket source helper, so wrap the Java socketTextStream in a DataStream
data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))
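To feed this source while testing, you can run a simple socket server on that host, for example with nc -lk 8899, and type lines into it; each line becomes one record in the stream.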

TEST_KAFKA_SERVERS = "192.168.137.201:9092"
TEST_KAFKA_TOPIC = "test_topic_elink"
TEST_GROUP_ID = "pyflink_elink_midsys"

def get_kafka_customer_properties(kafka_servers: str, group_id: str):
    properties = {
        "bootstrap.servers": kafka_servers,
        "fetch.max.bytes": "67108864",
        "key.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",
        "value.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",
        "enable.auto.commit": "false",  # disable Kafka auto-commit; a bool is rejected here, the value must be a string
        "group.id": group_id,
    }
    return properties

properties = get_kafka_customer_properties(TEST_KAFKA_SERVERS, TEST_GROUP_ID)
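FlinkKafkaConsumer is imported above but never used in this example. As a minimal sketch (assuming the same topic, SimpleStringSchema, and the properties built above), the consumer side could be wired up like this:

# Hypothetical Kafka source reusing the consumer properties above
consumer = FlinkKafkaConsumer(
    topics=TEST_KAFKA_TOPIC,
    deserialization_schema=SimpleStringSchema(),
    properties=properties
)
consumer.set_start_from_latest()  # only read records arriving after the job starts
kafka_stream = env.add_source(consumer)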

producer_properties = {
    'bootstrap.servers': '192.168.137.201:9092'
}

producer = FlinkKafkaProducer(
    topic=TEST_KAFKA_TOPIC,
    producer_config=producer_properties,  # the producer only needs bootstrap.servers, not the consumer settings above
    serialization_schema=SimpleStringSchema()
)
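FlinkKafkaProducer writes with at-least-once delivery by default; it also accepts a semantic argument (for example Semantic.EXACTLY_ONCE) if stronger guarantees are needed. The DeliveryGuarantee import at the top is used by the newer unified sink APIs such as KafkaSink, not by FlinkKafkaProducer, and is unused here.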

data.add_sink(producer)  # write each record to the Kafka topic
data.print()             # also echo records to stdout for debugging
env.execute()
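Once the job is running, lines typed into the socket should show up on test_topic_elink; one way to verify is Kafka's console consumer, e.g. kafka-console-consumer.sh --bootstrap-server 192.168.137.201:9092 --topic test_topic_elink --from-beginning.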
