python sink to kafka

from pyflink.datastream import StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

import json

import re

import logging

import sys

from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor

from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer

from pyflink.common.typeinfo import Types

from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType

from pyflink.datastream.connectors import DeliveryGuarantee

from pyflink.common.serialization import SimpleStringSchema

from datetime import datetime

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")

logger = logging.getLogger(name)

创建 StreamExecutionEnvironment 对象

env = StreamExecutionEnvironment.get_execution_environment()

env.set_parallelism(1)

env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")

from pyflink.datastream import DataStream, StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

from pyflink.common.typeinfo import Types

env = StreamExecutionEnvironment.get_execution_environment()

data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))

TEST_KAFKA_SERVERS = "192.168.137.201:9092"

TEST_KAFKA_TOPIC = "test_topic_elink"

TEST_GROUP_ID = "pyflink_elink_midsys"

def get_kafka_customer_properties(kafka_servers: str, group_id: str):

properties = {

"bootstrap.servers": kafka_servers,

"fetch.max.bytes": "67108864",

"key.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",

"value.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",

"enable.auto.commit": "false", # 关闭kafka 自动提交,此处不能传bool 类型会报错

"group.id": group_id,

}

return properties

properties = get_kafka_customer_properties(TEST_KAFKA_SERVERS, TEST_GROUP_ID)

producer_properties = {

'bootstrap.servers': '192.168.137.201:9092'

}

producer = FlinkKafkaProducer(

topic=TEST_KAFKA_TOPIC,

producer_config=properties,

serialization_schema=SimpleStringSchema()

)

data.add_sink(producer)

data.print()

env.execute()

相关推荐
金銀銅鐵5 小时前
[Python] 扩展欧几里得算法
python·数学·算法
Duckdblab5 小时前
DuckDB 性能调优终极指南:打造闪电般的分析体验
python
带派擂总6 小时前
Python全栈开发精华版最全合集(包含各种面试题) Day24_异常和错误
python
金銀銅鐵9 小时前
n^5 和 n 的个位数是否总相等?
python·数学
aqi0012 小时前
15天学会AI应用开发(九)利用Chroma持久化向量数据
人工智能·python·大模型·ai编程·ai应用
金銀銅鐵12 小时前
借助 Pygame 探索最大公约数的规律
python·数学·游戏
ServBay1 天前
9 个 Python 第三方库推荐,不用 AI 都好像多出一个团队
后端·python
用户8356290780511 天前
如何使用 Python 添加和管理 Excel 批注(完整示例)
后端·python
用户8356290780511 天前
使用 Python 管理 Excel 工作表:创建、复制、删除与重命名
后端·python