python sink to kafka

from pyflink.datastream import StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

import json

import re

import logging

import sys

from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor

from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer

from pyflink.common.typeinfo import Types

from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType

from pyflink.datastream.connectors import DeliveryGuarantee

from pyflink.common.serialization import SimpleStringSchema

from datetime import datetime

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")

logger = logging.getLogger(name)

创建 StreamExecutionEnvironment 对象

env = StreamExecutionEnvironment.get_execution_environment()

env.set_parallelism(1)

env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")

from pyflink.datastream import DataStream, StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

from pyflink.common.typeinfo import Types

env = StreamExecutionEnvironment.get_execution_environment()

data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))

TEST_KAFKA_SERVERS = "192.168.137.201:9092"

TEST_KAFKA_TOPIC = "test_topic_elink"

TEST_GROUP_ID = "pyflink_elink_midsys"

def get_kafka_customer_properties(kafka_servers: str, group_id: str):

properties = {

"bootstrap.servers": kafka_servers,

"fetch.max.bytes": "67108864",

"key.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",

"value.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",

"enable.auto.commit": "false", # 关闭kafka 自动提交,此处不能传bool 类型会报错

"group.id": group_id,

}

return properties

properties = get_kafka_customer_properties(TEST_KAFKA_SERVERS, TEST_GROUP_ID)

producer_properties = {

'bootstrap.servers': '192.168.137.201:9092'

}

producer = FlinkKafkaProducer(

topic=TEST_KAFKA_TOPIC,

producer_config=properties,

serialization_schema=SimpleStringSchema()

)

data.add_sink(producer)

data.print()

env.execute()

相关推荐
Byron__40 分钟前
AI学习_06_短期记忆与长期记忆
人工智能·python·学习
取经蜗牛2 小时前
Python 第一阶段完全指南:从零到第一个实用工具
开发语言·python
创世宇图2 小时前
【Python工程化实战】OpenTelemetry 在 Python 中的全链路追踪落地:从埋点到可视化的完整实战指南
python·分布式链路追踪·性能监控·opentelemetry·微服务可观测性
许彰午3 小时前
72_Python爬虫基础BeautifulSoup
爬虫·python·beautifulsoup
zhanghongyi_cpp3 小时前
10. 实验书3.4.2 筛选达到预警阈值的病虫害数据
python
tuddy7894644 小时前
Codex++ 安全边界探秘:从模型能力到风险防御
人工智能·python·安全
C++、Java和Python的菜鸟4 小时前
第1章 集合高级
java·jvm·python
第六五4 小时前
Python 内置装饰器
开发语言·python
梦帮科技4 小时前
UE5 GAS 实战:用 Gameplay Ability System 搭建「赛博修真」境界与技能体系
c++·人工智能·python·ue5·c#
码来的小朋友5 小时前
手把手教你用 Python + PyQt5 做一个可视化图片切图工具
开发语言·python·microsoft