# PyFlink example: sink a socket text stream to Kafka

from pyflink.datastream import StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

import json

import re

import logging

import sys

from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor

from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer

from pyflink.common.typeinfo import Types

from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType

from pyflink.datastream.connectors import DeliveryGuarantee

from pyflink.common.serialization import SimpleStringSchema

from datetime import datetime

# Send INFO-and-above log records to stdout with a timestamp/level prefix.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")

# Module-level logger named after this module. The original passed the
# undefined name `name` (the dunder underscores were lost), which raises
# NameError as soon as the script is loaded.
logger = logging.getLogger(__name__)

# Create the StreamExecutionEnvironment object.
env = StreamExecutionEnvironment.get_execution_environment()

# Run the job with a parallelism of 1.
env.set_parallelism(1)

# Register the Kafka connector jar with the environment.
# NOTE(review): the jar name targets Flink 1.14.4 / Scala 2.11 -- confirm it
# matches the installed PyFlink version before running.
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")

from pyflink.datastream import DataStream, StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

from pyflink.common.typeinfo import Types

# Build the source on the `env` that was already configured earlier in this
# script. The original requested a second execution environment here, so the
# stream was created on an environment that did not carry the parallelism and
# connector-jar settings applied to the first one.
#
# The socket source is reached through the wrapped Java environment
# (`_j_stream_execution_environment`) and the resulting Java stream is wrapped
# in a Python DataStream.
data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))

# Kafka connection settings used by the rest of this script.
TEST_KAFKA_SERVERS: str = "192.168.137.201:9092"  # bootstrap server address (host:port)
TEST_KAFKA_TOPIC: str = "test_topic_elink"  # topic the producer writes to
TEST_GROUP_ID: str = "pyflink_elink_midsys"  # group id placed in the consumer properties

def get_kafka_customer_properties(kafka_servers: str, group_id: str) -> dict:
    """Build the Kafka consumer property map.

    Args:
        kafka_servers: Value for ``bootstrap.servers`` (e.g. ``"host:9092"``).
        group_id: Value for ``group.id``.

    Returns:
        A new dict of string keys to string values on every call.
    """
    properties = {
        "bootstrap.servers": kafka_servers,
        "fetch.max.bytes": "67108864",
        "key.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",
        "value.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",
        # Disable Kafka auto-commit. The value must be the string "false";
        # passing a Python bool here raises an error.
        "enable.auto.commit": "false",
        "group.id": group_id,
    }
    return properties

# Consumer-side property map. Nothing in this script creates a consumer, but
# the module-level name is kept so code that refers to it keeps working.
properties = get_kafka_customer_properties(TEST_KAFKA_SERVERS, TEST_GROUP_ID)

# Producer-side property map: only the broker address is needed. It reuses the
# shared constant instead of repeating the same address literal.
producer_properties = {
    'bootstrap.servers': TEST_KAFKA_SERVERS,
}

# The original passed the consumer `properties` (deserializers, group.id,
# fetch.max.bytes) to the producer and left `producer_properties` unused.
# The producer is now configured with its own property map.
producer = FlinkKafkaProducer(
    topic=TEST_KAFKA_TOPIC,
    producer_config=producer_properties,
    serialization_schema=SimpleStringSchema(),
)

# Write every record of the socket stream to Kafka and also print it.
data.add_sink(producer)
data.print()

# Submit the job; nothing runs until execute() is called.
env.execute()

相关推荐
小白学大数据17 分钟前
线上故障急救:依托 OpenClaw 日志排查 403 和 503 问题
爬虫·python·selenium·数据分析
databook1 小时前
用SymPy自动因式分解:从面积拼图到代数恒等式
python·数学·动效
艳阳天_.1 小时前
星瀚弹框页面实现
java·前端·python
kernelcraft1 小时前
Boto3:Python 操作 AWS 的官方 SDK
开发语言·python·其他·aws
D3bugRealm1 小时前
cryptography:Python 开发者的加密标准库
开发语言·python·其他
HappyAcmen2 小时前
5.通义向量模型调用
python
python-码博士2 小时前
PyTorch 从零实现 Flow Matching:训练、采样、画图一条龙
人工智能·pytorch·python
王小王-1233 小时前
基于Python的车联网数据聚合与可视化分析平台设计与实现
python·车联网·新能源汽车·车联网聚合分析
叫我:松哥3 小时前
基于Flask框架的校园二手书籍交易平台,注重校园场景的特殊需求,通过学号认证保障用户真实性
后端·python·sqlite·flask·bootstrap
namexingyun3 小时前
开源前端生态如何成为 AI UI 生成的“燃料“:shadcn/ui、Tailwind CSS、Storybook 技术价值全解剖
java·前端·人工智能·python·ui·开源·ai编程