python sink to kafka

from pyflink.datastream import StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

import json

import re

import logging

import sys

from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor

from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer

from pyflink.common.typeinfo import Types

from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType

from pyflink.datastream.connectors import DeliveryGuarantee

from pyflink.common.serialization import SimpleStringSchema

from datetime import datetime

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")

logger = logging.getLogger(name)

创建 StreamExecutionEnvironment 对象

env = StreamExecutionEnvironment.get_execution_environment()

env.set_parallelism(1)

env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")

from pyflink.datastream import DataStream, StreamExecutionEnvironment

from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction

from pyflink.common.typeinfo import Types

env = StreamExecutionEnvironment.get_execution_environment()

data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))

TEST_KAFKA_SERVERS = "192.168.137.201:9092"

TEST_KAFKA_TOPIC = "test_topic_elink"

TEST_GROUP_ID = "pyflink_elink_midsys"

def get_kafka_customer_properties(kafka_servers: str, group_id: str):

properties = {

"bootstrap.servers": kafka_servers,

"fetch.max.bytes": "67108864",

"key.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",

"value.deserializer": "org.apache.kafka.common.serialization.StringDeserializer",

"enable.auto.commit": "false", # 关闭kafka 自动提交,此处不能传bool 类型会报错

"group.id": group_id,

}

return properties

properties = get_kafka_customer_properties(TEST_KAFKA_SERVERS, TEST_GROUP_ID)

producer_properties = {

'bootstrap.servers': '192.168.137.201:9092'

}

producer = FlinkKafkaProducer(

topic=TEST_KAFKA_TOPIC,

producer_config=properties,

serialization_schema=SimpleStringSchema()

)

data.add_sink(producer)

data.print()

env.execute()

相关推荐
cup1110 小时前
[技术复盘] Windows Python 打包实战:Nuitka 环境踩坑总结与 CI 自动化构建全指南
python·ai·环境变量·ci·nuitka·skill
aqi0012 小时前
15天学会AI应用开发(七)有了大模型为什么还要引入RAG
人工智能·python·大模型·ai编程·ai应用
金銀銅鐵14 小时前
用 Python 实现 Take-Away 游戏
python·游戏
copyer_xyf15 小时前
Agent 流程编排
后端·python·agent
copyer_xyf15 小时前
Agent RAG
后端·python·agent
copyer_xyf15 小时前
【RAG】向量数据库:milvus
后端·python·agent
copyer_xyf15 小时前
Agent 记忆管理
后端·python·agent
星云穿梭1 天前
用Python写一个带图形界面的学生管理系统——完整教程
python
金銀銅鐵1 天前
用 Pygame 实现 15 puzzle
python·数学·游戏