from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
import json
import re
import logging
import sys
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, FlinkKafkaProducer
from pyflink.common.typeinfo import Types, TypeInformation
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from pyflink.datastream.connectors import DeliveryGuarantee
from pyflink.common.serialization import SimpleStringSchema
from datetime import datetime
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")
logger = logging.getLogger(__name__)
# Create and configure the StreamExecutionEnvironment
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")
# Socket example used to illustrate per-operator parallelism; it reuses the environment
# configured above, so only DataStream needs importing here.
from pyflink.datastream import DataStream
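# Note: wrapping the Java-level socketTextStream through the Py4J gateway (below) is a common
# workaround because this PyFlink version does not expose a Python socket text source; the
# host and port are taken from the original post.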
data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))
# The map operator is wrapped into one task; with parallelism 8 it runs as 8 subtasks
ds1 = data.map(lambda s: s.upper()).set_parallelism(8)
# Sink operator (print) with parallelism 4
ds1.print().set_parallelism(4)
# Submit the job; without this call the pipeline above is only defined, never executed
env.execute()
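# Observation aid (my note, not from the original post): with sink parallelism above 1,
# print() prefixes each record with the producing subtask index (e.g. "3> HELLO"), which is an
# easy way to see how many subtasks actually run. Operator-level set_parallelism() overrides
# the environment default set via env.set_parallelism(1), so the map runs with 8 subtasks and
# the print sink with 4 here.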
PyFlink task parallelism question