from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
import json
import re
import logging
import sys
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from pyflink.datastream.connectors import DeliveryGuarantee
from pyflink.common.serialization import SimpleStringSchema
from datetime import datetime
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")
logger = logging.getLogger(__name__)

# DataStream is the only name below that was not already imported at the top of the file.
from pyflink.datastream import DataStream

# Create the single StreamExecutionEnvironment for this job.
# (The original script called get_execution_environment() twice, discarding the
# first env that had add_jars() applied to it; one shared env is used instead.)
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)  # job-wide default; individual operators may override below
# Register the Kafka connector jar with the job classpath.
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")

# Build a socket text source. PyFlink 1.14 exposes no public socket_text_stream(),
# so the underlying Java environment is reached through a private attribute.
# NOTE(review): _j_stream_execution_environment is internal API — confirm it still
# exists in the PyFlink version actually deployed.
data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))

# map operator: uppercases each incoming line; runs as one task with
# parallelism 8, i.e. 8 subtasks.
ds1 = data.map(lambda s: s.upper()).set_parallelism(8)
# print sink operator with parallelism 4.
ds1.print().set_parallelism(4)

# A Flink job graph is only built lazily; without execute() nothing ever runs.
env.execute("socket_uppercase_demo")
pyflink task并行度问题
scan7242024-05-09 20:45
相关推荐
UR的出不克4 小时前
使用 Python 爬取 Bilibili 弹幕数据并导出 ExcelArms2064 小时前
python时区库学习与光同尘 大道至简4 小时前
ESP32 小智 AI 机器人入门教程从原理到实现(自己云端部署)清水白石0084 小时前
深入 Python 对象模型:PyObject 与 PyVarObject 全解析tjjucheng4 小时前
小程序定制开发服务商推荐囊中之锥.4 小时前
《从零到实战:基于 PyTorch 的手写数字识别完整流程解析》子云之风4 小时前
LSPosed 项目编译问题解决方案小北方城市网4 小时前
SpringBoot 全局异常处理与接口规范实战:打造健壮可维护接口SunnyRivers5 小时前
打包 Python 项目万行5 小时前
机器人系统SLAM讲解