from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
import json
import re
import logging
import sys
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, FlinkKafkaProducer
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from pyflink.datastream.connectors import DeliveryGuarantee
from pyflink.common.serialization import SimpleStringSchema
from datetime import datetime
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")
logger = logging.getLogger(__name__)
# Create the StreamExecutionEnvironment
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")
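
The imports above pull in Kafka and Elasticsearch connector classes that this snippet never actually uses. Purely as an illustrative sketch (the broker address, topic, Elasticsearch host and index name are assumptions, and the matching Elasticsearch connector jar would also have to be added via add_jars), they could be wired together roughly like this:

# Hypothetical wiring of the imported connector classes; all endpoints below are placeholders.
kafka_props = {'bootstrap.servers': 'localhost:9092', 'group.id': 'demo-group'}
consumer = FlinkKafkaConsumer(
    topics='input-topic',
    deserialization_schema=SimpleStringSchema(),
    properties=kafka_props)

# Parse each Kafka record into a dict so the Elasticsearch emitter can index it
# (assumes the records are flat JSON objects with string values).
parsed = env.add_source(consumer).map(
    lambda raw: json.loads(raw),
    output_type=Types.MAP(Types.STRING(), Types.STRING()))

es_sink = Elasticsearch7SinkBuilder() \
    .set_hosts(['localhost:9200']) \
    .set_emitter(ElasticsearchEmitter.static_index('demo-index')) \
    .set_bulk_flush_backoff_strategy(FlushBackoffType.CONSTANT, 3, 3000) \
    .build()

parsed.sink_to(es_sink)

FlinkKafkaProducer could be attached in the same way with add_sink if the results should go back to Kafka instead. The snippet below is a separate, standalone example that re-creates the environment and looks at per-operator parallelism.
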
from pyflink.datastream import DataStream, StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
from pyflink.common.typeinfo import Types
env = StreamExecutionEnvironment.get_execution_environment()
# PyFlink does not expose socketTextStream directly, so the wrapped Java
# environment is used here to create the socket source (its parallelism is fixed at 1).
data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))
# The map operator becomes one task with parallelism 8, i.e. 8 subtasks.
ds1 = data.map(lambda s: s.upper()).set_parallelism(8)
# The print sink runs with parallelism 4.
ds1.print().set_parallelism(4)
PyFlink task parallelism
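
The socket source above has a fixed parallelism of 1, the map operator runs with 8 parallel subtasks, and the print sink with 4. Since neighbouring operators are only chained into the same task when their parallelism matches, these operators end up as separate tasks with the data redistributed between them. Below is a self-contained sketch of the same experiment that uses an in-memory source so it runs without a socket server (the collection contents and job name are made up):

demo_env = StreamExecutionEnvironment.get_execution_environment()
demo_env.set_parallelism(1)  # default parallelism for operators that do not set their own

src = demo_env.from_collection(['a', 'b', 'c'], type_info=Types.STRING())  # bounded stand-in source
upper = src.map(lambda s: s.upper(), output_type=Types.STRING()).set_parallelism(8)  # 8 map subtasks
upper.print().set_parallelism(4)  # 4 sink subtasks

demo_env.execute('parallelism-demo')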