from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
import json
import re
import logging
import sys
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from pyflink.datastream.connectors import DeliveryGuarantee
from pyflink.common.serialization import SimpleStringSchema
from datetime import datetime
# Route all log output to stdout so Flink task logs show up in the console.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")
logger = logging.getLogger(__name__)

from pyflink.datastream import DataStream


def main():
    """Build and run a socket-text streaming job demonstrating per-operator parallelism.

    Reads lines from a TCP socket, upper-cases them with a map operator running
    at parallelism 8, and prints them with a sink running at parallelism 4.
    """
    # Create exactly ONE execution environment. The original script created two;
    # only the second was used, which silently discarded the set_parallelism(1)
    # and add_jars() configuration applied to the first.
    env = StreamExecutionEnvironment.get_execution_environment()
    env.set_parallelism(1)  # default parallelism for operators that do not override it
    # Kafka connector jar kept for parity with the original script; the socket
    # source itself does not need it.
    env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")

    # This PyFlink version does not expose socketTextStream publicly, so the
    # script reaches into the underlying Java environment.
    # NOTE(review): private API (_j_stream_execution_environment) — may break
    # across Flink versions; confirm against the installed release.
    data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))

    # Map operator: wrapped into one task with parallelism 8 (8 subtasks).
    upper_stream = data.map(lambda s: s.upper()).set_parallelism(8)
    # Sink (print) operator with parallelism 4.
    upper_stream.print().set_parallelism(4)

    # BUG FIX: the original script never submitted the job — without execute()
    # the pipeline is only defined, never run.
    env.execute("socket_uppercase_job")


if __name__ == "__main__":
    main()
# --- NOTE(review): the lines below are web-page scrape residue (blog article
# --- title, author/timestamps, and a "related articles" sidebar) that was
# --- pasted into this source file. Commented out so the file stays valid
# --- Python; the original text is preserved verbatim.
# pyflink task并行度问题
# scan7242024-05-09 20:45
# 相关推荐
# Ivanqhz5 分钟前
# 寄存器分配的核心函数 allocate2501_9454235413 分钟前
# 如何为开源Python项目做贡献?全栈凯哥13 分钟前
# 23.Python 魔术方法完全指南2401_8845632415 分钟前
# 使用Flask快速搭建轻量级Web应用AC赳赳老秦18 分钟前
# OpenClaw办公文档处理技能:批量转换PDF/Excel,提取数据高效办公苦瓜小生24 分钟前
# AI-TestHub:我如何从零开发一个智能测试用例生成平台阿钱真强道27 分钟前
# 31 Python 聚类:层次聚类怎么理解?AGNES 和 DIANA 有什么区别?小王不爱笑13227 分钟前
# Java 泛型详解桃气媛媛27 分钟前
# python流程控制-匹配语句match羊小猪~~29 分钟前
# 算法/力扣--数组典型题目