from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
import json
import re
import logging
import sys
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from pyflink.datastream.connectors import DeliveryGuarantee
from pyflink.common.serialization import SimpleStringSchema
from datetime import datetime
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")
logger = logging.getLogger(__name__)
# ���� StreamExecutionEnvironment ����
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")
from pyflink.datastream import DataStream, StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
from pyflink.common.typeinfo import Types
env = StreamExecutionEnvironment.get_execution_environment()
data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))
#调用map算子,封装成一个task,并行度为8,有8个subtask
ds1=data.map(lambda s: s.upper()).set_parallelism(8)
##sink算子,并行度为4
ds1.print().set_parallelism(4)
pyflink task并行度问题
scan7242024-05-09 20:45
相关推荐
cnxy1881 小时前
围棋对弈Python程序开发完整指南:步骤1 - 棋盘基础框架搭建落叶,听雪2 小时前
河南建站系统哪个好极客小云3 小时前
【生物医学NLP信息抽取:药物识别、基因识别与化学物质实体识别教程与应用】南_山无梅落3 小时前
12.Python3函数基础:定义、调用与参数传递规则laocooon5238578864 小时前
插入法排序 python清水白石0086 小时前
《深入 Python 上下文管理器:contextlib.contextmanager 与类实现方式的底层差异全景解析》程序员佳佳6 小时前
GPT-4时代终结?GPT-5.2与Banana Pro实测数据公开,普通开发者如何接住这泼天富贵Blossom.1186 小时前
多模态大模型LoRA微调实战:从零构建企业级图文检索系统小钻风33667 小时前
软件测试: 从入门到实践 (接口测试)小鸡吃米…7 小时前
带Python的人工智能——计算机视觉