from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
import json
import re
import logging
import sys
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from pyflink.datastream.connectors import DeliveryGuarantee
from pyflink.common.serialization import SimpleStringSchema
from datetime import datetime
# --- Logging setup: INFO-level logs to stdout -------------------------------
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(asctime)s-%(levelname)s-%(message)s",
)
logger = logging.getLogger(__name__)

from pyflink.datastream import DataStream  # wraps the raw Java stream handle below

# --- StreamExecutionEnvironment ---------------------------------------------
# Create the environment exactly once. (The original script created it twice
# and re-imported the same modules mid-file; the second env made the
# add_jars() call on the first one pointless.)
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)  # default parallelism for operators that don't override it
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")

# Source: read text lines from a TCP socket.
# NOTE(review): socketTextStream is reached through the private
# `_j_stream_execution_environment` Java gateway because this PyFlink
# version's Python API does not expose it directly — confirm against the
# installed pyflink release before upgrading.
data = DataStream(
    env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899)
)

# Map operator: uppercase each line. Packaged as one task with parallelism 8,
# i.e. 8 subtasks.
ds1 = data.map(lambda s: s.upper()).set_parallelism(8)

# Sink operator: print to stdout, parallelism 4.
ds1.print().set_parallelism(4)
# ---------------------------------------------------------------------------
# NOTE(review): everything below is web-page residue (the article title,
# author/timestamp, and a "related posts" link list) that was scraped into
# this file along with the code. It is not Python; commented out so the
# module parses. Safe to delete.
# ---------------------------------------------------------------------------
# pyflink task并行度问题
# scan724  2024-05-09 20:45
# 相关推荐
# A尘埃 4 小时前
# 大模型应用python+Java后端+Vue前端的整合 — A尘埃 4 小时前
# LLM大模型评估攻略 — 一晌小贪欢 5 小时前
# 【Python办公】处理 CSV和Excel 文件操作指南 — 檀越剑指大厂 6 小时前
# 【Python系列】fastapi和flask中的阻塞问题 — YoungHong1992 7 小时前
# 【Python进阶】告别繁琐Debug!Loguru一键输出异常日志与变量值 — AiXed 7 小时前
# PC微信协议之nid算法 — 小李哥哥 8 小时前
# 基于数据的人工智能建模流程及源码示例 — APIshop 8 小时前
# 实战解析:苏宁易购 item_search 按关键字搜索商品API接口 — 蓝桉~MLGT 9 小时前
# Python学习历程——Python面向对象编程详解 — larance 9 小时前
# Python 中的 *args 和 **kwargs