from pyflink.datastream import StreamExecutionEnvironment
from pyflink.datastream.functions import RuntimeContext, FlatMapFunction, MapFunction
import json
import re
import logging
import sys
from pyflink.datastream.state import ValueStateDescriptor, MapStateDescriptor
from pyflink.datastream.connectors.kafka import FlinkKafkaConsumer, TypeInformation,FlinkKafkaProducer
from pyflink.common.typeinfo import Types
from pyflink.datastream.connectors.elasticsearch import Elasticsearch7SinkBuilder, ElasticsearchEmitter, FlushBackoffType
from pyflink.datastream.connectors import DeliveryGuarantee
from pyflink.common.serialization import SimpleStringSchema
from datetime import datetime
# Log to stdout so driver-side messages show up in the console/job client.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(asctime)s-%(levelname)s-%(message)s")
logger = logging.getLogger(__name__)

# Create the StreamExecutionEnvironment exactly once.
# BUG FIX: the original created a second environment further down, which
# silently discarded the parallelism and kafka-connector jar configured here.
env = StreamExecutionEnvironment.get_execution_environment()
env.set_parallelism(1)
env.add_jars("file:///root/flink-sql-connector-kafka_2.11-1.14.4.jar")

# DataStream is the only name below not already imported at the top of the
# file (the original re-imported several duplicates here; those are removed).
from pyflink.datastream import DataStream

# Source: read text lines from a TCP socket.
# NOTE(review): this reaches through the private `_j_stream_execution_environment`
# handle to the Java `socketTextStream` source because the Python API of this
# PyFlink version exposes no socket source — confirm against the installed version.
data = DataStream(env._j_stream_execution_environment.socketTextStream('192.168.137.201', 8899))

# Map operator: upper-case every record. One task with parallelism 8
# (i.e. 8 subtasks).
ds1 = data.map(lambda s: s.upper()).set_parallelism(8)

# Sink operator (print) with parallelism 4.
ds1.print().set_parallelism(4)

# BUG FIX: submit the job. Without execute() the pipeline above is only
# declared, never run.
env.execute("socket_uppercase_job")
# Source attribution: "pyflink task 并行度问题" (PyFlink task parallelism question),
# posted by scan724, 2024-05-09 20:45.
# (Unrelated "related articles" link list from the web-page scrape removed —
# it was residue of the blog platform's recommendation widget, not content.)