Deploying Flink 1.20.1

1. Set environment variables

export JAVA_HOME=/cluster/jdk
export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
#export HIVE_HOME=/cluster/hive
export MYSQL_HOME=/cluster/mysql
export HADOOP_HOME=/cluster/hadoop3
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_CLASSPATH=`hadoop classpath`
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export FLINK_HOME=/cluster/flink
export SPARK_HOME=/cluster/spark
export ZK_HOME=/cluster/zookeeper
export NACOS_HOME=/cluster/nacos
export KAFKA_HOME=/cluster/kafka
export DATART_HOME=/cluster/datart
export HBASE_HOME=/cluster/hbase
export SEATUNNEL_HOME=/cluster/seatunnel
export STREAMPARK_HOME=/cluster/streampark
export KYUUBI_HOME=/cluster/kyuubi
export DINKY_HOME=/cluster/dinky
export INLONG_HOME=/cluster/inlong
export DORIS_HOME=/cluster/doris
export BE_HOME=$DORIS_HOME/be
export FE_HOME=$DORIS_HOME/fe
export M2_HOME=/cluster/maven
export PATH=$PATH:$M2_HOME/bin:$BE_HOME/bin:$FE_HOME/bin:$DINKY_HOME/bin:$INLONG_HOME/bin:$DATART_HOME/bin:$KYUUBI_HOME/bin:$HBASE_HOME/bin:$SEATUNNEL_HOME/bin:$STREAMPARK_HOME/bin:$FLINK_HOME/bin:$SPARK_HOME/bin:$KAFKA_HOME/bin:$MYSQL_HOME/bin:$HIVE_HOME/bin:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$NACOS_HOME/bin:$ZK_HOME/bin
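
After editing, reload the profile and sanity-check the toolchain (a minimal check; it assumes the exports above were appended to /etc/profile):

# Reload the exports into the current shell (the profile path is an assumption)
source /etc/profile
# Each of these should print a version rather than an error
java -version
hadoop version
# Confirm the Hadoop classpath Flink will pick up is non-empty
echo "$HADOOP_CLASSPATH" | cut -c1-200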

2. The Flink configuration file config.yaml (Flink 1.20 uses this new-style file in place of the legacy flink-conf.yaml)

env:
  java:
    opts:
      all: --add-exports=java.base/sun.net.util=ALL-UNNAMED --add-exports=java.rmi/sun.rmi.registry=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-exports=java.security.jgss/sun.security.krb5=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED
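# (Note) The --add-exports/--add-opens flags above are required on JDK 11/17,
# where the JVM module system blocks the reflective access that Flink and its
# connectors perform on JDK-internal packages.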
#==============================================================================
# Common
#==============================================================================
jobmanager:
  bind-host: 0.0.0.0
  rpc:
    address: 0.0.0.0
    port: 6123
  memory:
    process:
      size: 1600m
  execution:
    failover-strategy: region
  archive:
    fs:
      dir: hdfs://10.10.10.99:9000/flink/completed-jobs/
taskmanager:
  bind-host: 0.0.0.0
  host: 0.0.0.0
  numberOfTaskSlots: 100
  memory:
    process:
      size: 1728m
    network:
      fraction: 0.1
      min: 64mb
      max: 1gb
parallelism:
  default: 1
fs:
  default-scheme: hdfs://10.10.10.99:9000
#==============================================================================
# High Availability (note: this ZooKeeper ensemble has no authentication enabled; enabling ZooKeeper auth is still to be investigated)
#==============================================================================
high-availability:
  # The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
  type: zookeeper
  # The path where metadata for master recovery is persisted. While ZooKeeper stores
  # the small ground truth for checkpoint and leader election, this location stores
  # the larger objects, like persisted dataflow graphs.
  #
  # Must be a durable file system that is accessible from all nodes
  # (like HDFS, S3, Ceph, nfs, ...)
  storageDir: hdfs:///flink/ha/
  zookeeper:
    # The list of ZooKeeper quorum peers that coordinate the high-availability
    # setup. This must be a list of the form:
    # "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
    quorum: localhost:2181
    client:
      # ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
      # It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
      # The default value is "open" and it can be changed to "creator" if ZK security is enabled
      acl: open
#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================
# The backend that will be used to store operator state checkpoints if
# checkpointing is enabled. Checkpointing is enabled when execution.checkpointing.interval > 0.
# # Execution checkpointing related parameters. Please refer to CheckpointConfig and CheckpointingOptions for more details.
execution:
  checkpointing:
    interval: 3min
    externalized-checkpoint-retention: DELETE_ON_CANCELLATION
    max-concurrent-checkpoints: 1
    min-pause: 0s
    mode: EXACTLY_ONCE
    timeout: 10min
    tolerable-failed-checkpoints: 0
    unaligned: false
state:
  backend:
    type: hashmap
    incremental: false
  checkpoints:
    dir: hdfs://10.10.10.99:9000/flink/flink-checkpoints
  savepoints:
    dir: hdfs://10.10.10.99:9000/flink/flink-savepoints
#==============================================================================
# Rest & web frontend
#==============================================================================
rest:
  address: 0.0.0.0
  bind-address: 0.0.0.0
  web:
    submit:
      enable: true
    cancel:
      enable: true
#==============================================================================
# Advanced
#==============================================================================  
io:
  tmp:
    dirs: /tmp
classloader:
  resolve:
    order: child-first
#==============================================================================
# Flink Cluster Security Configuration
#==============================================================================
# Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors -
# may be enabled in four steps:
# 1. configure the local krb5.conf file
# 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit)
# 3. make the credentials available to various JAAS login contexts
# 4. configure the connector to use JAAS/SASL
# # The below configure how Kerberos credentials are provided. A keytab will be used instead of
# # a ticket cache if the keytab path and principal are set.
# security:
#   kerberos:
#     login:
#       use-ticket-cache: true
#       keytab: /path/to/kerberos/keytab
#       principal: flink-user
#       # The configuration below defines which JAAS login contexts
#       contexts: Client,KafkaClient
#==============================================================================
# ZK Security Configuration
#==============================================================================
# zookeeper:
#   sasl:
#     # Below configurations are applicable if ZK ensemble is configured for security
#     #
#     # Override below configuration to provide custom ZK service name if configured
#     # zookeeper.sasl.service-name: zookeeper
#     #
#     # The configuration below must match one of the values set in "security.kerberos.login.contexts"
#     login-context-name: Client
#==============================================================================
# HistoryServer
#==============================================================================
historyserver:
  web:
    address: 0.0.0.0
    port: 8082
  archive:
    fs:
      dir: hdfs://10.10.10.99:9000/flink/historyserver/completed-jobs/
      refresh-interval: 10000
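
With config.yaml in place, a standalone cluster and the HistoryServer can be started with the stock scripts (a minimal sketch; it assumes HDFS at 10.10.10.99:9000 is up and $FLINK_HOME is set as in step 1):

# Create the HDFS directories referenced in config.yaml
hdfs dfs -mkdir -p /flink/completed-jobs /flink/flink-checkpoints /flink/flink-savepoints /flink/ha /flink/historyserver/completed-jobs
# Start the standalone cluster (JobManager + TaskManager)
$FLINK_HOME/bin/start-cluster.sh
# Start the HistoryServer; its UI listens on port 8082 as configured above
$FLINK_HOME/bin/historyserver.sh start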

3. Submit and run jobs

I. Flink job submission modes and the corresponding commands

(1) Per-Job mode (deprecated in recent Flink releases in favor of Application mode)

/cluster/flink/bin/flink run \
  -t yarn-per-job \
  -d \
  -ynm YarnPerJobTopSpeedWindowing \
  -Dyarn.application.name=YarnPerJobTopSpeedWindowing \
  -Dclassloader.check-leaked-classloader=false \
  -Dyarn.taskmanager.cpu.cores=2 \
  -Dyarn.containers.vcores=2 \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  /cluster/flink/examples/streaming/TopSpeedWindowing.jar \
  --windowSize 3000 \
  --eventsPerSecond 100

A variant of the same submission that additionally pins the queue, slot count, and JobManager/TaskManager memory via the legacy -y* shortcuts:

/cluster/flink/bin/flink run \
  -t yarn-per-job \
  -d \
  -ynm YarnPerJobTopSpeedWindowing \
  -yqu default \
  -ys 2 \
  -yjm 1024m \
  -ytm 2048m \
  -Dclassloader.check-leaked-classloader=false \
  -Dyarn.taskmanager.cpu.cores=2 \
  -Dyarn.containers.vcores=2 \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  /cluster/flink/examples/streaming/TopSpeedWindowing.jar \
  --windowSize 3000 \
  --eventsPerSecond 100
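
Once running, the job can be inspected and stopped cleanly through the flink CLI instead of killing the YARN application (a sketch; the application and job IDs are placeholders to fill in):

# List the jobs running inside the per-job YARN application
flink list -t yarn-per-job -Dyarn.application.id=application_XXXX_NNNN
# Stop a job with a savepoint; the savepoint goes to the
# state.savepoints.dir configured in config.yaml
flink stop -t yarn-per-job -Dyarn.application.id=application_XXXX_NNNN <jobId>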

If Hadoop needs a clean restart:

sudo kill -9 $(ps -ef | grep hadoop | gawk '$0 !~ /grep/ {print $2}' | tr -s '\n' ' ')
rm -rf /cluster/hadoop3/logs/*
/cluster/hadoop3/sbin/start-all.sh

====================================================================================================================

(2) Session mode

This mode requires starting a YARN session first and then submitting jobs against its application ID.

  1. Start the YARN session. On startup it prints the session's application ID, e.g. application_1740829241495_0001

/cluster/flink/bin/yarn-session.sh \
  -jm 2048 \
  -tm 2048 \
  -s 1 \
  -nm yarn-session-app \
  -d

Stop the session: yarn application -kill application_1740829241495_0001

List running applications: yarn application -list

  2. Submit a job to the YARN session

/cluster/flink/bin/flink run \
  -Dyarn.application.name=YarnSessionAppTopSpeedWindowing \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  -yid application_1740829241495_0001 \
  /cluster/flink/examples/streaming/TopSpeedWindowing.jar

This brings up a new Flink process, and its web UI port is not necessarily 8081.
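
The actual JobManager web address can be read from YARN instead of guessing the port (a sketch, using the session ID from above):

# Print the tracking URL of the session's JobManager UI
yarn application -status application_1740829241495_0001 | grep -i tracking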

If Hadoop needs a clean restart:

sudo kill -9 $(ps -ef | grep hadoop | gawk '$0 !~ /grep/ {print $2}' | tr -s '\n' ' ')
rm -rf /cluster/hadoop3/logs/*
/cluster/hadoop3/sbin/start-all.sh

====================================================================================================================

(3) Application mode

/cluster/flink/bin/flink run-application \
  -t yarn-application \
  -Dparallelism.default=1 \
  -Djobmanager.memory.process.size=2048m \
  -Dtaskmanager.memory.process.size=2048m \
  -Dyarn.application.name=RunApplicationTopSpeedWindowing \
  -Dtaskmanager.numberOfTaskSlots=1 \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  /cluster/flink/examples/streaming/TopSpeedWindowing.jar --windowSize 3000
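
Optionally, in application mode the job jar itself can live on HDFS, so the client does not re-upload it on every submission (a sketch; the /flink/jars path is an arbitrary choice):

# Upload the example jar once (the target directory is an assumption)
hdfs dfs -mkdir -p /flink/jars
hdfs dfs -put -f /cluster/flink/examples/streaming/TopSpeedWindowing.jar /flink/jars/
# Submit with the jar fetched from HDFS by the cluster itself
/cluster/flink/bin/flink run-application -t yarn-application \
  -Dyarn.application.name=RunApplicationTopSpeedWindowing \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  hdfs://10.10.10.99:9000/flink/jars/TopSpeedWindowing.jar --windowSize 3000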

Problem 1:

Caused by: org.apache.flink.configuration.IllegalConfigurationException:
The number of requested virtual cores for application master 1 exceeds
the maximum number of virtual cores 0 available in the Yarn Cluster

Root cause: YARN had not come up cleanly (a NodeManager issue involving the spark-3.5.4-yarn-shuffle.jar file), so the cluster advertised 0 available vcores; restarting the Hadoop cluster fixed it.
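
After the restart, it is worth confirming that the NodeManagers are registering resources again (a generic check):

# Per-node resource report; available vcores should be non-zero
yarn node -list -showDetails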

YARN web UI: http://ip:18088/cluster

After submission, the client prints the Flink web UI address, e.g.: Web frontend listening at http://ip:42745.
