Deploying Flink 1.20.1

1. Set environment variables

export JAVA_HOME=/cluster/jdk
export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
#export HIVE_HOME=/cluster/hive
export MYSQL_HOME=/cluster/mysql
export HADOOP_HOME=/cluster/hadoop3
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_CLASSPATH=`hadoop classpath`
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export FLINK_HOME=/cluster/flink
export SPARK_HOME=/cluster/spark
export ZK_HOME=/cluster/zookeeper
export NACOS_HOME=/cluster/nacos
export KAFKA_HOME=/cluster/kafka
export DATART_HOME=/cluster/datart
export HBASE_HOME=/cluster/hbase
export SEATUNNEL_HOME=/cluster/seatunnel
export STREAMPARK_HOME=/cluster/streampark
export KYUUBI_HOME=/cluster/kyuubi
export DINKY_HOME=/cluster/dinky
export INLONG_HOME=/cluster/inlong
export DORIS_HOME=/cluster/doris
export BE_HOME=$DORIS_HOME/be
export FE_HOME=$DORIS_HOME/fe
export M2_HOME=/cluster/maven
export PATH=$PATH:$M2_HOME/bin:$BE_HOME/bin:$FE_HOME/bin:$DINKY_HOME/bin:$INLONG_HOME/bin:$DATART_HOME/bin:$KYUUBI_HOME/bin:$HBASE_HOME/bin:$SEATUNNEL_HOME/bin:$STREAMPARK_HOME/bin:$FLINK_HOME/bin:$SPARK_HOME/bin:$KAFKA_HOME/bin:$MYSQL_HOME/bin:$HIVE_HOME/bin:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$NACOS_HOME/bin:$ZK_HOME/bin
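
After editing, reload the profile and sanity-check the toolchain (a minimal check; it assumes the exports above were appended to /etc/profile):

# Reload the exports into the current shell (the profile path is an assumption)
source /etc/profile
# Each of these should print a version rather than an error
java -version
hadoop version
# Confirm the Hadoop classpath Flink will pick up is non-empty
echo "$HADOOP_CLASSPATH" | cut -c1-200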

2. The Flink configuration file config.yaml (Flink 1.20 uses this new-style file in place of the legacy flink-conf.yaml)

env:
  java:
    opts:
      all: --add-exports=java.base/sun.net.util=ALL-UNNAMED --add-exports=java.rmi/sun.rmi.registry=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-exports=java.security.jgss/sun.security.krb5=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED
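# (Note) The --add-exports/--add-opens flags above are required on JDK 11/17,
# where the JVM module system blocks the reflective access that Flink and its
# connectors perform on JDK-internal packages.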
#==============================================================================
# Common
#==============================================================================
jobmanager:
  bind-host: 0.0.0.0
  rpc:
    address: 0.0.0.0
    port: 6123
  memory:
    process:
      size: 1600m
  execution:
    failover-strategy: region
  archive:
    fs:
      dir: hdfs://10.10.10.99:9000/flink/completed-jobs/
taskmanager:
  bind-host: 0.0.0.0
  host: 0.0.0.0
  numberOfTaskSlots: 100
  memory:
    process:
      size: 1728m
    network:
      fraction: 0.1
      min: 64mb
      max: 1gb
parallelism:
  default: 1
fs:
  default-scheme: hdfs://10.10.10.99:9000
#==============================================================================
# High Availability (note: this ZooKeeper ensemble has no authentication enabled; enabling ZooKeeper auth is still to be investigated)
#==============================================================================
high-availability:
  # The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
  type: zookeeper
  # The path where metadata for master recovery is persisted. While ZooKeeper stores
  # the small ground truth for checkpoint and leader election, this location stores
  # the larger objects, like persisted dataflow graphs.
  #
  # Must be a durable file system that is accessible from all nodes
  # (like HDFS, S3, Ceph, nfs, ...)
  storageDir: hdfs:///flink/ha/
  zookeeper:
    # The list of ZooKeeper quorum peers that coordinate the high-availability
    # setup. This must be a list of the form:
    # "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
    quorum: localhost:2181
    client:
      # ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
      # It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
      # The default value is "open" and it can be changed to "creator" if ZK security is enabled
      acl: open
#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================
# The backend that will be used to store operator state checkpoints if
# checkpointing is enabled. Checkpointing is enabled when execution.checkpointing.interval > 0.
# # Execution checkpointing related parameters. Please refer to CheckpointConfig and CheckpointingOptions for more details.
execution:
  checkpointing:
    interval: 3min
    externalized-checkpoint-retention: DELETE_ON_CANCELLATION
    max-concurrent-checkpoints: 1
    min-pause: 0s
    mode: EXACTLY_ONCE
    timeout: 10min
    tolerable-failed-checkpoints: 0
    unaligned: false
state:
  backend:
    type: hashmap
    incremental: false
  checkpoints:
    dir: hdfs://10.10.10.99:9000/flink/flink-checkpoints
  savepoints:
    dir: hdfs://10.10.10.99:9000/flink/flink-savepoints
#==============================================================================
# Rest & web frontend
#==============================================================================
rest:
  address: 0.0.0.0
  bind-address: 0.0.0.0
  web:
    submit:
      enable: true
    cancel:
      enable: true
#==============================================================================
# Advanced
#==============================================================================  
io:
  tmp:
    dirs: /tmp
classloader:
  resolve:
    order: child-first
#==============================================================================
# Flink Cluster Security Configuration
#==============================================================================
# Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors -
# may be enabled in four steps:
# 1. configure the local krb5.conf file
# 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit)
# 3. make the credentials available to various JAAS login contexts
# 4. configure the connector to use JAAS/SASL
# # The below configure how Kerberos credentials are provided. A keytab will be used instead of
# # a ticket cache if the keytab path and principal are set.
# security:
#   kerberos:
#     login:
#       use-ticket-cache: true
#       keytab: /path/to/kerberos/keytab
#       principal: flink-user
#       # The configuration below defines which JAAS login contexts
#       contexts: Client,KafkaClient
#==============================================================================
# ZK Security Configuration
#==============================================================================
# zookeeper:
#   sasl:
#     # Below configurations are applicable if ZK ensemble is configured for security
#     #
#     # Override below configuration to provide custom ZK service name if configured
#     # zookeeper.sasl.service-name: zookeeper
#     #
#     # The configuration below must match one of the values set in "security.kerberos.login.contexts"
#     login-context-name: Client
#==============================================================================
# HistoryServer
#==============================================================================
historyserver:
  web:
    address: 0.0.0.0
    port: 8082
  archive:
    fs:
      dir: hdfs://10.10.10.99:9000/flink/historyserver/completed-jobs/
      refresh-interval: 10000
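
With config.yaml in place, a standalone cluster and the HistoryServer can be started with the stock scripts (a minimal sketch; it assumes HDFS at 10.10.10.99:9000 is up and $FLINK_HOME is set as in step 1):

# Create the HDFS directories referenced in config.yaml
hdfs dfs -mkdir -p /flink/completed-jobs /flink/flink-checkpoints /flink/flink-savepoints /flink/ha /flink/historyserver/completed-jobs
# Start the standalone cluster (JobManager + TaskManager)
$FLINK_HOME/bin/start-cluster.sh
# Start the HistoryServer; its UI listens on port 8082 as configured above
$FLINK_HOME/bin/historyserver.sh start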

3. Submit and run jobs

I. Flink job submission modes and the corresponding commands

(1) Per-Job mode (deprecated in recent Flink releases in favor of Application mode)

/cluster/flink/bin/flink run \
  -t yarn-per-job \
  -d \
  -ynm YarnPerJobTopSpeedWindowing \
  -Dyarn.application.name=YarnPerJobTopSpeedWindowing \
  -Dclassloader.check-leaked-classloader=false \
  -Dyarn.taskmanager.cpu.cores=2 \
  -Dyarn.containers.vcores=2 \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  /cluster/flink/examples/streaming/TopSpeedWindowing.jar \
  --windowSize 3000 \
  --eventsPerSecond 100

A variant of the same submission that additionally pins the queue, slot count, and JobManager/TaskManager memory via the legacy -y* shortcuts:

/cluster/flink/bin/flink run \
  -t yarn-per-job \
  -d \
  -ynm YarnPerJobTopSpeedWindowing \
  -yqu default \
  -ys 2 \
  -yjm 1024m \
  -ytm 2048m \
  -Dclassloader.check-leaked-classloader=false \
  -Dyarn.taskmanager.cpu.cores=2 \
  -Dyarn.containers.vcores=2 \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  /cluster/flink/examples/streaming/TopSpeedWindowing.jar \
  --windowSize 3000 \
  --eventsPerSecond 100
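
Once running, the job can be inspected and stopped cleanly through the flink CLI instead of killing the YARN application (a sketch; the application and job IDs are placeholders to fill in):

# List the jobs running inside the per-job YARN application
flink list -t yarn-per-job -Dyarn.application.id=application_XXXX_NNNN
# Stop a job with a savepoint; the savepoint goes to the
# state.savepoints.dir configured in config.yaml
flink stop -t yarn-per-job -Dyarn.application.id=application_XXXX_NNNN <jobId>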

If Hadoop needs a clean restart:

sudo kill -9 $(ps -ef | grep hadoop | gawk '$0 !~ /grep/ {print $2}' | tr -s '\n' ' ')
rm -rf /cluster/hadoop3/logs/*
/cluster/hadoop3/sbin/start-all.sh

====================================================================================================================

(2) Session mode

This mode requires starting a YARN session first and then submitting jobs against its application ID.

  1. Start the YARN session. On startup it prints the session's application ID, e.g. application_1740829241495_0001

/cluster/flink/bin/yarn-session.sh \
  -jm 2048 \
  -tm 2048 \
  -s 1 \
  -nm yarn-session-app \
  -d

Stop the session: yarn application -kill application_1740829241495_0001

List running applications: yarn application -list

  2. Submit a job to the YARN session

/cluster/flink/bin/flink run \
  -Dyarn.application.name=YarnSessionAppTopSpeedWindowing \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  -yid application_1740829241495_0001 \
  /cluster/flink/examples/streaming/TopSpeedWindowing.jar

This brings up a new Flink process, and its web UI port is not necessarily 8081.
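
The actual JobManager web address can be read from YARN instead of guessing the port (a sketch, using the session ID from above):

# Print the tracking URL of the session's JobManager UI
yarn application -status application_1740829241495_0001 | grep -i tracking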

If Hadoop needs a clean restart:

sudo kill -9 $(ps -ef | grep hadoop | gawk '$0 !~ /grep/ {print $2}' | tr -s '\n' ' ')
rm -rf /cluster/hadoop3/logs/*
/cluster/hadoop3/sbin/start-all.sh

====================================================================================================================

(3) Application mode

/cluster/flink/bin/flink run-application \
  -t yarn-application \
  -Dparallelism.default=1 \
  -Djobmanager.memory.process.size=2048m \
  -Dtaskmanager.memory.process.size=2048m \
  -Dyarn.application.name=RunApplicationTopSpeedWindowing \
  -Dtaskmanager.numberOfTaskSlots=1 \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  /cluster/flink/examples/streaming/TopSpeedWindowing.jar --windowSize 3000
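
Optionally, in application mode the job jar itself can live on HDFS, so the client does not re-upload it on every submission (a sketch; the /flink/jars path is an arbitrary choice):

# Upload the example jar once (the target directory is an assumption)
hdfs dfs -mkdir -p /flink/jars
hdfs dfs -put -f /cluster/flink/examples/streaming/TopSpeedWindowing.jar /flink/jars/
# Submit with the jar fetched from HDFS by the cluster itself
/cluster/flink/bin/flink run-application -t yarn-application \
  -Dyarn.application.name=RunApplicationTopSpeedWindowing \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  hdfs://10.10.10.99:9000/flink/jars/TopSpeedWindowing.jar --windowSize 3000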

Problem 1:

Caused by: org.apache.flink.configuration.IllegalConfigurationException:
The number of requested virtual cores for application master 1 exceeds
the maximum number of virtual cores 0 available in the Yarn Cluster

Root cause: YARN had not come up cleanly (a NodeManager issue involving the spark-3.5.4-yarn-shuffle.jar file), so the cluster advertised 0 available vcores; restarting the Hadoop cluster fixed it.
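
After the restart, it is worth confirming that the NodeManagers are registering resources again (a generic check):

# Per-node resource report; available vcores should be non-zero
yarn node -list -showDetails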

YARN web UI: http://ip:18088/cluster

After submission, the client prints the Flink web UI address, e.g.: Web frontend listening at http://ip:42745.
