1、Flink部署
分发各节点
powershell
ansible cluster -m copy -a "src=/opt/software/flink-2.2.1-bin-scala_2.12.tgz dest=/opt/software"
解压
powershell
ansible cluster -m shell -a "tar -zxf /opt/software/flink-2.2.1-bin-scala_2.12.tgz -C /usr/bigtop/3.3.0/usr/lib/ && chown -R root:root /usr/bigtop/3.3.0/usr/lib/flink-2.2.1 && chmod -R 755 /usr/bigtop/3.3.0/usr/lib/flink-2.2.1"
创建软链接
powershell
ansible cluster -m shell -a "ln -s /usr/bigtop/3.3.0/usr/lib/flink-2.2.1 /usr/bigtop/current/flink"
修改log目录权限
powershell
ansible cluster -m shell -a "chmod 777 /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/log"
修改配置(/usr/bigtop/current/flink/conf/config.yaml)
powershell
# These parameters are required for Java 17 support.
# They can be safely removed when using Java 8/11.
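env:
  java:
    opts:
      all: --add-exports=java.rmi/sun.rmi.registry=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-exports=java.security.jgss/sun.security.krb5=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED
  # log4j 2 configuration
  log:
    level: TRACE
    max: 5
    dir: /data/flink/logs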
#==============================================================================
# Common
#==============================================================================
jobmanager:
  bind-host: 0.0.0.0
  rpc:
    address: 0.0.0.0
    port: 6123
  memory:
    process:
      size: 1024m
  execution:
    failover-strategy: region
taskmanager:
  bind-host: 0.0.0.0
  host: 0.0.0.0
  numberOfTaskSlots: 1
  memory:
    process:
      size: 2048m
parallelism:
  default: 1
fs:
  default-scheme: hdfs://hdfs-ha
#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================
execution:
  checkpointing:
    interval: 3min
    externalized-checkpoint-retention: RETAIN_ON_CANCELLATION
    max-concurrent-checkpoints: 1
    min-pause: 1min
    mode: EXACTLY_ONCE
    timeout: 15min
    tolerable-failed-checkpoints: 2
    unaligned: false
    incremental: true
    dir: hdfs://hdfs-ha/flink/checkpoints
    savepoint-dir: hdfs://hdfs-ha/flink/savepoints
state:
  backend:
    type: rocksdb
#==============================================================================
# Rest & web frontend
#==============================================================================
rest:
  address: 0.0.0.0
  bind-address: 0.0.0.0
  # port: 8081
  # bind-port: 8080-8090
web:
  submit:
    enable: true
  cancel:
    enable: true
#==============================================================================
# Advanced
#==============================================================================
# 优先使用任务Jar包里的类,而不是Flink自身lib里的类
classloader:
  resolve:
    order: child-first
taskmanager:
  memory:
    network:
      fraction: 0.1
      min: 64mb
      max: 1gb
#==============================================================================
# Flink Cluster Security Configuration
#==============================================================================
# security:
# kerberos:
# login:
# use-ticket-cache: true
# keytab: /path/to/kerberos/keytab
# principal: flink-user
# # The configuration below defines which JAAS login contexts
# contexts: Client,KafkaClient
#==============================================================================
# ZK Security Configuration
#==============================================================================
# zookeeper:
# sasl:
# # Below configurations are applicable if ZK ensemble is configured for security
# #
# # Override below configuration to provide custom ZK service name if configured
# # zookeeper.sasl.service-name: zookeeper
# #
# # The configuration below must match one of the values set in "security.kerberos.login.contexts"
# login-context-name: Client
#==============================================================================
# HistoryServer
#==============================================================================
jobmanager:
  archive:
    fs:
      dir: hdfs://hdfs-ha/flink/completed-jobs/
historyserver:
  web:
    address: 0.0.0.0
    port: 8082
  archive:
    fs:
      dir: hdfs://hdfs-ha/flink/completed-jobs/
    fs.refresh-interval: 10000
2、Flink On Yarn
2.1、WindowJoin
powershell
sudo -u hive \
env JAVA_HOME=/usr/java/jdk-17.0.8 \
env PATH=/usr/java/jdk-17.0.8/bin:$PATH \
env HADOOP_CLASSPATH=$(hadoop classpath) \
/usr/bigtop/current/flink/bin/flink run \
-t yarn-application \
-Denv.java.home=/usr/java/jdk-17.0.8 \
-Dcontainerized.master.env.JAVA_HOME=/usr/java/jdk-17.0.8 \
-Dcontainerized.taskmanager.env.JAVA_HOME=/usr/java/jdk-17.0.8 \
-Dyarn.application.name=WindowJoin \
-c org.apache.flink.streaming.examples.join.WindowJoin \
/usr/bigtop/current/flink/examples/streaming/WindowJoin.jar \
--windowSize 5000 \
--rate 10

2.2、TopSpeedWindowing
powershell
sudo -u hdfs \
env JAVA_HOME=/usr/java/jdk-17.0.8 \
env PATH=/usr/java/jdk-17.0.8/bin:$PATH \
env HADOOP_CLASSPATH=$(hadoop classpath) \
/usr/bigtop/current/flink/bin/flink run \
-t yarn-application \
-Denv.java.home=/usr/java/jdk-17.0.8 \
-Dcontainerized.master.env.JAVA_HOME=/usr/java/jdk-17.0.8 \
-Dcontainerized.taskmanager.env.JAVA_HOME=/usr/java/jdk-17.0.8 \
/usr/bigtop/current/flink/examples/streaming/TopSpeedWindowing.jar

3、集成Paimon
3.1、Paimon Jars
下载jars:https://flink.apache.org/downloads/
powershell
ansible cluster -m shell -a "wget https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-flink-action/1.4.1/paimon-flink-action-1.4.1.jar -O /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib/paimon-flink-action-1.4.1.jar"
ansible cluster -m shell -a "wget https://repo.maven.apache.org/maven2/org/apache/paimon/paimon-flink-2.2/1.4.1/paimon-flink-2.2-1.4.1.jar -O /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib/paimon-flink-2.2-1.4.1.jar"
ansible cluster -m shell -a "wget https://repo.maven.apache.org/maven2/org/apache/flink/flink-shaded-hadoop-2-uber/2.8.3-10.0/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar -O /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib/flink-shaded-hadoop-2-uber-2.8.3-10.0.jar"
ansible cluster -m shell -a "chmod -R 755 /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib"
4、Paimon Catalogs
4.1、Filesystem Catalog
启动Flink Yarn Session集群
powershell
sudo -u hdfs \
env JAVA_HOME=/usr/java/jdk-17.0.8 \
env PATH=/usr/java/jdk-17.0.8/bin:$PATH \
env HADOOP_CLASSPATH=$(hadoop classpath) \
/usr/bigtop/current/flink/bin/yarn-session.sh \
-nm yarn-session -tm 1024m -s 3 -d \
-Denv.java.home=/usr/java/jdk-17.0.8 \
-Dcontainerized.master.env.JAVA_HOME=/usr/java/jdk-17.0.8 \
-Dcontainerized.taskmanager.env.JAVA_HOME=/usr/java/jdk-17.0.8
启动Flink SQL Client
powershell
sudo -u hdfs \
env JAVA_HOME=/usr/java/jdk-17.0.8 \
env PATH=/usr/java/jdk-17.0.8/bin:$PATH \
env HADOOP_CLASSPATH=$(hadoop classpath) \
/usr/bigtop/current/flink/bin/sql-client.sh -s yarn-session \
-Denv.java.home=/usr/java/jdk-17.0.8 \
-Dcontainerized.master.env.JAVA_HOME=/usr/java/jdk-17.0.8 \
-Dcontainerized.taskmanager.env.JAVA_HOME=/usr/java/jdk-17.0.8
设置结果显示模式
powershell
SET 'sql-client.execution.result-mode' = 'tableau';
文件系统Catalog
powershell
create catalog hdfs_catalog with (
'type'='paimon',
'warehouse'='hdfs://hdfs-ha/paimon/hdfs'
);
查看catalogs
powershell
show catalogs;
use catalog hdfs_catalog;
show current catalog;
创建表
powershell
create table word_count (
word string primary key not enforced,
calc bigint
);
写入数据
powershell
create temporary table word_table (
word string
) with (
'connector' = 'datagen',
'fields.word.length' = '1',
'rows-per-second' = '10'
);
set 'execution.checkpointing.interval' = '10 s';
insert into word_count select word, count(*) from word_table group by word;
sql
Setting HBASE_CONF_DIR=/etc/hbase/conf because no HBASE_CONF_DIR was set.
find: Failed to restore initial working directory: /root: Permission denied
find: Failed to restore initial working directory: /root: Permission denied
find: Failed to restore initial working directory: /root: Permission denied
find: Failed to restore initial working directory: /root: Permission denied
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib/log4j-slf4j-impl-2.24.3.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/bigtop/3.3.0/usr/lib/hadoop/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/bigtop/3.3.0/usr/lib/tez/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
2026-05-18 17:09:16,601 INFO org.apache.flink.yarn.cli.FlinkYarnSessionCli [] - Found Yarn properties file under /tmp/.yarn-properties-hdfs.
2026-05-18 17:09:16,601 INFO org.apache.flink.yarn.cli.FlinkYarnSessionCli [] - Found Yarn properties file under /tmp/.yarn-properties-hdfs.
▒▓██▓██▒
▓████▒▒█▓▒▓███▓▒
▓███▓░░ ▒▒▒▓██▒ ▒
░██▒ ▒▒▓▓█▓▓▒░ ▒████
██▒ ░▒▓███▒ ▒█▒█▒
░▓█ ███ ▓░▒██
▓█ ▒▒▒▒▒▓██▓░▒░▓▓█
█░ █ ▒▒░ ███▓▓█ ▒█▒▒▒
████░ ▒▓█▓ ██▒▒▒ ▓███▒
░▒█▓▓██ ▓█▒ ▓█▒▓██▓ ░█░
▓░▒▓████▒ ██ ▒█ █▓░▒█▒░▒█▒
███▓░██▓ ▓█ █ █▓ ▒▓█▓▓█▒
░██▓ ░█░ █ █▒ ▒█████▓▒ ██▓░▒
███░ ░ █░ ▓ ░█ █████▒░░ ░█░▓ ▓░
██▓█ ▒▒▓▒ ▓███████▓░ ▒█▒ ▒▓ ▓██▓
▒██▓ ▓█ █▓█ ░▒█████▓▓▒░ ██▒▒ █ ▒ ▓█▒
▓█▓ ▓█ ██▓ ░▓▓▓▓▓▓▓▒ ▒██▓ ░█▒
▓█ █ ▓███▓▒░ ░▓▓▓███▓ ░▒░ ▓█
██▓ ██▒ ░▒▓▓███▓▓▓▓▓██████▓▒ ▓███ █
▓███▒ ███ ░▓▓▒░░ ░▓████▓░ ░▒▓▒ █▓
█▓▒▒▓▓██ ░▒▒░░░▒▒▒▒▓██▓░ █▓
██ ▓░▒█ ▓▓▓▓▒░░ ▒█▓ ▒▓▓██▓ ▓▒ ▒▒▓
▓█▓ ▓▒█ █▓░ ░▒▓▓██▒ ░▓█▒ ▒▒▒░▒▒▓█████▒
██░ ▓█▒█▒ ▒▓▓▒ ▓█ █░ ░░░░ ░█▒
▓█ ▒█▓ ░ █░ ▒█ █▓
█▓ ██ █░ ▓▓ ▒█▓▓▓▒█░
█▓ ░▓██░ ▓▒ ▓█▓▒░░░▒▓█░ ▒█
██ ▓█▓░ ▒ ░▒█▒██▒ ▓▓
▓█▒ ▒█▓▒░ ▒▒ █▒█▓▒▒░░▒██
░██▒ ▒▓▓▒ ▓██▓▒█▒ ░▓▓▓▓▒█▓
░▓██▒ ▓░ ▒█▓█ ░░▒▒▒
▒▓▓▓▓▓▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒░░▓▓ ▓░▒█░
______ _ _ _ _____ ____ _ _____ _ _ _ BETA
| ____| (_) | | / ____|/ __ \| | / ____| (_) | |
| |__ | |_ _ __ | | __ | (___ | | | | | | | | |_ ___ _ __ | |_
| __| | | | '_ \| |/ / \___ \| | | | | | | | | |/ _ \ '_ \| __|
| | | | | | | | < ____) | |__| | |____ | |____| | | __/ | | | |_
|_| |_|_|_| |_|_|\_\ |_____/ \___\_\______| \_____|_|_|\___|_| |_|\__|
Welcome! Enter 'HELP;' to list all available commands. 'QUIT;' to exit.
Command history file path: /home/hdfs/.flink-sql-history
Flink SQL> SET 'sql-client.execution.result-mode' = 'tableau';
[INFO] Execute statement succeeded.
Flink SQL>
> create catalog hdfs_catalog with (
> 'type'='paimon',
> 'warehouse'='hdfs://hdfs-ha/paimon/hdfs'
> );2026-05-18 17:13:44,791 WARN org.apache.hadoop.hdfs.shortcircuit.DomainSocketFactory [] - The short-circuit local reads feature cannot be used because libhadoop cannot be loaded.
[INFO] Execute statement succeeded.
Flink SQL> USE CATALOG hdfs_catalog;
[INFO] Execute statement succeeded.
Flink SQL> show catalogs;
+-----------------+
| catalog name |
+-----------------+
| default_catalog |
| hdfs_catalog |
+-----------------+
2 rows in set
Flink SQL> show current catalog;
+----------------------+
| current catalog name |
+----------------------+
| hdfs_catalog |
+----------------------+
1 row in set
Flink SQL> create table word_count (
> word string primary key not enforced,
> calc bigint
> );
[INFO] Execute statement succeeded.
Flink SQL> create temporary table word_table (
> word string
> ) with (
> 'connector' = 'datagen',
> 'fields.word.length' = '1',
> 'rows-per-second' = '10'
> );
[INFO] Execute statement succeeded.
Flink SQL> SET 'execution.checkpointing.interval' = '10 s';
[INFO] Execute statement succeeded.
Flink SQL> insert into word_count select word, count(*) from word_table group by word;2026-05-18 17:25:37,904 WARN org.apache.flink.yarn.configuration.YarnLogConfigUtil [] - The configuration directory ('/usr/bigtop/3.3.0/usr/lib/flink-2.2.1/conf') already contains a LOG4J config file.If you want to use logback, then please delete or rename the log configuration file.
Flink Web UI

查询
powershell
SELECT * FROM word_count;
powershell
+----+--------------------------------+----------------------+
| op | word | calc |
+----+--------------------------------+----------------------+
| +I | 0 | 224 |
| +I | 1 | 226 |
| +I | 2 | 205 |
| +I | 3 | 247 |
| +I | 4 | 223 |
| +I | 5 | 204 |
| +I | 6 | 209 |
| +I | 7 | 216 |
关闭 checkpoint,切换为 批处理 BATCH 模式
powershell
RESET 'execution.checkpointing.interval';
SET 'execution.runtime-mode' = 'batch';
SELECT * FROM word_count;
powershell
+------+------+
| word | calc |
+------+------+
| 0 | 334 |
| 1 | 322 |
| 2 | 317 |
| 3 | 356 |
| 4 | 336 |
| 5 | 299 |
| 6 | 324 |
| 7 | 322 |
| 8 | 324 |
| 9 | 338 |
| a | 325 |
| b | 323 |
| c | 342 |
| d | 327 |
| e | 322 |
| f | 319 |
+------+------+
16 rows in set (1.12 seconds)
退出
powershell
EXIT;
Stop Flink cluster
powershell
sudo -u hdfs yarn application -list
sudo -u hdfs yarn application -kill <application_id>
4.2、Hive Catalog
Add Flink Jars
powershell
cd /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib
ansible cluster -m shell -a "wget https://repo1.maven.org/maven2/org/apache/flink/flink-sql-connector-hive-3.1.3_2.12/3.0.0-1.20/flink-sql-connector-hive-3.1.3_2.12-3.0.0-1.20.jar -O /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib/flink-sql-connector-hive-3.1.3_2.12-3.0.0-1.20.jar"
ansible cluster -m shell -a "cp /usr/bigtop/3.3.0/usr/lib/hive/lib/hive-exec-3.1.3.jar /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib/"
ansible cluster -m shell -a "cp /usr/bigtop/3.3.0/usr/lib/hive/lib/libfb303-0.9.3.jar /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib/"
ansible cluster -m shell -a "cp /usr/bigtop/3.3.0/usr/lib/hive/lib/antlr-runtime-3.5.2.jar /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib/"
ansible cluster -m shell -a "chmod -R 755 /usr/bigtop/3.3.0/usr/lib/flink-2.2.1/lib"
Add Hive Jars
powershell
ansible cluster -m shell -a "mkdir -p /usr/bigtop/3.3.0/usr/lib/hive/auxlib"
ansible cluster -m shell -a "wget https://repo1.maven.org/maven2/org/apache/paimon/paimon-hive-connector-3.1/1.4.1/paimon-hive-connector-3.1-1.4.1.jar -O /usr/bigtop/3.3.0/usr/lib/hive/auxlib/paimon-hive-connector-3.1-1.4.1.jar"
启动Flink Yarn Session集群
powershell
sudo -u hive \
env JAVA_HOME=/usr/java/jdk-17.0.8 \
env PATH=/usr/java/jdk-17.0.8/bin:$PATH \
env HADOOP_CLASSPATH=$(hadoop classpath) \
/usr/bigtop/current/flink/bin/yarn-session.sh \
-nm paimon-catalog -tm 1024m -s 3 -d \
-Denv.java.home=/usr/java/jdk-17.0.8 \
-Dcontainerized.master.env.JAVA_HOME=/usr/java/jdk-17.0.8 \
-Dcontainerized.taskmanager.env.JAVA_HOME=/usr/java/jdk-17.0.8
启动Flink SQL Client
powershell
sudo -u hive \
env JAVA_HOME=/usr/java/jdk-17.0.8 \
env PATH=/usr/java/jdk-17.0.8/bin:$PATH \
env HADOOP_CLASSPATH=$(hadoop classpath) \
/usr/bigtop/current/flink/bin/sql-client.sh -s yarn-session \
-Denv.java.home=/usr/java/jdk-17.0.8 \
-Dcontainerized.master.env.JAVA_HOME=/usr/java/jdk-17.0.8 \
-Dcontainerized.taskmanager.env.JAVA_HOME=/usr/java/jdk-17.0.8
创建Hive Catalog
powershell
SET 'sql-client.execution.result-mode' = 'tableau';
SET 'execution.runtime-mode' = 'streaming';
SET 'pipeline.name' = 'Paimon-Datagen';
SET 'classloader.resolve.order' = 'child-first';
SET 'classloader.check-leaked-classloader' = 'false';
SET 'taskmanager.memory.process.size' = '6G';
SET 'taskmanager.memory.heap.size' = '4G';
SET 'taskmanager.memory.managed.size' = '1G';
-- Checkpoint 生产级
SET 'execution.checkpointing.interval' = '60s';
SET 'execution.checkpointing.mode' = 'EXACTLY_ONCE';
SET 'execution.checkpointing.timeout' = '15min';
SET 'execution.checkpointing.max-concurrent-checkpoints' = '1';
SET 'execution.checkpointing.tolerable-failed-checkpoints' = '3';
SET 'state.backend' = 'rocksdb';
SET 'state.checkpoints.num-retained' = '10';
SET 'state.backend.incremental' = 'true';
-- Catalog
CREATE CATALOG hive_catalog WITH (
'type' = 'paimon',
'metastore' = 'hive',
'hive-conf-dir' = '/etc/hive/conf',
'hadoop-conf-dir' = '/etc/hadoop/conf'
);
use catalog hive_catalog;
use tmp;
-- 防 OOM 终极表参数
drop table if exists t_datagen;
create table if not exists t_datagen (
id int,
name string,
age int,
gender string,
city string,
score double,
phone string,
email string,
create_time timestamp
) WITH (
'connector' = 'paimon',
'bucket' = '1',
'bucket-key' = 'id',
'file.format' = 'parquet',
'parquet.compression' = 'snappy',
'parquet.block.size' = '16777216',
'writer.buffer.size' = '2097152',
'write-buffer-spillable' = 'true',
'compaction.max-threads' = '0',
'sink.flush-interval' = '30000',
'sink.max-write-buffer-size' = '8388608'
);
-- DataGen源
create temporary table datagen_source (
id int,
name string,
age int,
gender string,
city string,
score double,
phone string,
email string,
create_time timestamp
) WITH (
'connector' = 'datagen',
'rows-per-second' = '500',
'fields.id.min' = '1',
'fields.id.max' = '1000000',
'fields.age.min' = '18',
'fields.age.max' = '60',
'fields.score.min' = '60',
'fields.score.max' = '100'
);
-- 插入
insert into t_datagen select * from datagen_source;

