flink SQL实现mysql source sink

接上文:一文说清flink从编码到部署上线

环境说明:MySQL:5.7;flink:1.14.0;hadoop:3.0.0;操作系统:CentOS 7.6;JDK:1.8.0_401。

1.代码实现

1.1 EnvUtil实现

EnvUtil用于创建flink的运行环境。

java 复制代码
package com.zl.utils;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.time.Duration;
import java.time.ZoneOffset;
import java.util.concurrent.TimeUnit;

/**
 * EnvUtil
 * @description:
 */
/**
 * EnvUtil
 *
 * <p>Utility for building pre-configured Flink execution environments:
 * checkpointing with a RocksDB state backend, a fixed-delay restart strategy,
 * and a table environment tuned with mini-batch and two-phase aggregation.
 */
public class EnvUtil {

    /**
     * Creates a StreamExecutionEnvironment with checkpointing, state backend,
     * and restart strategy configured.
     *
     * @param parallelism desired job parallelism; non-positive values fall back to 1
     * @return the configured streaming environment
     */
    public static StreamExecutionEnvironment setFlinkEnv(int parallelism) {
        // HADOOP_USER_NAME maps to the HDFS home directory /user/<name>; here /user/root.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration flinkConf = new Configuration();
        flinkConf.setInteger("rest.port", 1000);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(flinkConf);

        // Fall back to parallelism 1 when the caller passes a non-positive value.
        env.setParallelism(parallelism > 0 ? parallelism : 1);

        // Restart up to 50 times, waiting 6 minutes between attempts.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, Time.minutes(6)));
        // Checkpoint every 10 minutes (10*60*1000 = 600000 ms), exactly-once mode (the default).
        env.enableCheckpointing(600000, CheckpointingMode.EXACTLY_ONCE);
        // RocksDB state backend with incremental checkpoints enabled.
        env.setStateBackend(new EmbeddedRocksDBStateBackend(true));

        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        // Only one checkpoint in flight at a time (the default).
        checkpointConfig.setMaxConcurrentCheckpoints(1);
        // At least 60000 ms (1 minute) between the end of one checkpoint and the start of the next.
        checkpointConfig.setMinPauseBetweenCheckpoints(60000);
        // Keep externalized checkpoints when the job is cancelled.
        checkpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // Tolerate up to 5 checkpoint failures before failing the job.
        checkpointConfig.setTolerableCheckpointFailureNumber(5);
        // Checkpoint timeout: 10 minutes (the Flink default).
        checkpointConfig.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(10));

        // Disable operator chaining so backpressure is easier to locate per operator.
        env.disableOperatorChaining();
        return env;
    }

    /**
     * Wraps a streaming environment in a StreamTableEnvironment tuned for
     * aggregation-heavy SQL jobs.
     *
     * @param env the underlying streaming environment
     * @return a table environment using UTC+8, with mini-batch and two-phase
     *         aggregation enabled and a 25-hour idle-state retention (TTL)
     */
    public static StreamTableEnvironment getFlinkTenv(StreamExecutionEnvironment env) {
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);
        // Use UTC+8 (China Standard Time) for SQL time functions.
        tableEnv.getConfig().setLocalTimeZone(ZoneOffset.ofHours(8));

        Configuration tableConf = tableEnv.getConfig().getConfiguration();
        // Enable mini-batch aggregation.
        tableConf.setString("table.exec.mini-batch.enabled", "true");
        // Flush a mini batch at most every 5 seconds...
        tableConf.setString("table.exec.mini-batch.allow-latency", "5 s");
        // ...or once 20000 rows are buffered, bounding memory use (OOM guard).
        tableConf.setString("table.exec.mini-batch.size", "20000");
        // Enable local-global (two-phase) aggregation to mitigate data skew.
        tableConf.setString("table.optimizer.agg-phase-strategy", "TWO_PHASE");
        // Expire idle state after 25 hours.
        tableEnv.getConfig().setIdleStateRetention(Duration.ofHours(25));

        return tableEnv;
    }

}

1.2 核心代码

java 复制代码
package com.zl;

import com.zl.utils.EnvUtil;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.util.concurrent.TimeUnit;

/**
 * Flink SQL job that mirrors the MySQL 5.7 table {@code flinktest1.products}
 * (read via the mysql-cdc connector) into the MySQL 8.0.27 table
 * {@code flinktest2.products} (written via the JDBC connector).
 */
public class MysqlExampleSQL {

    /** DDL for the JDBC sink table (MySQL 8.0.27, database flinktest2). */
    private static final String SINK_TABLE_DDL =
            " CREATE TABLE `sink_products` (id INT,name STRING,description STRING,PRIMARY KEY (`id`) NOT ENFORCED)with ("
                    + "'connector' = 'jdbc',"
                    + "'url' = 'jdbc:mysql://10.86.45.12:30105/flinktest2',"
                    // Driver class for MySQL Connector/J 8.x (5.x used com.mysql.jdbc.Driver).
                    + "'driver' = 'com.mysql.cj.jdbc.Driver',"
                    + "'username' = 'root',"
                    // NOTE: replace with the real password before deploying.
                    + "'password' = 'pwd',"
                    + "'table-name' = 'products')";

    /** DDL for the CDC source table (MySQL 5.7.20, database flinktest1). */
    private static final String SOURCE_TABLE_DDL =
            "CREATE TABLE `src_products`( id INT,name STRING,description STRING,PRIMARY KEY (`id`)  NOT ENFORCED) with ("
                    + "'connector' = 'mysql-cdc', "
                    + "'hostname' = '10.86.37.169', "
                    + "'port' = '3306', "
                    + "'username' = 'root', "
                    // NOTE: replace with the real password before deploying.
                    + "'password' = 'pwd', "
                    + "'database-name' = 'flinktest1', "
                    + "'table-name' = 'products',"
                    // 'initial': take a full snapshot first, then stream the binlog.
                    + "'debezium.snapshot.mode' = 'initial')";

    /** Continuous sync: copy every change from the CDC source into the sink. */
    private static final String SYNC_DML =
            "INSERT INTO sink_products SELECT id, name, description FROM src_products";

    public static void main(String[] args) throws Exception {

        // Build the runtime environment with parallelism 1.
        StreamExecutionEnvironment env = EnvUtil.setFlinkEnv(1);
        // Per-job checkpoint directory so jobs stay isolated from each other.
        env.getCheckpointConfig().setCheckpointStorage("hdfs://10.86.97.191:9000/flinktest/MysqlExampleSQL");

        EnvironmentSettings settings = EnvironmentSettings.newInstance().build();
        StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);

        // Register the JDBC sink and the mysql-cdc source, then start the sync.
        tenv.executeSql(SINK_TABLE_DDL);
        tenv.executeSql(SOURCE_TABLE_DDL);
        TableResult syncResult = tenv.executeSql(SYNC_DML);

        // Print the submission result (job ID) via the TableResult.
        syncResult.print();
    }
}

1.3 pom.xml

注意修改此处:

2.web UI


3.数据库

flinktest1.products

flinktest2.products

4.部署

相关构建、部署,参考:一文说清flink从编码到部署上线

部署脚本:

bash 复制代码
flink run-application -t yarn-application -Dparallelism.default=1 -Denv.java.opts=" -Dfile.encoding=UTF-8 -Dsun.jnu.encoding=UTF-8" -Dtaskmanager.memory.process.size=1g -Dyarn.application.name="FlinkCdcMysql"  -Dtaskmanager.numberOfTaskSlots=1 -c com.zl.MysqlExampleSQL /home/FlickCDC-1.0-SNAPSHOT-jar-with-dependencies.jar

部署日志:

yarn:

5.常见问题

5.1 错误1

开发环境,错误日志:

"Caused by: java.lang.NoSuchMethodError: com.mysql.cj.CharsetMapping.getJavaEncodingForMysqlCharset(Ljava/lang/String;)Ljava/lang/String;"

解决:去掉pom.xml中"mysql-connector-java"相关依赖。

5.2 错误2

部署后,错误日志:

①"Caused by: org.apache.flink.table.api.ValidationException: Cannot discover a connector using option: 'connector'='mysql-cdc'"。

②"Caused by: org.apache.flink.table.api.ValidationException: Could not find any factory for identifier 'jdbc' that implements 'org.apache.flink.table.factories.DynamicTableFactory' in the classpath"。

解决:

"flink-connector-jdbc_2.11-1.14.0.jar"、"flink-connector-mysql-cdc-2.4.0.jar"放到服务器flink的lib目录,如下图所示:

5.3 错误3

部署后,错误日志:

" Exception java.lang.NoClassDefFoundError: com/mysql/cj/jdbc/Driver"。

解决:

"mysql-connector-java-8.0.27.jar"放到服务器flink的lib目录,如下图所示:

6.代码

完整代码见:https://gitee.com/core815/flink-cdc-mysql

相关推荐
源码集结号5 小时前
一套智慧工地云平台源码,支持监管端、项目管理端,Java+Spring Cloud +UniApp +MySql技术开发
java·mysql·spring cloud·uni-app·源码·智慧工地·成品系统
GanGuaGua5 小时前
MySQL:表的约束
数据库·mysql
Hello.Reader5 小时前
Flink 连接器与格式thin/uber 制品、打包策略与上线清单
大数据·flink
Li zlun6 小时前
MySQL 性能监控与安全管理完全指南
数据库·mysql·安全
Hello.Reader9 小时前
Flink 内置 Watermark 生成器单调递增与有界乱序怎么选?
大数据·flink
工作中的程序员9 小时前
flink UTDF函数
大数据·flink
工作中的程序员9 小时前
flink keyby使用与总结 基础片段梳理
大数据·flink
韩立学长9 小时前
【开题答辩实录分享】以《走失人口系统档案的设计与实现》为例进行答辩实录分享
mysql·mybatis·springboot
武子康10 小时前
大数据-119 - Flink Flink 窗口(Window)全解析:Tumbling、Sliding、Session 应用场景 使用详解 最佳实践
大数据·后端·flink