flink SQL实现mysql source sink

接上文:一文说清flink从编码到部署上线

环境说明:MySQL:5.7;flink:1.14.0;hadoop:3.0.0;操作系统:CentOS 7.6;JDK:1.8.0_401。

1.代码实现

1.1 EnvUtil实现

EnvUtil用于创建flink的运行环境。

java
package com.zl.utils;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.time.Duration;
import java.time.ZoneOffset;
import java.util.concurrent.TimeUnit;

/**
 * EnvUtil
 * @description:
 */
/**
 * EnvUtil
 *
 * <p>Factory helpers that build a tuned Flink {@link StreamExecutionEnvironment}
 * and a companion {@link StreamTableEnvironment} with common production settings
 * (restart strategy, RocksDB state backend, checkpointing, mini-batching).
 */
public class EnvUtil {

    /**
     * Builds a streaming execution environment with restart strategy, RocksDB
     * state backend and checkpointing pre-configured.
     *
     * @param parallelism desired job parallelism; any value &lt;= 0 falls back to 1
     * @return the configured execution environment
     */
    public static StreamExecutionEnvironment setFlinkEnv(int parallelism) {
        // HADOOP_USER_NAME maps to the HDFS home directory /user/<name>; here /user/root.
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration configuration = new Configuration();
        configuration.setInteger("rest.port", 1000);
        StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment(configuration);

        // Fall back to a parallelism of 1 when the caller passes a non-positive value.
        environment.setParallelism(parallelism > 0 ? parallelism : 1);

        // Restart up to 50 times, waiting 6 minutes between attempts.
        environment.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, Time.minutes(6)));
        // Checkpoint every 10 minutes (600000 ms) with exactly-once semantics (the default mode).
        environment.enableCheckpointing(600000, CheckpointingMode.EXACTLY_ONCE);
        // RocksDB state backend with incremental checkpoints enabled.
        environment.setStateBackend(new EmbeddedRocksDBStateBackend(true));

        CheckpointConfig checkpointConfig = environment.getCheckpointConfig();
        // Only one checkpoint may be in flight at a time (this is also the default).
        checkpointConfig.setMaxConcurrentCheckpoints(1);
        // At least 60 s (60000 ms) must pass between the end of one checkpoint and the next.
        checkpointConfig.setMinPauseBetweenCheckpoints(60000);
        // Keep externalized checkpoints around even after the job is cancelled.
        checkpointConfig.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // Tolerate up to 5 checkpoint failures before failing the job.
        checkpointConfig.setTolerableCheckpointFailureNumber(5);
        // Abort any checkpoint that takes longer than 10 minutes (also the default).
        checkpointConfig.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(10));

        // Disable operator chaining so backpressure is easier to locate in the web UI.
        environment.disableOperatorChaining();
        return environment;
    }

    /**
     * Wraps the given execution environment in a table environment tuned with
     * mini-batching, two-phase aggregation and a state TTL.
     *
     * @param env the underlying streaming environment
     * @return the configured table environment
     */
    public static StreamTableEnvironment getFlinkTenv(StreamExecutionEnvironment env) {
        StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
        // Use UTC+8 (China Standard Time) for SQL time functions.
        tenv.getConfig().setLocalTimeZone(ZoneOffset.ofHours(8));

        Configuration tableConf = tenv.getConfig().getConfiguration();
        // Enable mini-batch processing to reduce per-record state access overhead.
        tableConf.setString("table.exec.mini-batch.enabled", "true");
        // Flush a mini-batch at least every 5 seconds.
        tableConf.setString("table.exec.mini-batch.allow-latency", "5 s");
        // Cap each mini-batch at 20000 buffered rows to avoid OOM.
        tableConf.setString("table.exec.mini-batch.size", "20000");
        // Enable local-global (two-phase) aggregation to mitigate data skew.
        tableConf.setString("table.optimizer.agg-phase-strategy", "TWO_PHASE");
        // Expire idle state after 25 hours (TTL set via the API).
        tenv.getConfig().setIdleStateRetention(Duration.ofHours(25));

        return tenv;
    }

}

1.2 核心代码

java
package com.zl;

import com.zl.utils.EnvUtil;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.util.concurrent.TimeUnit;

/**
 * MysqlExampleSQL
 *
 * <p>Flink SQL job that continuously replicates the {@code products} table from a
 * MySQL 5.7 database (via the mysql-cdc connector) into a MySQL 8.0 database
 * (via the JDBC connector).
 */
public class MysqlExampleSQL {

    /**
     * Entry point: builds the environment, registers the JDBC sink table and the
     * CDC source table, then submits a continuous INSERT ... SELECT sync job.
     *
     * @param args unused command-line arguments
     * @throws Exception if environment setup or SQL submission fails
     */
    public static void main(String[] args) throws Exception {

        // Execution environment with parallelism 1 (shared tuning lives in EnvUtil).
        StreamExecutionEnvironment env = EnvUtil.setFlinkEnv(1);
        // Per-job checkpoint directory so jobs do not overwrite each other's state.
        env.getCheckpointConfig()
                .setCheckpointStorage("hdfs://10.86.97.191:9000/flinktest/MysqlExampleSQL");

        EnvironmentSettings settings = EnvironmentSettings.newInstance().build();
        StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);

        // JDBC sink table (target MySQL version: 8.0.27).
        // Driver class is com.mysql.cj.jdbc.Driver (the legacy name was com.mysql.jdbc.Driver).
        String sinkDdl =
                " CREATE TABLE `sink_products` ("
                        + "id INT,"
                        + "name STRING,"
                        + "description STRING,"
                        + "PRIMARY KEY (`id`) NOT ENFORCED"
                        + ")with ("
                        + "'connector' = 'jdbc',"
                        + "'url' = 'jdbc:mysql://10.86.45.12:30105/flinktest2',"
                        + "'driver' = 'com.mysql.cj.jdbc.Driver',"
                        + "'username' = 'root',"
                        + "'password' = 'pwd'," // replace with the real password
                        + "'table-name' = 'products'"
                        + ")";
        tenv.executeSql(sinkDdl);

        // CDC source table (source MySQL version: 5.7.20).
        // snapshot.mode=initial: take a full snapshot first, then follow the binlog.
        String sourceDdl =
                "CREATE TABLE `src_products`( "
                        + "id INT,"
                        + "name STRING,"
                        + "description STRING,"
                        + "PRIMARY KEY (`id`)  NOT ENFORCED"
                        + ") with ("
                        + "'connector' = 'mysql-cdc', "
                        + "'hostname' = '10.86.37.169', "
                        + "'port' = '3306', "
                        + "'username' = 'root', "
                        + "'password' = 'pwd', " // replace with the real password
                        + "'database-name' = 'flinktest1', "
                        + "'table-name' = 'products',"
                        + "'debezium.snapshot.mode' = 'initial'"
                        + ")";
        tenv.executeSql(sourceDdl);

        // Continuous sync: stream every change from src_products into sink_products.
        TableResult syncResult = tenv.executeSql(
                "INSERT INTO sink_products SELECT id, name, description FROM src_products");

        // Print the submitted job's status via the TableResult.
        syncResult.print();
    }
}

1.3 pom.xml

注意修改此处:

2.web UI


3.数据库

flinktest1.products

flinktest2.products

4.部署

相关构建、部署,参考:一文说清flink从编码到部署上线

部署脚本:

shell
flink run-application -t yarn-application -Dparallelism.default=1 -Denv.java.opts=" -Dfile.encoding=UTF-8 -Dsun.jnu.encoding=UTF-8" -Dtaskmanager.memory.process.size=1g -Dyarn.application.name="FlinkCdcMysql"  -Dtaskmanager.numberOfTaskSlots=1 -c com.zl.MysqlExampleSQL /home/FlickCDC-1.0-SNAPSHOT-jar-with-dependencies.jar

部署日志:

yarn:

5.常见问题

5.1 错误1

开发环境,错误日志:

"Caused by: java.lang.NoSuchMethodError: com.mysql.cj.CharsetMapping.getJavaEncodingForMysqlCharset(Ljava/lang/String;)Ljava/lang/String;"

解决:去掉pom.xml中"mysql-connector-java"相关依赖。

5.2 错误2

部署后,错误日志:

①"Caused by: org.apache.flink.table.api.ValidationException: Cannot discover a connector using option: 'connector'='mysql-cdc'"。

②"Caused by: org.apache.flink.table.api.ValidationException: Could not find any factory for identifier 'jdbc' that implements 'org.apache.flink.table.factories.DynamicTableFactory' in the classpath"。

解决:

"flink-connector-jdbc_2.11-1.14.0.jar"、"flink-connector-mysql-cdc-2.4.0.jar"放到服务器flink的lib目录,如下图所示:

5.3 错误3

部署后,错误日志:

" Exception java.lang.NoClassDefFoundError: com/mysql/cj/jdbc/Driver"。

解决:

"mysql-connector-java-8.0.27.jar"放到服务器flink的lib目录,如下图所示:

6.代码

完整代码见:https://gitee.com/core815/flink-cdc-mysql

相关推荐
Bdygsl1 小时前
MySQL(1)—— 基本概念和操作
数据库·mysql
身如柳絮随风扬1 小时前
什么是左匹配规则?
数据库·sql·mysql
jiankeljx1 小时前
mysql之如何获知版本
数据库·mysql
小李来了!2 小时前
数据库DDL、DML、DQL、DCL详解
数据库·mysql
念陌曦2 小时前
Flink总结
大数据·flink
我科绝伦(Huanhuan Zhou)3 小时前
【生产案例】MySQL InnoDB 数据损坏崩溃修复
数据库·mysql·adb
海棠蚀omo3 小时前
从零敲开 MySQL 的大门:库与表的基础操作实战(保姆级入门指南)
数据库·mysql
廋到被风吹走4 小时前
【MySql】超时问题分析
java·数据库·mysql
y = xⁿ4 小时前
重生之我创作出了小红书:对象存储模块,用户资料模块
后端·mysql·intellij-idea
Y001112364 小时前
Day10-MySQL-事物
数据库·sql·mysql