flink SQL实现mysql source sink

接上文:一文说清flink从编码到部署上线

环境说明:MySQL:5.7;flink:1.14.0;hadoop:3.0.0;操作系统:CentOS 7.6;JDK:1.8.0_401。

1.代码实现

1.1 EnvUtil实现

EnvUtil用于创建flink的运行环境。

java 复制代码
package com.zl.utils;

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.time.Duration;
import java.time.ZoneOffset;
import java.util.concurrent.TimeUnit;

/**
 * EnvUtil
 *
 * @description: Factory helpers that build a preconfigured Flink streaming
 * environment (restart strategy, RocksDB backend, checkpointing) and a
 * mini-batch-tuned table environment on top of it.
 */
public class EnvUtil {
    /**
     * Builds a StreamExecutionEnvironment with restart, state-backend and
     * checkpoint defaults applied.
     *
     * @param parallelism desired parallelism; non-positive values fall back to 1
     * @return the configured streaming execution environment
     */
    public static StreamExecutionEnvironment setFlinkEnv(int parallelism) {
        // Maps to the HDFS home directory /user/<name>; this article uses "root".
        System.setProperty("HADOOP_USER_NAME", "root");

        Configuration conf = new Configuration();
        conf.setInteger("rest.port", 1000);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);

        // Default to a parallelism of 1 when the caller passes a non-positive value.
        env.setParallelism(parallelism > 0 ? parallelism : 1);

        // Restart up to 50 times, waiting 6 minutes between attempts.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, Time.minutes(6)));
        // Checkpoint every 10 minutes (10*60*1000 = 600000 ms), exactly-once mode (the default).
        env.enableCheckpointing(600000, CheckpointingMode.EXACTLY_ONCE);
        // RocksDB state backend with incremental checkpoints enabled.
        env.setStateBackend(new EmbeddedRocksDBStateBackend(true));

        CheckpointConfig checkpointConfig = env.getCheckpointConfig();
        // Allow only one checkpoint in flight at a time (the default).
        checkpointConfig.setMaxConcurrentCheckpoints(1);
        // Minimum pause between checkpoints: 60000 ms (1 minute).
        checkpointConfig.setMinPauseBetweenCheckpoints(60000);
        // Keep externalized checkpoints around after the job is cancelled.
        checkpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // Tolerate up to 5 checkpoint failures before failing the job.
        checkpointConfig.setTolerableCheckpointFailureNumber(5);
        // Checkpoint timeout: 10 minutes (the default).
        checkpointConfig.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(10));
        // Disable operator chaining so backpressure is easier to pinpoint in the UI.
        env.disableOperatorChaining();
        return env;
    }

    /**
     * Wraps the given streaming environment in a StreamTableEnvironment tuned
     * for mini-batch aggregation in the UTC+8 time zone.
     *
     * @param env the underlying streaming execution environment
     * @return the configured table environment
     */
    public static StreamTableEnvironment getFlinkTenv(StreamExecutionEnvironment env) {
        StreamTableEnvironment tenv = StreamTableEnvironment.create(env);
        // Session time zone: UTC+8.
        tenv.getConfig().setLocalTimeZone(ZoneOffset.ofHours(8));

        Configuration tableConf = tenv.getConfig().getConfiguration();
        // Enable mini-batch execution to cut per-record state access.
        tableConf.setString("table.exec.mini-batch.enabled", "true");
        // Flush a mini-batch at least every 5 seconds.
        tableConf.setString("table.exec.mini-batch.allow-latency", "5 s");
        // Cap buffered rows per batch at 20000 to avoid OOM.
        tableConf.setString("table.exec.mini-batch.size", "20000");
        // Two-phase (local/global) aggregation to mitigate data skew.
        tableConf.setString("table.optimizer.agg-phase-strategy", "TWO_PHASE");
        // Idle state TTL of 25 hours, set via the API.
        tenv.getConfig().setIdleStateRetention(Duration.ofHours(25));

        return tenv;
    }

}

1.2 核心代码

java 复制代码
package com.zl;

import com.zl.utils.EnvUtil;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.contrib.streaming.state.EmbeddedRocksDBStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.util.concurrent.TimeUnit;

public class MysqlExampleSQL {
    /**
     * Streams the `products` table from a MySQL 5.7 source (mysql-cdc
     * connector) into a MySQL 8.0 sink (JDBC connector) using Flink SQL.
     */
    public static void main(String[] args) throws Exception {

        // Runtime environment, parallelism 1.
        StreamExecutionEnvironment env = EnvUtil.setFlinkEnv(1);
        // Per-job checkpoint directory keeps this job isolated from others.
        env.getCheckpointConfig().setCheckpointStorage("hdfs://10.86.97.191:9000/flinktest/MysqlExampleSQL");

        EnvironmentSettings settings = EnvironmentSettings.newInstance().build();
        StreamTableEnvironment tenv = StreamTableEnvironment.create(env, settings);

        // Sink table — target database version 8.0.27, written via JDBC.
        String sinkDdl =
                " CREATE TABLE `sink_products` ("
                        + "id INT,"
                        + "name STRING,"
                        + "description STRING,"
                        + "PRIMARY KEY (`id`) NOT ENFORCED"
                        + ")with ("
                        + "'connector' = 'jdbc',"
                        + "'url' = 'jdbc:mysql://10.86.45.12:30105/flinktest2',"
                        // MySQL 8.x driver class; the legacy 5.x name is com.mysql.jdbc.Driver.
                        + "'driver' = 'com.mysql.cj.jdbc.Driver',"
                        + "'username' = 'root',"
                        + "'password' = 'pwd'," // remember to replace with the real password
                        + "'table-name' = 'products'"
                        + ")";
        tenv.executeSql(sinkDdl);

        // Source table — business database version 5.7.20, captured via CDC.
        String sourceDdl =
                "CREATE TABLE `src_products`( "
                        + "id INT,"
                        + "name STRING,"
                        + "description STRING,"
                        + "PRIMARY KEY (`id`)  NOT ENFORCED"
                        + ") with ("
                        + "'connector' = 'mysql-cdc', "
                        + "'hostname' = '10.86.37.169', "
                        + "'port' = '3306', "
                        + "'username' = 'root', "
                        + "'password' = 'pwd', " // remember to replace with the real password
                        + "'database-name' = 'flinktest1', "
                        + "'table-name' = 'products',"
                        + "'debezium.snapshot.mode' = 'initial'"
                        + ")";
        tenv.executeSql(sourceDdl);

        // Continuous sync: forward every change row from source to sink.
        TableResult insertResult = tenv.executeSql(
                "INSERT INTO sink_products "
                        + "SELECT "
                        + "id, "
                        + "name, "
                        + "description "
                        + "FROM src_products");

        // Block here and surface the job status through the TableResult.
        insertResult.print();
    }
}

1.3 pom.xml

注意修改此处:

2.web UI


3.数据库

flinktest1.products

flinktest2.products

4.部署

相关构建、部署,参考:一文说清flink从编码到部署上线

部署脚本:

powershell 复制代码
flink run-application -t yarn-application -Dparallelism.default=1 -Denv.java.opts=" -Dfile.encoding=UTF-8 -Dsun.jnu.encoding=UTF-8" -Dtaskmanager.memory.process.size=1g -Dyarn.application.name="FlinkCdcMysql"  -Dtaskmanager.numberOfTaskSlots=1 -c com.zl.MysqlExampleSQL /home/FlickCDC-1.0-SNAPSHOT-jar-with-dependencies.jar

部署日志:

yarn:

5.常见问题

5.1 错误1

开发环境,错误日志:

"Caused by: java.lang.NoSuchMethodError: com.mysql.cj.CharsetMapping.getJavaEncodingForMysqlCharset(Ljava/lang/String;)Ljava/lang/String;"

解决:去掉pom.xml中"mysql-connector-java"相关依赖。

5.2 错误2

部署后,错误日志:

①"Caused by: org.apache.flink.table.api.ValidationException: Cannot discover a connector using option: 'connector'='mysql-cdc'"。

②"Caused by: org.apache.flink.table.api.ValidationException: Could not find any factory for identifier 'jdbc' that implements 'org.apache.flink.table.factories.DynamicTableFactory' in the classpath"。

解决:

"flink-connector-jdbc_2.11-1.14.0.jar"、"flink-connector-mysql-cdc-2.4.0.jar"放到服务器flink的lib目录,如下图所示:

5.3 错误3

部署后,错误日志:

" Exception java.lang.NoClassDefFoundError: com/mysql/cj/jdbc/Driver"。

解决:

"mysql-connector-java-8.0.27.jar"放到服务器flink的lib目录,如下图所示:

6.代码

完整代码见:https://gitee.com/core815/flink-cdc-mysql

相关推荐
雾里看山9 分钟前
【MySQL】数据库基础知识
数据库·笔记·mysql·oracle
wallezhou2 小时前
mysql数据库启动出现Plugin ‘FEEDBACK‘ is disabled.问题解决记录
数据库·mysql
{⌐■_■}3 小时前
【GORM】事务,嵌套事务,保存点事务的使用,简单电商平台go案例
开发语言·jvm·后端·mysql·golang
暮湫3 小时前
MySQL(4)多表查询
数据库·mysql
TDengine (老段)3 小时前
TDengine 做为 FLINK 数据源技术参考手册
大数据·数据库·flink·时序数据库·tdengine·涛思数据
m0_748252384 小时前
三分钟内快速完成MySQL到达梦数据库的迁移
数据库·mysql
m0_748255654 小时前
MySQL篇之对MySQL进行参数优化,提高MySQL性能
数据库·mysql
shenghuiping20014 小时前
SQLmap 自动注入 -02
mysql·web·sql注入·sqlmap
m0_748256567 小时前
MySQL 实战 4 种将数据同步到ES方案
数据库·mysql·elasticsearch
蚂蚁质量7 小时前
mysql的测试方案
数据库·mysql