Flink CDC: MySQL to ClickHouse

Complete Maven dependencies (pom.xml)

<dependencies>
       <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-core -->
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-core</artifactId>
           <version>1.13.0</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-streaming-java_2.12</artifactId>
           <version>1.13.0</version>
       </dependency>
 
<!--       <dependency>-->
<!--           <groupId>org.apache.flink</groupId>-->
<!--           <artifactId>flink-jdbc_2.12</artifactId>-->
<!--           <version>1.10.3</version>-->
<!--       </dependency>-->
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-connector-jdbc_2.12</artifactId>
           <version>1.13.0</version>
       </dependency>
 
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-java</artifactId>
           <version>1.13.0</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-clients_2.12</artifactId>
           <version>1.13.0</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-table-api-java-bridge_2.12</artifactId>
           <version>1.13.0</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-table-common</artifactId>
           <version>1.13.0</version>
       </dependency>
 
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-table-planner_2.12</artifactId>
           <version>1.13.0</version>
       </dependency>
 
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-table-planner-blink_2.12</artifactId>
           <version>1.13.0</version>
       </dependency>
       <dependency>
           <groupId>org.apache.flink</groupId>
           <artifactId>flink-table-planner-blink_2.12</artifactId>
           <version>1.13.0</version>
           <type>test-jar</type>
       </dependency>
 
       <dependency>
           <groupId>com.alibaba.ververica</groupId>
           <artifactId>flink-connector-mysql-cdc</artifactId>
           <version>1.4.0</version>
       </dependency>
 
 
       <dependency>
           <groupId>com.aliyun</groupId>
           <artifactId>flink-connector-clickhouse</artifactId>
           <version>1.12.0</version>
       </dependency>
       <dependency>
           <groupId>ru.yandex.clickhouse</groupId>
           <artifactId>clickhouse-jdbc</artifactId>
           <version>0.2.6</version>
       </dependency>
       <dependency>
           <groupId>com.google.code.gson</groupId>
           <artifactId>gson</artifactId>
           <version>2.8.6</version>
       </dependency>
   </dependencies>
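The DataStream sink below writes into a ClickHouse table named sink_ch_test, which must already exist. A minimal sketch of creating it over clickhouse-jdbc, assuming the three columns the sink binds (id, name, description) with Int32/String types and a MergeTree engine; the column types and engine are assumptions, so adjust them to your actual MySQL source schema:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class CreateClickhouseSinkTable {
    public static void main(String[] args) throws Exception {
        // Same driver, URL and credentials as the ClickhouseSink further below
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        try (Connection conn = DriverManager.getConnection(
                "jdbc:clickhouse://localhost:8123/default", "default", "dafei1288");
             Statement stmt = conn.createStatement()) {
            // Column types and the MergeTree engine are assumptions; keep them aligned with the MySQL table
            stmt.execute("CREATE TABLE IF NOT EXISTS sink_ch_test (" +
                    "id Int32, name String, description String" +
                    ") ENGINE = MergeTree() ORDER BY id");
        }
    }
}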
DataStream API implementation. A custom DebeziumDeserializationSchema flattens each change record into a JSON string, and a RichSinkFunction writes insert events into ClickHouse over JDBC:
package name.lijiaqi.cdc;
 
import com.alibaba.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.google.gson.Gson;
import com.google.gson.internal.LinkedTreeMap;
import io.debezium.data.Envelope;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import com.alibaba.ververica.cdc.connectors.mysql.MySQLSource;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.source.SourceRecord;
 
import org.apache.kafka.connect.data.Field;
import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.Struct;
 
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.util.HashMap;
 
public class MySqlBinlogSourceExample {
   public static void main(String[] args) throws Exception {
       SourceFunction<String> sourceFunction = MySQLSource.<String>builder()
              .hostname("localhost")
              .port(3306)
              .databaseList("test")
              .username("flinkcdc")
              .password("dafei1288")
              .deserializer(new JsonDebeziumDeserializationSchema())
              .build();
 
       StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
 
        // add the MySQL CDC source
        env.addSource(sourceFunction)
        // add the ClickHouse sink
        .addSink(new ClickhouseSink());
 
       env.execute("mysql2clickhouse");
  }
 
    // Deserialize each CDC record into a JSON string
    public static class JsonDebeziumDeserializationSchema implements DebeziumDeserializationSchema<String> {
        @Override
        public void deserialize(SourceRecord sourceRecord, Collector<String> collector) throws Exception {
 
           Gson jsstr = new Gson();
           HashMap<String, Object> hs = new HashMap<>();
 
           String topic = sourceRecord.topic();
           String[] split = topic.split("[.]");
           String database = split[1];
           String table = split[2];
           hs.put("database",database);
           hs.put("table",table);
            // Operation type (CREATE/UPDATE/DELETE/READ)
            Envelope.Operation operation = Envelope.operationFor(sourceRecord);
            // The change payload itself
           Struct struct = (Struct)sourceRecord.value();
           Struct after = struct.getStruct("after");
 
           if (after != null) {
               Schema schema = after.schema();
               HashMap<String, Object> afhs = new HashMap<>();
               for (Field field : schema.fields()) {
                   afhs.put(field.name(), after.get(field.name()));
              }
               hs.put("data",afhs);
          }
 
           String type = operation.toString().toLowerCase();
           if ("create".equals(type)) {
               type = "insert";
          }
           hs.put("type",type);
 
           collector.collect(jsstr.toJson(hs));
      }
 
       @Override
       public TypeInformation<String> getProducedType() {
           return BasicTypeInfo.STRING_TYPE_INFO;
      }
  }
 
 
   public static class ClickhouseSink extends RichSinkFunction<String>{
       Connection connection;
       PreparedStatement pstmt;
       private Connection getConnection() {
           Connection conn = null;
           try {
               Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
               String url = "jdbc:clickhouse://localhost:8123/default";
               conn = DriverManager.getConnection(url,"default","dafei1288");
 
          } catch (Exception e) {
               e.printStackTrace();
          }
           return conn;
      }
 
       @Override
       public void open(Configuration parameters) throws Exception {
           super.open(parameters);
           connection = getConnection();
           String sql = "insert into sink_ch_test(id,name,description) values (?,?,?)";
           pstmt = connection.prepareStatement(sql);
      }
 
        // Called once per incoming record
        @Override
        public void invoke(String value, Context context) throws Exception {
           //{"database":"test","data":{"name":"jacky","description":"fffff","id":8},"type":"insert","table":"test_cdc"}
           Gson t = new Gson();
           HashMap<String,Object> hs = t.fromJson(value,HashMap.class);
           String database = (String)hs.get("database");
           String table = (String)hs.get("table");
           String type = (String)hs.get("type");
 
           if("test".equals(database) && "test_cdc".equals(table)){
               if("insert".equals(type)){
                   System.out.println("insert => "+value);
                   LinkedTreeMap<String,Object> data = (LinkedTreeMap<String,Object>)hs.get("data");
                   String name = (String)data.get("name");
                   String description = (String)data.get("description");
                   Double id = (Double)data.get("id");
                    // Bind the placeholders in order: id, name, description
                   pstmt.setInt(1, id.intValue());
                   pstmt.setString(2, name);
                   pstmt.setString(3, description);
 
                   pstmt.executeUpdate();
              }
          }
      }
 
       @Override
       public void close() throws Exception {
           super.close();
 
           if(pstmt != null) {
               pstmt.close();
          }
 
           if(connection != null) {
               connection.close();
          }
      }
  }
}
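The sink above issues one executeUpdate per change event, which is inefficient for ClickHouse because every statement becomes its own tiny insert. Below is a minimal sketch of how the same sink could buffer rows and flush them with JDBC batching instead; it reuses the imports of MySqlBinlogSourceExample, keeps the same test.test_cdc filter, and the batch size is an arbitrary assumption, not part of the original example:

    public static class BatchingClickhouseSink extends RichSinkFunction<String> {
        private transient Connection connection;
        private transient PreparedStatement pstmt;
        private int buffered = 0;
        private static final int BATCH_SIZE = 500; // assumption: tune to your load

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
            connection = DriverManager.getConnection(
                    "jdbc:clickhouse://localhost:8123/default", "default", "dafei1288");
            pstmt = connection.prepareStatement(
                    "insert into sink_ch_test(id,name,description) values (?,?,?)");
        }

        @Override
        public void invoke(String value, Context context) throws Exception {
            HashMap<String, Object> hs = new Gson().fromJson(value, HashMap.class);
            if (!"test".equals(hs.get("database")) || !"test_cdc".equals(hs.get("table"))
                    || !"insert".equals(hs.get("type"))) {
                return; // same filter as the original sink: only inserts into test.test_cdc
            }
            LinkedTreeMap<String, Object> data = (LinkedTreeMap<String, Object>) hs.get("data");
            pstmt.setInt(1, ((Double) data.get("id")).intValue());
            pstmt.setString(2, (String) data.get("name"));
            pstmt.setString(3, (String) data.get("description"));
            pstmt.addBatch();             // buffer instead of writing immediately
            if (++buffered >= BATCH_SIZE) {
                pstmt.executeBatch();     // flush one multi-row insert
                buffered = 0;
            }
        }

        @Override
        public void close() throws Exception {
            if (pstmt != null) {
                if (buffered > 0) {
                    pstmt.executeBatch(); // flush whatever is left
                }
                pstmt.close();
            }
            if (connection != null) {
                connection.close();
            }
            super.close();
        }
    }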
A Flink SQL (Table API) variant of the pipeline: the same mysql-cdc source table, synced into a MySQL sink table through the jdbc connector:
package name.lijiaqi.cdc;
 
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.SqlDialect;
import org.apache.flink.table.api.TableResult;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
 
public class MysqlToMysqlMain {
   public static void main(String[] args) throws Exception {
       EnvironmentSettings fsSettings = EnvironmentSettings.newInstance()
              .useBlinkPlanner()
              .inStreamingMode()
              .build();
       StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
       env.setParallelism(1);
       StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, fsSettings);
 
 
 
       tableEnv.getConfig().setSqlDialect(SqlDialect.DEFAULT);
 
 
        // Source table: MySQL binlog via the mysql-cdc connector
       String sourceDDL =
               "CREATE TABLE mysql_binlog (\n" +
                       " id INT NOT NULL,\n" +
                       " name STRING,\n" +
                       " description STRING\n" +
                       ") WITH (\n" +
                       " 'connector' = 'mysql-cdc',\n" +
                       " 'hostname' = 'localhost',\n" +
                       " 'port' = '3306',\n" +
                       " 'username' = 'flinkcdc',\n" +
                       " 'password' = 'dafei1288',\n" +
                       " 'database-name' = 'test',\n" +
                       " 'table-name' = 'test_cdc'\n" +
                       ")";
 
 
       String url = "jdbc:mysql://127.0.0.1:3306/test";
       String userName = "root";
       String password = "dafei1288";
       String mysqlSinkTable = "test_cdc_sink";
        // Sink table: MySQL via the jdbc connector
       String sinkDDL =
               "CREATE TABLE test_cdc_sink (\n" +
                       " id INT NOT NULL,\n" +
                       " name STRING,\n" +
                       " description STRING,\n" +
                       " PRIMARY KEY (id) NOT ENFORCED \n " +
                       ") WITH (\n" +
                       " 'connector' = 'jdbc',\n" +
                       " 'driver' = 'com.mysql.jdbc.Driver',\n" +
                       " 'url' = '" + url + "',\n" +
                       " 'username' = '" + userName + "',\n" +
                       " 'password' = '" + password + "',\n" +
                       " 'table-name' = '" + mysqlSinkTable + "'\n" +
                       ")";
        // Sync logic: a straight insert-select from the CDC source into the sink
       String transformSQL =
               "insert into test_cdc_sink select * from mysql_binlog";
 
       tableEnv.executeSql(sourceDDL);
       tableEnv.executeSql(sinkDDL);
       TableResult result = tableEnv.executeSql(transformSQL);
 
        // executeSql has already submitted the streaming job; wait for the flink-cdc snapshot and print the result status.
        // A separate env.execute() is not needed here and would fail with "No operators defined in streaming topology".
        result.print();
  }
 
}
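The SQL job assumes that both test.test_cdc (the CDC source) and test.test_cdc_sink (the jdbc sink) already exist in MySQL with matching columns. A minimal sketch of creating the sink table over plain JDBC, assuming a MySQL driver is on the classpath (pulled in transitively by flink-connector-mysql-cdc or added explicitly); the VARCHAR lengths are assumptions derived only from the STRING columns in the DDL above:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class CreateMysqlSinkTable {
    public static void main(String[] args) throws Exception {
        // Same URL and credentials as the jdbc sink in MysqlToMysqlMain
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://127.0.0.1:3306/test", "root", "dafei1288");
             Statement stmt = conn.createStatement()) {
            // Column lengths are assumptions; keep them in sync with the source table test_cdc
            stmt.execute("CREATE TABLE IF NOT EXISTS test_cdc_sink (" +
                    "id INT NOT NULL PRIMARY KEY, " +
                    "name VARCHAR(255), " +
                    "description VARCHAR(255))");
        }
    }
}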