使用 Apache Flink CDC 3.0 实现 MySQL 到 Elasticsearch 的数据同步

下面我将创建一个完整的 Spring Boot 项目,使用 Flink CDC 3.0 基于 MySQL 的 binlog 实现数据同步到 Elasticsearch。

项目概述

这个项目将:

  1. 使用 Flink CDC 连接 MySQL 并读取 binlog
  2. 处理数据变化(插入、更新、删除)
  3. 将数据同步到 Elasticsearch
  4. 提供 REST API 管理同步任务

项目结构

复制代码
src/main/java/
├── com/example/cdc/
│   ├── config/
│   │   ├── FlinkConfig.java
│   │   └── ElasticsearchConfig.java
│   ├── model/
│   │   └── User.java
│   ├── service/
│   │   ├── SyncService.java
│   │   └── JobManager.java
│   ├── controller/
│   │   └── SyncController.java
│   └── FlinkCdcApplication.java

1. 添加依赖 (pom.xml)

xml 复制代码
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>FlinkCDC</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>FlinkCDC</name>
    <description>FlinkCDC</description>
    <properties>
        <java.version>11</java.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <spring-boot.version>2.7.6</spring-boot.version>
        <flink.version>1.16.0</flink.version>
        <flink-cdc.version>3.0.1</flink-cdc.version>
        <!-- Since Flink 1.16 the Elasticsearch connector is released separately
             from Flink core and uses its own version scheme (x.y.z-FLINKVERSION). -->
        <flink-connector-es.version>3.0.1-1.16</flink-connector-es.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Flink CDC: MySQL binlog source -->
        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>${flink-cdc.version}</version>
        </dependency>

        <!-- Flink Connector Elasticsearch.
             NOTE: this connector is externalized from the Flink release; using
             ${flink.version} (1.16.0) here would not resolve — the correct
             artifact version for Flink 1.16 is 3.0.1-1.16. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch7</artifactId>
            <version>${flink-connector-es.version}</version>
        </dependency>

        <!-- Flink JSON -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Flink Java runtime.
             Must NOT be <scope>provided</scope>: this application embeds the
             Flink mini-cluster inside a Spring Boot process, so the classes
             are needed at runtime, not supplied by an external Flink cluster. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- Flink CLI / local executor (required for env.execute() in-process) -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.36</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-dependencies</artifactId>
                <version>${spring-boot.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>11</source>
                    <target>11</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>${spring-boot.version}</version>
                <configuration>
                    <mainClass>com.example.cdc.FlinkCdcApplication</mainClass>
                    <!-- <skip>true</skip> removed: it disabled BOTH the
                         repackage execution declared below and `mvn
                         spring-boot:run`, which the usage instructions rely on. -->
                </configuration>
                <executions>
                    <execution>
                        <id>repackage</id>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

2. 数据模型 (User.java)

java 复制代码
package com.example.cdc.model;

import lombok.Data;

/**
 * Data holder mirroring one row of the synchronized MySQL {@code users} table.
 *
 * <p>Lombok {@code @Data} generates getters/setters, {@code equals},
 * {@code hashCode} and {@code toString} for all fields.
 */
@Data
public class User {
    // Primary key of the source row.
    private Long id;
    private String name;
    private String email;
    // Timestamps stored as epoch numbers — units (seconds vs. millis) are not
    // visible here; TODO confirm against the MySQL column definitions.
    private Long createdAt;
    private Long updatedAt;
    // NOTE(review): presumably a soft-delete flag — verify against the schema.
    private Boolean deleted;
}
3. Flink 配置 (FlinkConfig.java)

java 复制代码
package com.example.cdc.config;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration exposing a single shared Flink
 * {@link StreamExecutionEnvironment} bean for the whole application.
 */
@Configuration
public class FlinkConfig {

    /**
     * Obtains the execution environment from Flink's context factory
     * (a local environment when running embedded in this process).
     *
     * @return the shared stream execution environment
     */
    @Bean
    public StreamExecutionEnvironment streamExecutionEnvironment() {
        final StreamExecutionEnvironment environment =
                StreamExecutionEnvironment.getExecutionEnvironment();
        return environment;
    }
}

4. Elasticsearch 配置 (ElasticsearchConfig.java)

java 复制代码
package com.example.cdc.config;

import org.apache.http.HttpHost;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration providing an Elasticsearch high-level REST client.
 *
 * <p>Host and port come from the {@code elasticsearch.*} properties in
 * application.yml, with localhost:9200 as fallback defaults.
 */
@Configuration
public class ElasticsearchConfig {

    @Value("${elasticsearch.host:localhost}")
    private String host;

    @Value("${elasticsearch.port:9200}")
    private int port;

    /**
     * Builds the client bean; Spring calls {@code close()} on context shutdown
     * (see {@code destroyMethod}) so the underlying connections are released.
     *
     * @return a high-level client talking plain HTTP to the configured node
     */
    @Bean(destroyMethod = "close")
    public RestHighLevelClient restHighLevelClient() {
        final HttpHost node = new HttpHost(host, port, "http");
        return new RestHighLevelClient(RestClient.builder(node));
    }
}

5. 同步服务 (SyncService.java)

java 复制代码
package com.example.cdc.service;

import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.elasticsearch7.ElasticsearchSink;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import org.elasticsearch.xcontent.XContentType;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.List;

/**
 * Builds and runs the MySQL-binlog-to-Elasticsearch synchronization pipeline.
 *
 * <p>All connection settings are injected from {@code mysql.*} and
 * {@code elasticsearch.*} properties in application.yml.
 */
@Service
public class SyncService {

    @Autowired
    private StreamExecutionEnvironment env;

    @Value("${mysql.host:localhost}")
    private String mysqlHost;

    @Value("${mysql.port:3306}")
    private int mysqlPort;

    @Value("${mysql.username:root}")
    private String mysqlUsername;

    @Value("${mysql.password:password}")
    private String mysqlPassword;

    @Value("${mysql.database:test}")
    private String mysqlDatabase;

    @Value("${mysql.table:users}")
    private String mysqlTable;

    @Value("${elasticsearch.host:localhost}")
    private String esHost;

    @Value("${elasticsearch.port:9200}")
    private int esPort;

    @Value("${elasticsearch.index:users}")
    private String esIndex;

    /**
     * Assembles the CDC source, a pass-through transform and the Elasticsearch
     * sink, then submits the job.
     *
     * <p>{@link StreamExecutionEnvironment#execute(String)} blocks until the
     * job terminates, so this must be called from a dedicated thread (see
     * {@code JobManager}).
     *
     * @throws Exception if the Flink job fails to start or terminates abnormally
     */
    public void startSync() throws Exception {
        // BUGFIX: copy the index name into a final local so the sink lambda
        // below captures only a serializable String. Referencing the field
        // `esIndex` directly would make the lambda capture `this` — a Spring
        // bean that is not Serializable — and the job submission would fail
        // with a closure-serialization error.
        final String targetIndex = esIndex;

        // MySQL CDC source: takes an initial snapshot of the table, then
        // tails the binlog for inserts/updates/deletes.
        MySqlSource<String> mySqlSource = MySqlSource.<String>builder()
                .hostname(mysqlHost)
                .port(mysqlPort)
                .databaseList(mysqlDatabase)
                .tableList(mysqlDatabase + "." + mysqlTable)
                .username(mysqlUsername)
                .password(mysqlPassword)
                // Each change event is emitted as a Debezium-style JSON string.
                .deserializer(new JsonDebeziumDeserializationSchema())
                .build();

        DataStream<String> stream = env.fromSource(
                mySqlSource,
                WatermarkStrategy.noWatermarks(),
                "MySQL Source"
        );

        // Pass-through transform kept as an extension point for custom logic.
        // Implemented as a static nested class so it holds no hidden reference
        // to the enclosing (non-serializable) Spring bean.
        DataStream<String> processedStream = stream.map(new PassThroughMapper());

        // Elasticsearch sink over plain HTTP.
        List<HttpHost> httpHosts = new ArrayList<>();
        httpHosts.add(new HttpHost(esHost, esPort, "http"));

        ElasticsearchSink.Builder<String> esSinkBuilder = new ElasticsearchSink.Builder<>(
                httpHosts,
                (element, ctx, indexer) -> {
                    // NOTE(review): every event — including Debezium delete
                    // events — is indexed as a new document; deletes are not
                    // propagated as ES deletions. Confirm whether that is the
                    // intended behavior.
                    IndexRequest request = Requests.indexRequest()
                            .index(targetIndex)
                            .source(element, XContentType.JSON);
                    indexer.add(request);
                }
        );

        // Flush after every single record: lowest latency, lowest throughput.
        // Raise this (and configure a flush interval) for production loads.
        esSinkBuilder.setBulkFlushMaxActions(1);

        // The elasticsearch7 connector exposes a SinkFunction, hence addSink
        // rather than the newer sinkTo API.
        processedStream.addSink(esSinkBuilder.build());

        // Blocks until the job finishes or is cancelled.
        env.execute("MySQL to Elasticsearch Sync");
    }

    /** Identity mapper; replace {@link #map} with real transformation logic. */
    private static class PassThroughMapper implements MapFunction<String, String> {
        @Override
        public String map(String value) {
            return value;
        }
    }
}

6. 任务管理器 (JobManager.java)

java 复制代码
package com.example.cdc.service;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import javax.annotation.PreDestroy;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Manages the lifecycle of the background sync job: runs
 * {@link SyncService#startSync()} on its own thread (the call blocks) and
 * tears it down on request or on application shutdown.
 */
@Component
public class JobManager {

    private static final Logger LOG = Logger.getLogger(JobManager.class.getName());

    @Autowired
    private StreamExecutionEnvironment env;

    @Autowired
    private SyncService syncService;

    // Guarded by `this`; non-null once a job has been started.
    private Thread jobThread;

    /**
     * Starts the sync job on a named background thread.
     *
     * <p>BUGFIX: now a no-op when a job is already running — previously a
     * second call would spawn a second thread against the same shared
     * execution environment.
     */
    public synchronized void startJob() {
        if (jobThread != null && jobThread.isAlive()) {
            LOG.warning("Sync job already running; ignoring start request");
            return;
        }
        jobThread = new Thread(() -> {
            try {
                syncService.startSync();
            } catch (InterruptedException e) {
                // Restore the interrupt flag instead of swallowing it.
                Thread.currentThread().interrupt();
                LOG.info("Sync job thread interrupted; shutting down");
            } catch (Exception e) {
                // Previously printStackTrace(); log with full stack trace.
                LOG.log(Level.SEVERE, "Sync job terminated with error", e);
            }
        }, "flink-cdc-sync");
        jobThread.start();
    }

    /**
     * Stops the running job by closing the execution environment and
     * interrupting the worker thread. Safe to call when no job is running.
     */
    public synchronized void stopJob() {
        if (env != null) {
            try {
                // NOTE(review): close() shuts down the (local) environment;
                // whether it cancels a remotely submitted job depends on the
                // deployment — confirm for non-embedded setups.
                env.close();
            } catch (Exception e) {
                LOG.log(Level.WARNING, "Error while closing execution environment", e);
            }
        }

        if (jobThread != null && jobThread.isAlive()) {
            jobThread.interrupt();
        }
    }

    /** Ensures the job is stopped when the Spring context shuts down. */
    @PreDestroy
    public void onDestroy() {
        stopJob();
    }
}

7. REST 控制器 (SyncController.java)

java 复制代码
package com.example.cdc.controller;

import com.example.cdc.service.JobManager;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

/**
 * REST endpoints for controlling the MySQL-to-Elasticsearch sync job.
 */
@RestController
@RequestMapping("/api/sync")
public class SyncController {

    @Autowired
    private JobManager jobManager;

    /**
     * Launches the background sync job.
     *
     * @return a human-readable status message
     */
    @PostMapping("/start")
    public String startSync() {
        try {
            jobManager.startJob();
        } catch (Exception e) {
            return "Failed to start sync job: " + e.getMessage();
        }
        return "Sync job started successfully";
    }

    /**
     * Stops the background sync job.
     *
     * @return a human-readable status message
     */
    @PostMapping("/stop")
    public String stopSync() {
        try {
            jobManager.stopJob();
        } catch (Exception e) {
            return "Failed to stop sync job: " + e.getMessage();
        }
        return "Sync job stopped successfully";
    }
}

8. 应用主类 (FlinkCdcApplication.java)

java 复制代码
package com.example.cdc;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Entry point bootstrapping the Spring application context.
 */
@SpringBootApplication
public class FlinkCdcApplication {

    /**
     * Boots the application.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        new SpringApplication(FlinkCdcApplication.class).run(args);
    }
}

9. 配置文件 (application.yml)

yaml 复制代码
# HTTP port for the REST management API (/api/sync/*).
server:
  port: 8080

spring:
  application:
    name: mysql-cdc-to-es

# Connection settings for the MySQL source; the configured user needs
# REPLICATION SLAVE / REPLICATION CLIENT privileges (see usage notes below).
mysql:
  host: localhost
  port: 3306
  username: root
  password: your_mysql_password
  database: your_database
  table: your_table

# Target Elasticsearch node and index for the synchronized documents.
elasticsearch:
  host: localhost
  port: 9200
  index: your_es_index

# NOTE(review): this value is not read by any class shown here — wire it into
# the StreamExecutionEnvironment or remove it.
flink:
  parallelism: 1

使用说明

  1. 确保 MySQL 已开启 binlog:

    sql 复制代码
    SHOW VARIABLES LIKE 'log_bin';

    如果未开启,需要在 MySQL 配置文件中添加:

    复制代码
    [mysqld]
    server-id=1
    log-bin=mysql-bin
    binlog_format=row
    binlog_row_image=full
  2. 创建具有复制权限的 MySQL 用户:

    sql 复制代码
    CREATE USER 'flink_user'@'%' IDENTIFIED BY 'password';
    GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'flink_user'@'%';
    FLUSH PRIVILEGES;
  3. 启动应用程序:

    复制代码
    mvn spring-boot:run
  4. 通过 REST API 启动同步任务:

    复制代码
    POST http://localhost:8080/api/sync/start
相关推荐
花果山总钻风几秒前
MySQL奔溃,InnoDB文件损坏修复记录
数据库·mysql·adb
hqxstudying4 小时前
MyBatis 和 MyBatis-Plus对比
java·数据库·mysql·mybatis
DemonAvenger5 小时前
高效JOIN操作:多表关联查询技巧与实战经验分享
数据库·mysql·性能优化
新法国菜16 小时前
MySql知识梳理之DML语句
数据库·mysql
两张不够花19 小时前
Shell脚本源码安装Redis、MySQL、Mongodb、PostgreSQL(无报错版)
linux·数据库·redis·mysql·mongodb·postgresql·云计算
花花无缺21 小时前
MySQL--B+树
后端·mysql
似水流年流不尽思念1 天前
mysql日志文件有哪些,分别介绍下作用 ?
后端·mysql·面试
.Shu.1 天前
Mysql InnoDB 底层架构设计、功能、原理、源码系列合集【五、InnoDB 高阶机制与实战调优】
数据库·mysql