java实现运行SQL脚本完成数据迁移

需求背景

每个月初需要通过运行多个不同产量报表的SQL脚本,将产量数据同步落表到另一个系统的数据库表里面。将产量数据读出来后某些数据还要先进行一些特定处理才能进行落表。

技术方案

用java编程实现。

1、整体架构设计

1. 读取SQL文件 -> 2. 执行SQL获取数据 -> 3. 数据转换处理 -> 4. 分批批量插入。

2、完整代码实例

xml 复制代码
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-test</artifactId>
    <scope>test</scope>
</dependency>
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-jdbc</artifactId>
</dependency>
<dependency>
    <groupId>com.oracle.database.jdbc</groupId>
    <artifactId>ojdbc8</artifactId>
    <version>21.5.0.0</version>
</dependency>
java 复制代码
package com.xbhog.dataSync;

import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.*;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import javax.sql.DataSource;

import lombok.Data;
import org.springframework.dao.DataAccessException;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.BatchPreparedStatementSetter;
import org.springframework.jdbc.datasource.DriverManagerDataSource;
import org.springframework.transaction.annotation.Transactional;

/**
 * Runs a SQL script against a source database, transforms each row, and
 * bulk-inserts the results into a target database in batches.
 *
 * <p>Flow: read SQL file -> streaming query -> per-row transform ->
 * batched INSERT with per-batch commit and retry.
 *
 * <p>NOTE: an earlier version annotated {@code migrateData} with Spring's
 * {@code @Transactional}, but this class is instantiated with {@code new}
 * and works on raw JDBC connections, so the annotation had no effect.
 * Transactions are now managed explicitly on the target connection.
 */
public class SqlFileBatchProcessor {

    /**
     * Default target table name. The previous code passed "" as the table
     * name, which generated the invalid statement
     * "INSERT INTO  (id, name, value) VALUES (?, ?, ?)".
     */
    private static final String DEFAULT_TARGET_TABLE = "target_table";

    public SqlFileBatchProcessor() {
        // Stateless; the previously-unused DataSource/JdbcTemplate fields were removed.
    }

    /**
     * Migrates data into {@link #DEFAULT_TARGET_TABLE}.
     *
     * @param sqlFilePath path of the SQL script to run against the source DB
     * @param batchSize   rows per INSERT batch (must be &gt; 0)
     * @throws SQLException on database errors after retries are exhausted
     * @throws IOException  if the SQL file cannot be read
     */
    public void migrateData(DataSource sourceDs, DataSource targetDs,
                            String sqlFilePath, int batchSize) throws SQLException, IOException {
        migrateData(sourceDs, targetDs, sqlFilePath, batchSize, DEFAULT_TARGET_TABLE);
    }

    /**
     * Overload that lets the caller choose the target table — a
     * backward-compatible generalization of the 4-argument method.
     *
     * @param targetTable name of the table to insert into (must come from
     *                    trusted configuration, never from user input — it is
     *                    concatenated into the INSERT statement)
     */
    public void migrateData(DataSource sourceDs, DataSource targetDs,
                            String sqlFilePath, int batchSize,
                            String targetTable) throws SQLException, IOException {
        if (batchSize <= 0) {
            throw new IllegalArgumentException("batchSize must be > 0: " + batchSize);
        }

        // Read the SQL script to execute on the source database.
        String sql = readSqlFile(sqlFilePath);
        System.out.println("读取SQL文件内容:\n" + sql);

        int totalProcessed = 0;

        try (Connection sourceConn = sourceDs.getConnection();
             Connection targetConn = targetDs.getConnection()) {

            // Commit once per batch; a failed batch is rolled back before retry,
            // so a retry can never duplicate the rows of a half-executed batch.
            targetConn.setAutoCommit(false);

            String dbType = getDatabaseType(sourceConn);

            // Streaming query so huge report result sets do not exhaust memory.
            try (Statement stmt = createStreamingStatement(sourceConn, dbType);
                 ResultSet rs = stmt.executeQuery(sql)) {

                List<TargetData> currentBatch = new ArrayList<>(batchSize);

                while (rs.next()) {
                    SourceData sourceData = new SourceData();
                    // Map columns according to the actual report schema.
                    sourceData.setId(rs.getInt("id"));
                    sourceData.setName(rs.getString("name"));
                    sourceData.setValue(rs.getString("raw_value"));

                    currentBatch.add(transformData(sourceData));

                    // Flush a full batch.
                    if (currentBatch.size() >= batchSize) {
                        batchInsertWithRetry(targetConn, targetTable, currentBatch, 3);
                        totalProcessed += currentBatch.size();
                        System.out.println("已处理: " + totalProcessed + " 条数据。");
                        currentBatch.clear();
                    }
                }

                // Flush the final partial batch.
                if (!currentBatch.isEmpty()) {
                    batchInsertWithRetry(targetConn, targetTable, currentBatch, 3);
                    totalProcessed += currentBatch.size();
                    System.out.println("完成所有数据处理,共 " + totalProcessed + " 条");
                }
            }
        }
    }

    /**
     * Creates a forward-only, read-only Statement tuned for streaming on the
     * given database type.
     */
    private Statement createStreamingStatement(Connection conn, String dbType) throws SQLException {

        Statement stmt = conn.createStatement(
                ResultSet.TYPE_FORWARD_ONLY,
                ResultSet.CONCUR_READ_ONLY);

        switch (dbType) {
            case "MySQL":
                // Connector/J signals row-by-row streaming with Integer.MIN_VALUE.
                stmt.setFetchSize(Integer.MIN_VALUE);
                break;

            case "Oracle":
                try {
                    stmt.setFetchSize(500);
                    // Oracle-specific prefetch, only if the driver is present.
                    if (conn.isWrapperFor(oracle.jdbc.OracleConnection.class)) {
                        conn.unwrap(oracle.jdbc.OracleConnection.class)
                                .setDefaultRowPrefetch(500);
                    }
                } catch (NoClassDefFoundError e) {
                    System.out.println("Oracle特有优化不可用,使用标准JDBC设置");
                }
                break;

            case "SQL Server":
                stmt.setFetchSize(1000);
                break;

            default:
                // Sensible default for other databases.
                stmt.setFetchSize(500);
        }

        return stmt;
    }

    /**
     * Reads the whole SQL file as UTF-8.
     * (The previous version used the platform default charset, which breaks
     * non-ASCII scripts on differently-configured servers.)
     */
    private String readSqlFile(String filePath) throws IOException {
        return new String(Files.readAllBytes(Paths.get(filePath)), StandardCharsets.UTF_8);
    }

    /**
     * Identifies the database type from the JDBC URL.
     *
     * @throws SQLException for unsupported database URLs
     */
    private String getDatabaseType(Connection conn) throws SQLException {
        String url = conn.getMetaData().getURL().toLowerCase();

        if (url.contains(":oracle:")) return "Oracle";
        if (url.contains(":mysql:")) return "MySQL";
        if (url.contains(":sqlserver:") || url.contains(":microsoft:")) return "SQL Server";

        throw new SQLException("不支持的数据库类型: " + url);
    }

    /**
     * Transforms one source row into a target row.
     * TODO: replace the example logic with the real business rules.
     */
    private TargetData transformData(SourceData source) {
        TargetData target = new TargetData();

        target.setId(source.getId());
        // Null-safe: the previous version NPE'd on a NULL "name" column.
        String name = source.getName();
        target.setName(name == null ? null : name.trim().toUpperCase());

        // Example cleansing: keep the part before the first '|' separator.
        String rawValue = source.getValue();
        if (rawValue != null && rawValue.contains("|")) {
            target.setValue(rawValue.split("\\|")[0]);
        } else {
            target.setValue(rawValue);
        }

        return target;
    }

    /**
     * Inserts one batch with retry. Commits on success; rolls back before each
     * retry so a partially-executed batch is never duplicated.
     *
     * <p>Fix: the previous version caught Spring's {@code DataAccessException},
     * which plain JDBC never throws, so the retry loop could never trigger —
     * every {@code SQLException} was immediately wrapped and rethrown.
     *
     * @throws SQLException when all retries are exhausted
     */
    private void batchInsertWithRetry(Connection conn, String tableName,
                                      List<TargetData> batch, int maxRetries) throws SQLException {
        int attempt = 0;
        while (true) {
            try {
                batchInsert(conn, tableName, batch);
                conn.commit(); // batch succeeded — make it durable
                return;
            } catch (SQLException e) {
                attempt++;
                try {
                    conn.rollback(); // undo the partial batch before retrying
                } catch (SQLException rollbackEx) {
                    e.addSuppressed(rollbackEx);
                }
                if (attempt > maxRetries) {
                    throw e;
                }
                System.out.println("插入失败,准备重试 (" + attempt + "/" + maxRetries + ")");
                try {
                    // True exponential backoff: 1s, 2s, 4s, ...
                    // (the old "1000 * attempt" was linear despite its comment)
                    Thread.sleep(1000L * (1L << (attempt - 1)));
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw new RuntimeException("重试中断", ie);
                }
            }
        }
    }

    /**
     * Executes one batched INSERT. Does not commit — transaction boundaries
     * are owned by {@link #batchInsertWithRetry}.
     */
    private void batchInsert(Connection conn, String tableName, List<TargetData> batch) throws SQLException {

        if (batch.isEmpty()) return;

        String insertSql = generateInsertSql(tableName);

        try (PreparedStatement pstmt = conn.prepareStatement(insertSql)) {
            for (TargetData record : batch) {
                bindInsertParameters(pstmt, record);
                pstmt.addBatch();
            }
            pstmt.executeBatch();
        }
    }

    /**
     * Builds the INSERT statement for the target table.
     *
     * <p>SECURITY: the table name is concatenated into SQL and therefore must
     * come from trusted configuration only. Values are always bound via '?'.
     *
     * @throws IllegalArgumentException if the table name is blank (the old
     *                                  code silently produced invalid SQL)
     */
    private String generateInsertSql(String tableName) {
        if (tableName == null || tableName.trim().isEmpty()) {
            throw new IllegalArgumentException("targetTable must not be blank");
        }
        return "INSERT INTO " + tableName + " (id, name, value) VALUES (?, ?, ?)";
    }

    /** Binds one record's columns to the prepared INSERT (1-based indices). */
    private void bindInsertParameters(PreparedStatement pstmt, TargetData record)
            throws SQLException {
        pstmt.setInt(1, record.getId());
        pstmt.setString(2, record.getName());
        pstmt.setObject(3, record.getValue()); // generic type
    }


    /**
     * Minimal non-pooling DataSource for demos/tests.
     * Production code should use a pooled implementation such as HikariCP.
     */
    private static DataSource createDataSource(String url, String user, String pass) {
        return new DataSource() {
            @Override
            public PrintWriter getLogWriter() throws SQLException {
                return null;
            }

            @Override
            public void setLogWriter(PrintWriter out) throws SQLException {
            }

            @Override
            public void setLoginTimeout(int seconds) throws SQLException {
            }

            @Override
            public int getLoginTimeout() throws SQLException {
                return 0;
            }

            @Override
            public Logger getParentLogger() throws SQLFeatureNotSupportedException {
                return null;
            }

            @Override
            public <T> T unwrap(Class<T> iface) throws SQLException {
                return null;
            }

            @Override
            public boolean isWrapperFor(Class<?> iface) throws SQLException {
                return false;
            }

            @Override
            public Connection getConnection() throws SQLException {
                return DriverManager.getConnection(url, user, pass);
            }

            @Override
            public Connection getConnection(String username, String password) throws SQLException {
                return DriverManager.getConnection(url, user, pass);
            }

        };
    }

    /** Row read from the source database. Plain DTO with explicit accessors
     *  (lombok @Data removed — no generated equals/hashCode was relied on). */
    public static class SourceData {
        private int id;
        private String name;
        private String value;

        public int getId() { return id; }
        public void setId(int id) { this.id = id; }
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
        public String getValue() { return value; }
        public void setValue(String value) { this.value = value; }
    }

    /** Row written to the target database. Plain DTO with explicit accessors. */
    public static class TargetData {
        private int id;
        private String name;
        private String value;

        public int getId() { return id; }
        public void setId(int id) { this.id = id; }
        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
        public String getValue() { return value; }
        public void setValue(String value) { this.value = value; }
    }

    /** Demo entry point: Oracle source -> MySQL target. */
    public static void main(String[] args) {
        // Source database (Oracle).
        DataSource oracleDs = createDataSource(
                "jdbc:oracle:thin:@localhost:1521:ORCL",
                "user", "password");

        // Target database (MySQL).
        // (The previous version also built an unused — and never closed —
        // SQL Server Hikari pool here; create one via
        // SQLServer2008DataSource.createHikariDataSource() only when needed.)
        DataSource mysqlDs = createDataSource(
                "jdbc:mysql://localhost:3306/target_db?useSSL=false",
                "user", "password");

        SqlFileBatchProcessor migrator = new SqlFileBatchProcessor();
        try {
            migrator.migrateData(oracleDs, mysqlDs, "/usr/app/sql/aaa.sql", 5000);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
数据库连接池
java 复制代码
package com.xbhog.dataSync;

import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import javax.sql.DataSource;
/**
 * Factory for a HikariCP-pooled DataSource tuned for SQL Server 2008.
 * Replace the placeholder host, database, user and password before use.
 */
public class SQLServer2008DataSource {

    /** Builds and returns the pooled DataSource. */
    public static DataSource createHikariDataSource() {
        HikariConfig hikariConfig = new HikariConfig();

        // Connection basics (placeholders — fill in real values).
        hikariConfig.setDriverClassName("com.microsoft.sqlserver.jdbc.SQLServerDriver");
        hikariConfig.setJdbcUrl("jdbc:sqlserver://服务器IP:1433;databaseName=数据库名");
        hikariConfig.setUsername("用户名");
        hikariConfig.setPassword("密码");

        // Pool sizing and lifetimes (tune to server capacity).
        hikariConfig.setMaximumPoolSize(15);          // keep under ~20 for SQL Server 2008
        hikariConfig.setMinimumIdle(3);
        hikariConfig.setConnectionTimeout(30000);     // 30 s
        hikariConfig.setIdleTimeout(600000);          // 10 min
        hikariConfig.setMaxLifetime(1800000);         // 30 min
        hikariConfig.setConnectionTestQuery("SELECT 1 FROM sys.objects");

        // Driver properties specific to SQL Server 2008.
        hikariConfig.addDataSourceProperty("sendStringParametersAsUnicode", "false");
        hikariConfig.addDataSourceProperty("selectMethod", "cursor");
        hikariConfig.addDataSourceProperty("responseBuffering", "adaptive");
        hikariConfig.addDataSourceProperty("packetSize", "4096"); // 2008 default packet size

        return new HikariDataSource(hikariConfig);
    }
}
相关推荐
大得36919 分钟前
django的数据库原生操作sql
数据库·sql·django
tuokuac20 分钟前
SQL中的HAVING用法
数据库·sql
jnrjian20 分钟前
利用trigger对大表在线同步 UDI
数据库·sql
超级晒盐人2 小时前
用落霞归雁的思维框架推导少林寺用什么数据库?
java·python·系统架构·学习方法·教育电商
岁忧2 小时前
(LeetCode 面试经典 150 题) 138. 随机链表的复制 (哈希表)
java·c++·leetcode·链表·面试·go
鹦鹉0072 小时前
IO流中的字节流
java·开发语言·后端
你我约定有三2 小时前
分布式微服务--Nacos作为配置中心(二)
java·分布式·spring cloud·微服务·架构·wpf·负载均衡
apocelipes2 小时前
atomic不是免费午餐
java·性能优化·golang·并发
A了LONE2 小时前
cv弹窗,退款确认弹窗
java·服务器·前端