Flink消费Datahub到ClickhouseSink

重点解决批量写入多并行度数组下标越界问题。

一、准备

1、ClickHouse建表

sql 复制代码
-- Target table for the Flink DataHub -> ClickHouse pipeline.
-- Engine notes:
--   * ReplacingMergeTree deduplicates rows sharing the same ORDER BY key, but
--     only during background merges; without a version column the surviving
--     row is the last one inserted, chosen at merge time. Queries that need
--     fully deduplicated results must use FINAL or aggregate explicitly.
--   * IF NOT EXISTS makes this DDL idempotent so the migration can be re-run.
CREATE TABLE IF NOT EXISTS qualityDB.dwd_stck_xny_sales_otbnd_order_wtrl_sn_dtl_w_df
(
    `ledgr_id` String COMMENT '账套ID',
    `ledgr_nm` String COMMENT '账套名称',
    `ou_id` String COMMENT '公司id',
    `ou_name` String COMMENT '公司名称',
    `sale_out_order_id` String COMMENT '销售出库单',
    `sale_out_order_code` String COMMENT '销售出库单号',
    `sale_out_order_rowno` Int64 COMMENT '销售出库单行号',
    `item_id` String COMMENT '产品名称',
    `item_code` String COMMENT '产品编码',
    `spec` String COMMENT '规格',
    `model` String COMMENT '型号',
    `batch_nmbr` String COMMENT '批次号',
    `prqty` Float64 COMMENT '应收数量',
    `qty` Float64 COMMENT '实收数量',
    `wrhs_id` String COMMENT '入库仓库',
    `wrhs_code` String COMMENT '入库仓库编码',
    `wrhslc_id` String COMMENT '库位',
    `wrhslc_code` String COMMENT '库位编码',
    `state` Int64 COMMENT '库存状态',
    `line_src_type` Int64 COMMENT '来源类型',
    `line_src_code` String COMMENT '来源单据号',
    `line_src_id` String COMMENT '来源单据号ID',
    `create_user_id` String COMMENT '创建人ID',
    `mdfy_user_id` String COMMENT '修改人ID',
    `create_user_name` String COMMENT '创建人名称',
    `mdfy_user_name` String COMMENT '修改人名称',
    `create_date` String COMMENT '创建日期',
    `mdfy_date` Date COMMENT '修改日期',
    `org_id` String COMMENT '组织ID',
    `unt_stock_mainname` String COMMENT '库存单位',
    `prjct_nmbr` String COMMENT '项目号',
    `dlvry_plan_code` String COMMENT '发货计划号',
    `dlvry_plan_id` String COMMENT '发货计划ID',
    `dlvry_plan_line_id` String COMMENT '发货计划明细行ID',
    `erp_batch_nmbr` String COMMENT '计划批次号',
    `out_check_state` Int64 COMMENT '出库检验状态',
    `sale_order_id` String COMMENT '销售订单',
    `sale_order_code` String COMMENT '销售订单编码',
    `new_prjct_nmbr` String COMMENT '新项目号',
    `wtrl_sn_mdfy_date` String COMMENT '物料SN修改日期',
    `wtrl_sn_mdfy_user_id` String COMMENT '物料SN修改人',
    `wtrl_sn_create_user_id` String COMMENT '物料SN创建人',
    `wtrl_sn_line_seq` Int64 COMMENT '物料SN行序号',
    `wtrl_sn_create_date` String COMMENT '物料SN创建日期',
    `bar_code` String COMMENT 'sn',
    `bale_code` String COMMENT '箱号',
    `pallet_code` String COMMENT '托盘码',
    `sale_out_order_line_id` String COMMENT '销售出库单明细',
    `wtrl_sn_batch_nmbr` String COMMENT '物料SN批次号',
    `wtrl_sn_item_id` String COMMENT 'SN物料名称ID',
    `wtrl_sn_item_code` String COMMENT 'SN物料编码',
    `wtrl_sn_qty` Float64 COMMENT '物料SN数量',
    `unit_stock_main_name` String COMMENT '单位',
    `wtrl_sn_out_check_state` Int64 COMMENT '物料SN出库检验状态'
)
ENGINE = ReplacingMergeTree
-- One partition per calendar month of the modification date.
PARTITION BY toYYYYMM(mdfy_date)
-- PRIMARY KEY must be a prefix of ORDER BY (it is: first two columns).
PRIMARY KEY (sale_out_order_code, bar_code)
ORDER BY (sale_out_order_code, bar_code, mdfy_date, wtrl_sn_mdfy_date)
SETTINGS index_granularity = 8192

2、Maven依赖

xml 复制代码
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.tbea</groupId>
  <artifactId>data-transfor-quality</artifactId>
  <version>1.0-SNAPSHOT</version>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <scala.version>2.12.7</scala.version>
    <scala.binary.version>2.12</scala.binary.version>
    <flink.version>1.17.2</flink.version>
    <java.version>1.8</java.version>
    <vvr.version>1.17-vvr-8.0.4-1</vvr.version>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!-- DataHub source connector (Alibaba Ververica build).
         Note: its version line (1.15) differs from flink.version (1.17);
         kept as-is because it is the published artifact known to work here. -->
    <dependency>
      <groupId>com.alibaba.ververica</groupId>
      <artifactId>ververica-connector-datahub</artifactId>
      <version>1.15-vvr-6.0.7</version>
    </dependency>

    <!-- Core Flink runtime libraries, bundled into the fat jar
         (no provided scope) so the job also runs standalone. -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-core</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-java</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-api-java</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-clients</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-streaming-java</artifactId>
      <version>${flink.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-runtime</artifactId>
      <version>${flink.version}</version>
    </dependency>

    <!-- Table/SQL stack: provided by the Flink distribution at runtime. -->
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-runtime-web</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-common</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-api-java-bridge</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>

    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.projectlombok</groupId>
      <artifactId>lombok</artifactId>
      <version>1.18.34</version>
    </dependency>

    <!-- CDC / JDBC connectors kept provided: supplied by the deployment image. -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>8.0.23</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>com.ververica</groupId>
      <artifactId>flink-connector-debezium</artifactId>
      <version>2.2.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>com.ververica</groupId>
      <artifactId>flink-connector-mysql-cdc</artifactId>
      <version>2.2.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>com.ververica</groupId>
      <artifactId>flink-sql-connector-mysql-cdc</artifactId>
      <version>2.2.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>com.ververica</groupId>
      <artifactId>flink-connector-oracle-cdc</artifactId>
      <version>2.2.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>com.ververica</groupId>
      <artifactId>flink-sql-connector-oracle-cdc</artifactId>
      <version>2.2.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>com.oracle.database.jdbc</groupId>
      <artifactId>ojdbc8</artifactId>
      <version>19.3.0.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-jdbc</artifactId>
      <version>3.1.0-1.17</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.doris</groupId>
      <artifactId>flink-doris-connector-1.17</artifactId>
      <version>24.1.0</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-kafka</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.flink</groupId>
      <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
      <version>${flink.version}</version>
      <scope>provided</scope>
    </dependency>

    <!-- Connection pooling + ClickHouse JDBC driver, bundled in the fat jar. -->
    <dependency>
      <groupId>com.alibaba</groupId>
      <artifactId>druid</artifactId>
      <version>1.2.24</version>
    </dependency>
    <dependency>
      <groupId>ru.yandex.clickhouse</groupId>
      <artifactId>clickhouse-jdbc</artifactId>
      <version>0.3.2</version>
    </dependency>

    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-simple</artifactId>
      <version>1.7.36</version>
    </dependency>
  </dependencies>

  <build>
    <sourceDirectory>${basedir}/src/main/java</sourceDirectory>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.13.0</version>
        <configuration>
          <source>${java.version}</source>
          <target>${java.version}</target>
          <compilerVersion>${java.version}</compilerVersion>
        </configuration>
      </plugin>
      <!-- Builds the jar-with-dependencies used for submission. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>3.0.0</version>
        <configuration>
          <archive>
            <manifest>
              <!-- TODO: set the job entry class so the fat jar is runnable
                   without an explicit -c/--class argument. -->
              <mainClass></mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

3、DatahubSourceFunction

java 复制代码
package com.tbea.datahub;

import com.alibaba.ververica.connectors.datahub.source.DatahubSourceFunction;
import com.tbea.pojo.DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo;
import com.tbea.sink.ClickHouseBatchSink;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import shaded.datahub.com.aliyun.datahub.client.model.RecordEntry;
import shaded.datahub.com.aliyun.datahub.client.model.TupleRecordData;

import java.io.Serializable;
import java.math.BigDecimal;

import java.sql.Date;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.concurrent.TimeUnit;


/**
 * Flink driver job: consumes a DataHub tuple topic and writes each record to
 * ClickHouse via {@link com.tbea.sink.ClickHouseBatchSink}.
 *
 * <p>Field values are mapped POSITIONALLY from the DataHub record into the
 * POJO. The index gaps (7, 15-18, 27-29, 36-39, ...) skip topic columns that
 * the target table does not need — assumed to match the topic schema; verify
 * against the DataHub topic definition whenever the schema changes.
 */
public class DwdStckXnySalesOtbndOrderWtrlSnDtlwDf implements Serializable {
    private static final long serialVersionUID = 1L;

    // DataHub connection settings (credentials redacted for the post).
    // NOTE(review): the trailing double ';;' below is a harmless empty statement
    // and should be cleaned up.
    private static final String ENDPOINT = "https://datahub.cn-beijing-tbdg-d01.dh.res.bigdata.XXXXXXXXXX.com";;
    private static final String PROJECT_NAME = "bigdata";
    private static final String TOPIC_NAME = "dwd_stck_xny_sales_otbnd_order_wtrl_sn_dtl_w_df";
    private static final String SUB_ID = "";
    private static final String ACCESS_ID = "XXXXXXX";
    private static final String ACCESS_KEY = "XXXXXXXXXX";
    private static final String RUN_MODE = "public";
    private static final boolean ENABLE_SCHEMA_REGISTRY = false;
    // Processed-record counter; only used by the commented-out debug prints below.
    private static int cnt=0;

    private static Long datahubStartInMs = 0L;// Epoch millis of the consume start position (unused here; midnightTimestamp is passed instead).
    private static Long datahubEndInMs=Long.MAX_VALUE;


    /**
     * Builds and executes the pipeline: DataHub source -> positional field
     * mapping into the POJO -> batched ClickHouse sink.
     *
     * @throws Exception if the Flink job fails to build or execute
     */
    public void runExample() throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(3);

        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
                3, // maximum number of restart attempts
                Time.of(10, TimeUnit.SECONDS) // delay between restart attempts
        ));
        // Checkpoint interval in milliseconds, with exactly-once semantics.
        env.enableCheckpointing(10000L, CheckpointingMode.EXACTLY_ONCE);
        // Current date without a time component.
        LocalDate today = LocalDate.now();
        // Midnight of today in the default time zone, as epoch millis;
        // used as the consume start position so each run reads today's data.
        long midnightTimestamp = today.atStartOfDay(ZoneId.systemDefault())
                .toInstant()
                .toEpochMilli();
        System.out.println(today);
        System.out.println(midnightTimestamp);
        DatahubSourceFunction datahubSource =
                new DatahubSourceFunction(
                        ENDPOINT,
                        PROJECT_NAME,
                        TOPIC_NAME,
                        SUB_ID,
                        ACCESS_ID,
                        ACCESS_KEY,
                        RUN_MODE,
                        ENABLE_SCHEMA_REGISTRY,
                        midnightTimestamp,
                        datahubEndInMs);
        datahubSource.setRequestTimeout(30 * 1000);
        // Source exits once it has read up to the end position (bounded run).
        datahubSource.enableExitAfterReadFinished();


        SingleOutputStreamOperator<DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo> dwdStckXnySalesOtbndOrderWtrlSnDtlwDf
                = env.addSource(datahubSource)
                .map(new MapFunction<RecordEntry, DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo>() {
                    /**
                     * Maps one DataHub tuple record to the sink POJO by field index.
                     * The cast types (String/Long/BigDecimal) must match the topic
                     * column types, or this map throws ClassCastException.
                     */
                    @Override
                    public DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo map(RecordEntry recordEntry) throws Exception {


                        DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo dwdStckXnySalesOtbndOrderWtrlSnDtlwDf = new DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo();
                        TupleRecordData recordData = (TupleRecordData) (recordEntry.getRecordData());
//                        System.out.println("已处理数据条数:"+String.valueOf(cnt+=1));
//                        System.out.println("数据上传时间:"+(String) recordData.getField(82));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLedgrId((String) recordData.getField(0));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLedgrNm((String) recordData.getField(1));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setOuId((String) recordData.getField(2));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setOuName((String) recordData.getField(3));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOutOrderId((String) recordData.getField(4));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOutOrderCode((String) recordData.getField(5));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOutOrderRowno((Long) recordData.getField(6));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setItemId((String) recordData.getField(8));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setItemCode((String) recordData.getField(9));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSpec((String) recordData.getField(10));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setModel((String) recordData.getField(11));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setBatchNmbr((String) recordData.getField(12));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setPrqty((BigDecimal) recordData.getField(13));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setQty((BigDecimal) recordData.getField(14));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWrhsId((String) recordData.getField(19));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWrhsCode((String) recordData.getField(20));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWrhslcId((String) recordData.getField(21));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWrhslcCode((String) recordData.getField(22));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setState((Long) recordData.getField(23));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLineSrcType((Long) recordData.getField(24));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLineSrcCode((String) recordData.getField(25));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLineSrcId((String) recordData.getField(26));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setCreateUserId((String) recordData.getField(30));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setMdfyUserId((String) recordData.getField(31));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setCreateUserName((String) recordData.getField(32));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setMdfyUserName((String) recordData.getField(33));
                        // Epoch value -> java.sql.Date. The /1000 assumes the DataHub
                        // TIMESTAMP field is in microseconds (so /1000 yields millis)
                        // — TODO confirm against the topic schema.
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setCreateDate(null==recordData.getField(34)?null:String.valueOf(new Date((Long) recordData.getField(34)/1000)));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setMdfyDate(null==recordData.getField(35)?null: new Date((Long) recordData.getField(35)/1000));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setOrgId((String) recordData.getField(40));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setUntStockMainname((String) recordData.getField(48));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setPrjctNmbr((String) recordData.getField(50));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setDlvryPlanCode((String) recordData.getField(54));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setDlvryPlanId((String) recordData.getField(55));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setDlvryPlanLineId((String) recordData.getField(56));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setErpBatchNmbr((String) recordData.getField(57));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setOutCheckState((Long) recordData.getField(58));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOrderId((String) recordData.getField(60));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOrderCode((String) recordData.getField(61));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setNewPrjctNmbr((String) recordData.getField(63));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnMdfyDate(null==recordData.getField(65)?null:String.valueOf(new Date((Long) recordData.getField(65)/1000)));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnMdfyUserId((String) recordData.getField(66));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnCreateUserId((String) recordData.getField(67));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnLineSeq((Long) recordData.getField(68));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnCreateDate(null==recordData.getField(69)?null:String.valueOf(new Date((Long) recordData.getField(69)/1000)));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setBarCode((String) recordData.getField(70));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setBaleCode((String) recordData.getField(71));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setPalletCode((String) recordData.getField(72));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOutOrderLineId((String) recordData.getField(73));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnBatchNmbr((String) recordData.getField(74));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnItemId((String) recordData.getField(75));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnItemCode((String) recordData.getField(76));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnQty((BigDecimal) recordData.getField(77));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setUnitStockMainName((String) recordData.getField(78));
                        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnOutCheckState((Long) recordData.getField(79));
                        return dwdStckXnySalesOtbndOrderWtrlSnDtlwDf;
                    }
                });

//        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.print();
        // Batched sink: buffers up to 10000 rows per ClickHouse INSERT.
        dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.addSink(new ClickHouseBatchSink(
                "jdbc:clickhouse://10.10.10.104:8123/qualityDB",
                10000
        ));
        env.execute(DwdStckXnySalesOtbndOrderWtrlSnDtlwDf.class.getName());

    }

    /** Entry point: builds and runs the job. */
    public static void main(String[] args) throws Exception {
        DwdStckXnySalesOtbndOrderWtrlSnDtlwDf sourceFunctionExample = new DwdStckXnySalesOtbndOrderWtrlSnDtlwDf();
        sourceFunctionExample.runExample();
    }
}

4、ClickHouseBatchSink

重点引入 ReentrantLock:在高并行度处理数据时,批量写入可能造成 ArrayList.size > batchSize,从而报错 Caused by: java.lang.ArrayIndexOutOfBoundsException。

java 复制代码
package com.tbea.sink;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.ClickHouseDataSource;
import ru.yandex.clickhouse.ClickHousePreparedStatement;
import ru.yandex.clickhouse.settings.ClickHouseProperties;
import com.tbea.pojo.DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo;

import java.sql.Date;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.ReentrantLock;

public class ClickHouseBatchSink extends RichSinkFunction<DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo> {
    private transient ClickHouseDataSource dataSource;
    private transient ClickHouseConnection connection;
    private transient ClickHousePreparedStatement preparedStatement;
    // 使用ThreadLocal确保线程安全
    private static final ThreadLocal<SimpleDateFormat> dateFormatThreadLocal =
            ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd"));
    private final String jdbcUrl;
    private final int batchSize;
    //可重入互斥锁
    private final ReentrantLock lock;

    private List<DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo> batchList;

    public ClickHouseBatchSink(String jdbcUrl,  int batchSize) {
        this.jdbcUrl = jdbcUrl;
        this.batchSize = batchSize;
        this.lock = new ReentrantLock();
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        ClickHouseProperties properties = new ClickHouseProperties();
        properties.setUser("default");
        properties.setPassword("1111111");
        this.dataSource = new ClickHouseDataSource(jdbcUrl, properties);
        this.connection = dataSource.getConnection().unwrap(ClickHouseConnection.class);

        String sql = "INSERT INTO dwd_stck_xny_sales_otbnd_order_wtrl_sn_dtl_w_df(" +
                "ledgr_id\n" +
                ",ledgr_nm\n" +
                ",ou_id\n" +
                ",ou_name\n" +
                ",sale_out_order_id\n" +
                ",sale_out_order_code\n" +
                ",sale_out_order_rowno\n" +
                ",item_id\n" +
                ",item_code\n" +
                ",spec\n" +
                ",model\n" +
                ",batch_nmbr\n" +
                ",prqty\n" +
                ",qty\n" +
                ",wrhs_id\n" +
                ",wrhs_code\n" +
                ",wrhslc_id\n" +
                ",wrhslc_code\n" +
                ",state\n" +
                ",line_src_type\n" +
                ",line_src_code\n" +
                ",line_src_id\n" +
                ",create_user_id\n" +
                ",mdfy_user_id\n" +
                ",create_user_name\n" +
                ",mdfy_user_name\n" +
                ",create_date\n" +
                ",mdfy_date\n" +
                ",org_id\n" +
                ",unt_stock_mainname\n" +
                ",prjct_nmbr\n" +
                ",dlvry_plan_code\n" +
                ",dlvry_plan_id\n" +
                ",dlvry_plan_line_id\n" +
                ",erp_batch_nmbr\n" +
                ",out_check_state\n" +
                ",sale_order_id\n" +
                ",sale_order_code\n" +
                ",new_prjct_nmbr\n" +
                ",wtrl_sn_mdfy_date\n" +
                ",wtrl_sn_mdfy_user_id\n" +
                ",wtrl_sn_create_user_id\n" +
                ",wtrl_sn_line_seq\n" +
                ",wtrl_sn_create_date\n" +
                ",bar_code\n" +
                ",bale_code\n" +
                ",pallet_code\n" +
                ",sale_out_order_line_id\n" +
                ",wtrl_sn_batch_nmbr\n" +
                ",wtrl_sn_item_id\n" +
                ",wtrl_sn_item_code\n" +
                ",wtrl_sn_qty\n" +
                ",unit_stock_main_name\n" +
                ",wtrl_sn_out_check_state) " +
                "VALUES (?,?,?,?,?,?,?,?,?,?\n" +
                ",?,?,?,?,?,?,?,?,?,?\n" +
                ",?,?,?,?,?,?,?,?,?,?\n" +
                ",?,?,?,?,?,?,?,?,?,?\n" +
                ",?,?,?,?,?,?,?,?,?,?\n" +
                ",?,?,?,?)";
        this.preparedStatement = (ClickHousePreparedStatement) connection.prepareStatement(sql);
        this.preparedStatement.setMaxRows(batchSize);

        this.batchList = new ArrayList<>(batchSize);
    }

    @Override
    public void invoke(DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo value, Context context) throws Exception {
        //并行度 > 1 时,多个线程可能同时调用 invoke() 方法,很可能造成batchList.size() > batchSize
        lock.lock();
        try {
            batchList.add(value);
            if (batchList.size() >= batchSize) {
                flush();
            }
        } finally {
            lock.unlock();
        }
    }

    @Override
    public void close() throws Exception {
        lock.lock();
        try {
            if (!batchList.isEmpty()) {
                flush();
            }
        } finally {
            lock.unlock();
        }

        // 清理资源
        try {
            if (preparedStatement != null) {
                preparedStatement.close();
            }
        } catch (Exception e) {
            System.err.println("Error closing prepared statement: " + e.getMessage());
        }

        try {
            if (connection != null && !connection.isClosed()) {
                connection.close();
            }
        } catch (Exception e) {
            System.err.println("Error closing connection: " + e.getMessage());
        }

        // 清理ThreadLocal
        dateFormatThreadLocal.remove();

        super.close();
    }

    private void flush() throws Exception {
        if (batchList.isEmpty()) {
            return;
        }
        try {
            for (DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo value : batchList) {
                preparedStatement.setString(1, value.getLedgrId());
                preparedStatement.setString(2, value.getLedgrNm());
                preparedStatement.setString(3, value.getOuId());
                preparedStatement.setString(4, value.getOuName());
                preparedStatement.setString(5, value.getSaleOutOrderId());
                preparedStatement.setString(6, value.getSaleOutOrderCode());
                preparedStatement.setObject(7, null==value.getSaleOutOrderRowno()?null:value.getSaleOutOrderRowno());
                preparedStatement.setString(8, value.getItemId());
                preparedStatement.setString(9, value.getItemCode());
                preparedStatement.setString(10, value.getSpec());
                preparedStatement.setString(11, value.getModel());
                preparedStatement.setString(12, value.getBatchNmbr());
                preparedStatement.setBigDecimal(13, value.getPrqty());
                preparedStatement.setBigDecimal(14, value.getQty());
                preparedStatement.setString(15, value.getWrhsId());
                preparedStatement.setString(16, value.getWrhsCode());
                preparedStatement.setString(17, value.getWrhslcId());
                preparedStatement.setString(18, value.getWrhslcCode());
                preparedStatement.setObject(19, null==value.getState()?null:value.getState());
                preparedStatement.setObject(20, null==value.getLineSrcType()?null:value.getLineSrcType());
                preparedStatement.setString(21, value.getLineSrcCode());
                preparedStatement.setString(22, value.getLineSrcId());
                preparedStatement.setString(23, value.getCreateUserId());
                preparedStatement.setString(24, value.getMdfyUserId());
                preparedStatement.setString(25, value.getCreateUserName());
                preparedStatement.setString(26, value.getMdfyUserName());
                preparedStatement.setString(27, String.valueOf(value.getCreateDate()));
                if(null!=value.getMdfyDate()){
                    Date mdfyDate = value.getMdfyDate();
                    String formatmdfyDate = dateFormatThreadLocal.get().format(mdfyDate);
                    preparedStatement.setDate(28, Date.valueOf(formatmdfyDate));
                }else {
                    preparedStatement.setDate(28, null);
                }
                preparedStatement.setString(29, value.getOrgId());
                preparedStatement.setString(30, value.getUntStockMainname());
                preparedStatement.setString(31, value.getPrjctNmbr());
                preparedStatement.setString(32, value.getDlvryPlanCode());
                preparedStatement.setString(33, value.getDlvryPlanId());
                preparedStatement.setString(34, value.getDlvryPlanLineId());
                preparedStatement.setString(35, value.getErpBatchNmbr());
                preparedStatement.setObject(36, null==value.getOutCheckState()?null:value.getOutCheckState());
                preparedStatement.setString(37, value.getSaleOrderId());
                preparedStatement.setString(38, value.getSaleOrderCode());
                preparedStatement.setString(39, value.getNewPrjctNmbr());
                preparedStatement.setString(40, String.valueOf(value.getWtrlSnMdfyDate()));
                preparedStatement.setString(41, value.getWtrlSnMdfyUserId());
                preparedStatement.setString(42, value.getWtrlSnCreateUserId());
                preparedStatement.setObject(43, null==value.getWtrlSnLineSeq()?null:value.getWtrlSnLineSeq());
                preparedStatement.setString(44, String.valueOf(value.getWtrlSnMdfyDate()));
                preparedStatement.setString(45, value.getBarCode());
                preparedStatement.setString(46, value.getBaleCode());
                preparedStatement.setString(47, value.getPalletCode());
                preparedStatement.setString(48, value.getSaleOutOrderLineId());
                preparedStatement.setString(49, value.getWtrlSnBatchNmbr());
                preparedStatement.setString(50, value.getWtrlSnItemId());
                preparedStatement.setString(51, value.getWtrlSnItemCode());
                preparedStatement.setBigDecimal(52, value.getWtrlSnQty());
                preparedStatement.setString(53, value.getUnitStockMainName());
                preparedStatement.setObject(54,  null==value.getWtrlSnOutCheckState()?null:value.getWtrlSnOutCheckState());

                preparedStatement.addBatch();
            }

            int[] ints = preparedStatement.executeBatch();
            System.out.println(ints.length);
            preparedStatement.clearBatch();
            batchList.clear();
        } catch (Exception e) {
            System.err.println("Error executing batch insert: " + e.getMessage());
            // 可以根据需要决定是重试还是抛出异常
            flushWithRetry();
//            throw e;
        } finally {
            batchList.clear();
        }
    }

    /**
     * Re-attempts {@code flush()} up to 3 times, waiting between attempts with
     * exponential backoff (1s, 2s, ...). Rethrows the last failure once the
     * retry budget is exhausted.
     *
     * NOTE(review): flush() itself calls flushWithRetry() from its catch
     * block, so a persistently failing batch can recurse flush -> retry ->
     * flush deeply. Confirm the intended retry topology; ideally only one of
     * the two methods should own retrying.
     *
     * @throws Exception the last flush failure, or InterruptedException if
     *                   the backoff sleep is interrupted
     */
    private void flushWithRetry() throws Exception {
        final int maxRetries = 3;
        for (int attempt = 1; attempt <= maxRetries; attempt++) {
            try {
                flush();
                return;
            } catch (Exception e) {
                if (attempt >= maxRetries) {
                    // Retry budget exhausted: surface the last error to the caller.
                    throw e;
                }
                // True exponential backoff: 1s, 2s, 4s, ... The original code
                // commented "exponential backoff" but slept linearly
                // (1000 * retryCount); the delay now matches the comment.
                Thread.sleep(1000L << (attempt - 1));
            }
        }
    }
}

5、工具类

java 复制代码
package com.tbea.utils;

import com.alibaba.druid.pool.DruidDataSource;

import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class ClickHouseConnUtil {

    /** Lazily-initialized, process-wide Druid pool (double-checked locking). */
    private static volatile DataSource dataSource;

    // NOTE(review): connection settings and credentials are hardcoded here;
    // externalize them to configuration (env vars / properties file) before
    // production use.
    private static final String URL = "jdbc:clickhouse://10.10.10.104:8123/qualityDB";
    private static final String USERNAME = "default"; // ClickHouse user; "default" is the server's built-in account
    private static final String PASSWORD = "111111";  // ClickHouse password (the original comment wrongly said "empty by default")

    /**
     * Opens a new, unpooled JDBC connection to ClickHouse.
     * <p>
     * Each call creates a fresh connection that the caller owns and must
     * close. The original implementation cached the connection in a shared
     * static field, overwriting (and leaking) the previous handle on every
     * call and racing under concurrent use — that field has been removed.
     *
     * @return a newly opened {@link Connection}
     * @throws ClassNotFoundException if the ClickHouse driver class is absent
     * @throws SQLException           if the connection cannot be established
     */
    public static Connection getClickHouseConn() throws ClassNotFoundException, SQLException {
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        return DriverManager.getConnection(URL, USERNAME, PASSWORD);
    }

    /**
     * Returns the shared Druid {@link DataSource}, creating it on first use
     * via double-checked locking on the {@code volatile} field.
     *
     * @return the process-wide pooled data source
     */
    public static DataSource getClickHouseDataSource() {
        if (dataSource == null) {
            synchronized (ClickHouseConnUtil.class) {
                if (dataSource == null) {
                    DruidDataSource pool = new DruidDataSource();

                    // Driver and endpoint
                    pool.setDriverClassName("ru.yandex.clickhouse.ClickHouseDriver");
                    pool.setUrl(URL);
                    pool.setUsername(USERNAME);
                    pool.setPassword(PASSWORD);

                    // Pool sizing
                    pool.setInitialSize(5);
                    pool.setMinIdle(5);
                    pool.setMaxActive(20);
                    pool.setMaxWait(60000); // ms to wait for a free connection before failing

                    // Idle-connection eviction and validation policy
                    pool.setTimeBetweenEvictionRunsMillis(60000);
                    pool.setMinEvictableIdleTimeMillis(300000);
                    pool.setTestWhileIdle(true);
                    pool.setTestOnBorrow(false);
                    pool.setTestOnReturn(false);

                    dataSource = pool;
                }
            }
        }
        return dataSource;
    }
}
相关推荐
知识分享小能手2 小时前
React学习教程,从入门到精通,React 前后端交互技术详解(29)
前端·javascript·vue.js·学习·react.js·前端框架·react
天天进步20152 小时前
React Server Components详解:服务端渲染的新纪元
开发语言·前端·javascript
lvchaoq2 小时前
react的依赖项数组
前端·javascript·react.js
观望过往2 小时前
Spring Boot 高级特性:从原理到企业级实战
java·spring boot
喂完待续2 小时前
【序列晋升】38 Spring Data MongoDB 的统一数据访问范式与实践
java·spring·spring cloud·big data·序列晋升
郑洁文2 小时前
上门代管宠物系统的设计与实现
java·spring boot·后端·毕业设计·毕设
Yeats_Liao2 小时前
Java网络编程(一):从BIO到NIO的技术演进
java·网络·nio
James. 常德 student3 小时前
华为 ai 机考 编程题解答
java·人工智能·华为
笨手笨脚の3 小时前
设计模式-原型模式
java·设计模式·创建型设计模式·原型模式