This post focuses on fixing the ArrayIndexOutOfBoundsException that batched ClickHouse writes can hit when the Flink job runs with parallelism greater than 1.
一、Preparation
1、Create the ClickHouse table
sql
CREATE TABLE qualityDB.dwd_stck_xny_sales_otbnd_order_wtrl_sn_dtl_w_df
(
`ledgr_id` String COMMENT '账套ID',
`ledgr_nm` String COMMENT '账套名称',
`ou_id` String COMMENT '公司id',
`ou_name` String COMMENT '公司名称',
`sale_out_order_id` String COMMENT '销售出库单',
`sale_out_order_code` String COMMENT '销售出库单号',
`sale_out_order_rowno` Int64 COMMENT '销售出库单行号',
`item_id` String COMMENT '产品名称',
`item_code` String COMMENT '产品编码',
`spec` String COMMENT '规格',
`model` String COMMENT '型号',
`batch_nmbr` String COMMENT '批次号',
`prqty` Float64 COMMENT '应收数量',
`qty` Float64 COMMENT '实收数量',
`wrhs_id` String COMMENT '入库仓库',
`wrhs_code` String COMMENT '入库仓库编码',
`wrhslc_id` String COMMENT '库位',
`wrhslc_code` String COMMENT '库位编码',
`state` Int64 COMMENT '库存状态',
`line_src_type` Int64 COMMENT '来源类型',
`line_src_code` String COMMENT '来源单据号',
`line_src_id` String COMMENT '来源单据号ID',
`create_user_id` String COMMENT '创建人ID',
`mdfy_user_id` String COMMENT '修改人ID',
`create_user_name` String COMMENT '创建人名称',
`mdfy_user_name` String COMMENT '修改人名称',
`create_date` String COMMENT '创建日期',
`mdfy_date` Date COMMENT '修改日期',
`org_id` String COMMENT '组织ID',
`unt_stock_mainname` String COMMENT '库存单位',
`prjct_nmbr` String COMMENT '项目号',
`dlvry_plan_code` String COMMENT '发货计划号',
`dlvry_plan_id` String COMMENT '发货计划ID',
`dlvry_plan_line_id` String COMMENT '发货计划明细行ID',
`erp_batch_nmbr` String COMMENT '计划批次号',
`out_check_state` Int64 COMMENT '出库检验状态',
`sale_order_id` String COMMENT '销售订单',
`sale_order_code` String COMMENT '销售订单编码',
`new_prjct_nmbr` String COMMENT '新项目号',
`wtrl_sn_mdfy_date` String COMMENT '物料SN修改日期',
`wtrl_sn_mdfy_user_id` String COMMENT '物料SN修改人',
`wtrl_sn_create_user_id` String COMMENT '物料SN创建人',
`wtrl_sn_line_seq` Int64 COMMENT '物料SN行序号',
`wtrl_sn_create_date` String COMMENT '物料SN创建日期',
`bar_code` String COMMENT 'sn',
`bale_code` String COMMENT '箱号',
`pallet_code` String COMMENT '托盘码',
`sale_out_order_line_id` String COMMENT '销售出库单明细',
`wtrl_sn_batch_nmbr` String COMMENT '物料SN批次号',
`wtrl_sn_item_id` String COMMENT 'SN物料名称ID',
`wtrl_sn_item_code` String COMMENT 'SN物料编码',
`wtrl_sn_qty` Float64 COMMENT '物料SN数量',
`unit_stock_main_name` String COMMENT '单位',
`wtrl_sn_out_check_state` Int64 COMMENT '物料SN出库检验状态'
)
ENGINE = ReplacingMergeTree
PARTITION BY toYYYYMM(mdfy_date)
PRIMARY KEY (sale_out_order_code, bar_code)
ORDER BY (sale_out_order_code, bar_code, mdfy_date, wtrl_sn_mdfy_date)
SETTINGS index_granularity = 8192
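A note on the engine choice: ReplacingMergeTree removes duplicate rows that share the same ORDER BY key, but only during background merges, so a count taken right after a load can still include duplicates. The sketch below is not part of the original setup (the JDBC URL and credentials are placeholders mirroring the ones used later in this post); it compares the raw count with the deduplicated count obtained via FINAL.
java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class CheckReplacingMergeTree {
    public static void main(String[] args) throws Exception {
        Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
        try (Connection conn = DriverManager.getConnection(
                "jdbc:clickhouse://10.10.10.104:8123/qualityDB", "default", "<password>");
             Statement stmt = conn.createStatement()) {
            // raw count: may still contain duplicates that have not been merged away yet
            try (ResultSet rs = stmt.executeQuery(
                    "SELECT count() FROM dwd_stck_xny_sales_otbnd_order_wtrl_sn_dtl_w_df")) {
                if (rs.next()) System.out.println("raw rows: " + rs.getLong(1));
            }
            // FINAL deduplicates by the ORDER BY key at query time (slower, but exact)
            try (ResultSet rs = stmt.executeQuery(
                    "SELECT count() FROM dwd_stck_xny_sales_otbnd_order_wtrl_sn_dtl_w_df FINAL")) {
                if (rs.next()) System.out.println("deduplicated rows: " + rs.getLong(1));
            }
        }
    }
}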
2、Maven dependencies
xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.tbea</groupId>
<artifactId>data-transfor-quality</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<scala.version>2.12.7</scala.version>
<scala.binary.version>2.12</scala.binary.version>
<flink.version>1.17.2</flink.version>
<java.version>1.8</java.version>
<vvr.version>1.17-vvr-8.0.4-1</vvr.version>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
</properties>
<dependencies>
<!-- <dependency>-->
<!-- <groupId>com.alibaba.flink</groupId>-->
<!-- <artifactId>datahub-connector</artifactId>-->
<!-- <version>0.1-SNAPSHOT</version>-->
<!-- <classifier>jar-with-dependencies</classifier>-->
<!-- </dependency>-->
<dependency>
<groupId>com.alibaba.ververica</groupId>
<artifactId>ververica-connector-datahub</artifactId>
<version>1.15-vvr-6.0.7</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba.ververica/ververica-connector-common -->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.ververica</groupId>-->
<!-- <artifactId>ververica-connector-common</artifactId>-->
<!-- <version>1.17-vvr-8.0.8</version>-->
<!-- <scope>provided</scope> -->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>com.alibaba.ververica</groupId>-->
<!-- <artifactId>ververica-connector-continuous-odps</artifactId>-->
<!-- <version>1.17-vvr-8.0.8</version>-->
<!-- <scope>provided</scope>-->
<!-- </dependency>-->
<!-- <dependency>-->
<!-- <groupId>org.apache.flink</groupId>-->
<!-- <artifactId>flink-connector-base</artifactId>-->
<!-- <version>${flink.version}</version>-->
<!-- <scope>provided</scope> -->
<!-- </dependency>-->
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-core -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-java -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-runtime -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime</artifactId>
<version>${flink.version}</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-table-common -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-table-api-java-bridge -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-table-planner -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.12</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>2.12.7</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.projectlombok/lombok -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.34</version>
<!-- <scope>provided</scope>-->
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.23</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/com.ververica/flink-connector-debezium -->
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-connector-debezium</artifactId>
<version>2.2.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-connector-mysql-cdc</artifactId>
<version>2.2.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-sql-connector-mysql-cdc</artifactId>
<version>2.2.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-connector-oracle-cdc</artifactId>
<version>2.2.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.ververica</groupId>
<artifactId>flink-sql-connector-oracle-cdc</artifactId>
<version>2.2.0</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>com.oracle.database.jdbc</groupId>
<artifactId>ojdbc8</artifactId>
<version>19.3.0.0</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-filesystem -->
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-jdbc -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc</artifactId>
<version>3.1.0-1.17</version>
<scope>provided</scope>
</dependency>
<!-- flink-doris-connector -->
<!-- https://mvnrepository.com/artifact/org.apache.doris/flink-doris-connector-1.17 -->
<dependency>
<groupId>org.apache.doris</groupId>
<artifactId>flink-doris-connector-1.17</artifactId>
<version>24.1.0</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-hive -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-hive_2.12</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/com.alibaba/druid -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>druid</artifactId>
<version>1.2.24</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.clickhouse/clickhouse-jdbc -->
<!-- https://mvnrepository.com/artifact/ru.yandex.clickhouse/clickhouse-jdbc -->
<dependency>
<groupId>ru.yandex.clickhouse</groupId>
<artifactId>clickhouse-jdbc</artifactId>
<version>0.3.2</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.36</version>
<!-- <scope>provided</scope>-->
</dependency>
</dependencies>
<build>
<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.13.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<compilerVersion>1.8</compilerVersion>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.0.0</version>
<configuration>
<archive>
<manifest>
<mainClass></mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
3、DatahubSourceFunction
java
package com.tbea.datahub;
import com.alibaba.ververica.connectors.datahub.source.DatahubSourceFunction;
import com.tbea.pojo.DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo;
import com.tbea.sink.ClickHouseBatchSink;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import shaded.datahub.com.aliyun.datahub.client.model.RecordEntry;
import shaded.datahub.com.aliyun.datahub.client.model.TupleRecordData;
import java.io.Serializable;
import java.math.BigDecimal;
import java.sql.Date;
import java.time.LocalDate;
import java.time.ZoneId;
import java.util.concurrent.TimeUnit;
public class DwdStckXnySalesOtbndOrderWtrlSnDtlwDf implements Serializable {
private static final long serialVersionUID = 1L;
private static final String ENDPOINT = "https://datahub.cn-beijing-tbdg-d01.dh.res.bigdata.XXXXXXXXXX.com";
private static final String PROJECT_NAME = "bigdata";
private static final String TOPIC_NAME = "dwd_stck_xny_sales_otbnd_order_wtrl_sn_dtl_w_df";
private static final String SUB_ID = "";
private static final String ACCESS_ID = "XXXXXXX";
private static final String ACCESS_KEY = "XXXXXXXXXX";
private static final String RUN_MODE = "public";
private static final boolean ENABLE_SCHEMA_REGISTRY = false;
private static int cnt=0;
private static Long datahubStartInMs = 0L; // start position (epoch ms) for consumption; superseded below by today's midnight timestamp
private static Long datahubEndInMs=Long.MAX_VALUE;
public void runExample() throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(3);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
3, // maximum number of restart attempts
Time.of(10, TimeUnit.SECONDS) // delay between restart attempts
));
// checkpoint interval in milliseconds
env.enableCheckpointing(10000L, CheckpointingMode.EXACTLY_ONCE);
// today's date (no time component)
LocalDate today = LocalDate.now();
// convert to today's midnight timestamp in the system default time zone
long midnightTimestamp = today.atStartOfDay(ZoneId.systemDefault())
.toInstant()
.toEpochMilli();
System.out.println("DataHub start position: " + today + " (" + midnightTimestamp + " ms)");
DatahubSourceFunction datahubSource =
new DatahubSourceFunction(
ENDPOINT,
PROJECT_NAME,
TOPIC_NAME,
SUB_ID,
ACCESS_ID,
ACCESS_KEY,
RUN_MODE,
ENABLE_SCHEMA_REGISTRY,
midnightTimestamp,
datahubEndInMs);
datahubSource.setRequestTimeout(30 * 1000);
datahubSource.enableExitAfterReadFinished();
SingleOutputStreamOperator<DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo> dwdStckXnySalesOtbndOrderWtrlSnDtlwDf
= env.addSource(datahubSource)
.map(new MapFunction<RecordEntry, DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo>() {
@Override
public DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo map(RecordEntry recordEntry) throws Exception {
DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo dwdStckXnySalesOtbndOrderWtrlSnDtlwDf = new DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo();
TupleRecordData recordData = (TupleRecordData) (recordEntry.getRecordData());
// System.out.println("已处理数据条数:"+String.valueOf(cnt+=1));
// System.out.println("数据上传时间:"+(String) recordData.getField(82));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLedgrId((String) recordData.getField(0));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLedgrNm((String) recordData.getField(1));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setOuId((String) recordData.getField(2));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setOuName((String) recordData.getField(3));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOutOrderId((String) recordData.getField(4));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOutOrderCode((String) recordData.getField(5));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOutOrderRowno((Long) recordData.getField(6));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setItemId((String) recordData.getField(8));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setItemCode((String) recordData.getField(9));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSpec((String) recordData.getField(10));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setModel((String) recordData.getField(11));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setBatchNmbr((String) recordData.getField(12));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setPrqty((BigDecimal) recordData.getField(13));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setQty((BigDecimal) recordData.getField(14));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWrhsId((String) recordData.getField(19));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWrhsCode((String) recordData.getField(20));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWrhslcId((String) recordData.getField(21));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWrhslcCode((String) recordData.getField(22));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setState((Long) recordData.getField(23));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLineSrcType((Long) recordData.getField(24));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLineSrcCode((String) recordData.getField(25));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setLineSrcId((String) recordData.getField(26));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setCreateUserId((String) recordData.getField(30));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setMdfyUserId((String) recordData.getField(31));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setCreateUserName((String) recordData.getField(32));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setMdfyUserName((String) recordData.getField(33));
// convert the long timestamp to a date (the source value appears to be in microseconds, hence the division by 1000 to get milliseconds)
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setCreateDate(null==recordData.getField(34)?null:String.valueOf(new Date((Long) recordData.getField(34)/1000)));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setMdfyDate(null==recordData.getField(35)?null: new Date((Long) recordData.getField(35)/1000));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setOrgId((String) recordData.getField(40));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setUntStockMainname((String) recordData.getField(48));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setPrjctNmbr((String) recordData.getField(50));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setDlvryPlanCode((String) recordData.getField(54));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setDlvryPlanId((String) recordData.getField(55));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setDlvryPlanLineId((String) recordData.getField(56));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setErpBatchNmbr((String) recordData.getField(57));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setOutCheckState((Long) recordData.getField(58));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOrderId((String) recordData.getField(60));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOrderCode((String) recordData.getField(61));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setNewPrjctNmbr((String) recordData.getField(63));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnMdfyDate(null==recordData.getField(65)?null:String.valueOf(new Date((Long) recordData.getField(65)/1000)));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnMdfyUserId((String) recordData.getField(66));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnCreateUserId((String) recordData.getField(67));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnLineSeq((Long) recordData.getField(68));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnCreateDate(null==recordData.getField(69)?null:String.valueOf(new Date((Long) recordData.getField(69)/1000)));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setBarCode((String) recordData.getField(70));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setBaleCode((String) recordData.getField(71));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setPalletCode((String) recordData.getField(72));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setSaleOutOrderLineId((String) recordData.getField(73));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnBatchNmbr((String) recordData.getField(74));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnItemId((String) recordData.getField(75));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnItemCode((String) recordData.getField(76));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnQty((BigDecimal) recordData.getField(77));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setUnitStockMainName((String) recordData.getField(78));
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.setWtrlSnOutCheckState((Long) recordData.getField(79));
return dwdStckXnySalesOtbndOrderWtrlSnDtlwDf;
}
});
// dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.print();
dwdStckXnySalesOtbndOrderWtrlSnDtlwDf.addSink(new ClickHouseBatchSink(
"jdbc:clickhouse://10.10.10.104:8123/qualityDB",
10000
));
env.execute(DwdStckXnySalesOtbndOrderWtrlSnDtlwDf.class.getName());
}
public static void main(String[] args) throws Exception {
DwdStckXnySalesOtbndOrderWtrlSnDtlwDf sourceFunctionExample = new DwdStckXnySalesOtbndOrderWtrlSnDtlwDf();
sourceFunctionExample.runExample();
}
}
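The POJO referenced above, DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo, is not shown in the post. For the code to compile it needs one field per ClickHouse column, with Lombok generating the getters and setters; the sketch below only shows the shape with a few representative fields, whose types are inferred from the setters used in the map function and in the sink.
java
package com.tbea.pojo;

import lombok.Data;
import java.io.Serializable;
import java.math.BigDecimal;
import java.sql.Date;

// Sketch only: the real class carries one field per column of the DDL above (54 in total),
// all following the same pattern as the samples shown here.
@Data
public class DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo implements Serializable {
    private static final long serialVersionUID = 1L;

    private String ledgrId;              // ledgr_id
    private String ledgrNm;              // ledgr_nm
    private String saleOutOrderCode;     // sale_out_order_code
    private Long saleOutOrderRowno;      // sale_out_order_rowno
    private BigDecimal prqty;            // prqty
    private BigDecimal qty;              // qty
    private String createDate;           // create_date (kept as a formatted string)
    private Date mdfyDate;               // mdfy_date (java.sql.Date)
    private String barCode;              // bar_code
    private Long wtrlSnOutCheckState;    // wtrl_sn_out_check_state
    // ... remaining fields omitted for brevity
}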
4、ClickHouseBatchSink
The key change is the ReentrantLock: with high parallelism, concurrent batch writes could let batchList grow past batchSize, which then fails with Caused by: java.lang.ArrayIndexOutOfBoundsException during the batch insert.
java
package com.tbea.sink;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import ru.yandex.clickhouse.ClickHouseConnection;
import ru.yandex.clickhouse.ClickHouseDataSource;
import ru.yandex.clickhouse.ClickHousePreparedStatement;
import ru.yandex.clickhouse.settings.ClickHouseProperties;
import com.tbea.pojo.DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo;
import java.sql.Date;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.ReentrantLock;
public class ClickHouseBatchSink extends RichSinkFunction<DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo> {
private transient ClickHouseDataSource dataSource;
private transient ClickHouseConnection connection;
private transient ClickHousePreparedStatement preparedStatement;
// ThreadLocal keeps the SimpleDateFormat thread-safe
private static final ThreadLocal<SimpleDateFormat> dateFormatThreadLocal =
ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyy-MM-dd"));
private final String jdbcUrl;
private final int batchSize;
// reentrant mutex guarding batchList
private final ReentrantLock lock;
private List<DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo> batchList;
public ClickHouseBatchSink(String jdbcUrl, int batchSize) {
this.jdbcUrl = jdbcUrl;
this.batchSize = batchSize;
this.lock = new ReentrantLock();
}
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
ClickHouseProperties properties = new ClickHouseProperties();
properties.setUser("default");
properties.setPassword("1111111");
this.dataSource = new ClickHouseDataSource(jdbcUrl, properties);
this.connection = dataSource.getConnection().unwrap(ClickHouseConnection.class);
String sql = "INSERT INTO dwd_stck_xny_sales_otbnd_order_wtrl_sn_dtl_w_df(" +
"ledgr_id\n" +
",ledgr_nm\n" +
",ou_id\n" +
",ou_name\n" +
",sale_out_order_id\n" +
",sale_out_order_code\n" +
",sale_out_order_rowno\n" +
",item_id\n" +
",item_code\n" +
",spec\n" +
",model\n" +
",batch_nmbr\n" +
",prqty\n" +
",qty\n" +
",wrhs_id\n" +
",wrhs_code\n" +
",wrhslc_id\n" +
",wrhslc_code\n" +
",state\n" +
",line_src_type\n" +
",line_src_code\n" +
",line_src_id\n" +
",create_user_id\n" +
",mdfy_user_id\n" +
",create_user_name\n" +
",mdfy_user_name\n" +
",create_date\n" +
",mdfy_date\n" +
",org_id\n" +
",unt_stock_mainname\n" +
",prjct_nmbr\n" +
",dlvry_plan_code\n" +
",dlvry_plan_id\n" +
",dlvry_plan_line_id\n" +
",erp_batch_nmbr\n" +
",out_check_state\n" +
",sale_order_id\n" +
",sale_order_code\n" +
",new_prjct_nmbr\n" +
",wtrl_sn_mdfy_date\n" +
",wtrl_sn_mdfy_user_id\n" +
",wtrl_sn_create_user_id\n" +
",wtrl_sn_line_seq\n" +
",wtrl_sn_create_date\n" +
",bar_code\n" +
",bale_code\n" +
",pallet_code\n" +
",sale_out_order_line_id\n" +
",wtrl_sn_batch_nmbr\n" +
",wtrl_sn_item_id\n" +
",wtrl_sn_item_code\n" +
",wtrl_sn_qty\n" +
",unit_stock_main_name\n" +
",wtrl_sn_out_check_state) " +
"VALUES (?,?,?,?,?,?,?,?,?,?\n" +
",?,?,?,?,?,?,?,?,?,?\n" +
",?,?,?,?,?,?,?,?,?,?\n" +
",?,?,?,?,?,?,?,?,?,?\n" +
",?,?,?,?,?,?,?,?,?,?\n" +
",?,?,?,?)";
this.preparedStatement = (ClickHousePreparedStatement) connection.prepareStatement(sql);
this.preparedStatement.setMaxRows(batchSize);
this.batchList = new ArrayList<>(batchSize);
}
@Override
public void invoke(DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo value, Context context) throws Exception {
// with parallelism > 1, invoke() may be called concurrently, which could push batchList.size() past batchSize
lock.lock();
try {
batchList.add(value);
if (batchList.size() >= batchSize) {
flushWithRetry(); // retry transient failures before failing the task
}
} finally {
lock.unlock();
}
}
@Override
public void close() throws Exception {
lock.lock();
try {
if (!batchList.isEmpty()) {
flushWithRetry(); // drain any remaining buffered rows, with retries
}
} finally {
lock.unlock();
}
// release JDBC resources
try {
if (preparedStatement != null) {
preparedStatement.close();
}
} catch (Exception e) {
System.err.println("Error closing prepared statement: " + e.getMessage());
}
try {
if (connection != null && !connection.isClosed()) {
connection.close();
}
} catch (Exception e) {
System.err.println("Error closing connection: " + e.getMessage());
}
// clean up the ThreadLocal
dateFormatThreadLocal.remove();
super.close();
}
private void flush() throws Exception {
if (batchList.isEmpty()) {
return;
}
try {
for (DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo value : batchList) {
preparedStatement.setString(1, value.getLedgrId());
preparedStatement.setString(2, value.getLedgrNm());
preparedStatement.setString(3, value.getOuId());
preparedStatement.setString(4, value.getOuName());
preparedStatement.setString(5, value.getSaleOutOrderId());
preparedStatement.setString(6, value.getSaleOutOrderCode());
preparedStatement.setObject(7, null==value.getSaleOutOrderRowno()?null:value.getSaleOutOrderRowno());
preparedStatement.setString(8, value.getItemId());
preparedStatement.setString(9, value.getItemCode());
preparedStatement.setString(10, value.getSpec());
preparedStatement.setString(11, value.getModel());
preparedStatement.setString(12, value.getBatchNmbr());
preparedStatement.setBigDecimal(13, value.getPrqty());
preparedStatement.setBigDecimal(14, value.getQty());
preparedStatement.setString(15, value.getWrhsId());
preparedStatement.setString(16, value.getWrhsCode());
preparedStatement.setString(17, value.getWrhslcId());
preparedStatement.setString(18, value.getWrhslcCode());
preparedStatement.setObject(19, null==value.getState()?null:value.getState());
preparedStatement.setObject(20, null==value.getLineSrcType()?null:value.getLineSrcType());
preparedStatement.setString(21, value.getLineSrcCode());
preparedStatement.setString(22, value.getLineSrcId());
preparedStatement.setString(23, value.getCreateUserId());
preparedStatement.setString(24, value.getMdfyUserId());
preparedStatement.setString(25, value.getCreateUserName());
preparedStatement.setString(26, value.getMdfyUserName());
preparedStatement.setString(27, String.valueOf(value.getCreateDate()));
if(null!=value.getMdfyDate()){
Date mdfyDate = value.getMdfyDate();
String formatmdfyDate = dateFormatThreadLocal.get().format(mdfyDate);
preparedStatement.setDate(28, Date.valueOf(formatmdfyDate));
}else {
preparedStatement.setDate(28, null);
}
preparedStatement.setString(29, value.getOrgId());
preparedStatement.setString(30, value.getUntStockMainname());
preparedStatement.setString(31, value.getPrjctNmbr());
preparedStatement.setString(32, value.getDlvryPlanCode());
preparedStatement.setString(33, value.getDlvryPlanId());
preparedStatement.setString(34, value.getDlvryPlanLineId());
preparedStatement.setString(35, value.getErpBatchNmbr());
preparedStatement.setObject(36, null==value.getOutCheckState()?null:value.getOutCheckState());
preparedStatement.setString(37, value.getSaleOrderId());
preparedStatement.setString(38, value.getSaleOrderCode());
preparedStatement.setString(39, value.getNewPrjctNmbr());
preparedStatement.setString(40, String.valueOf(value.getWtrlSnMdfyDate()));
preparedStatement.setString(41, value.getWtrlSnMdfyUserId());
preparedStatement.setString(42, value.getWtrlSnCreateUserId());
preparedStatement.setObject(43, null==value.getWtrlSnLineSeq()?null:value.getWtrlSnLineSeq());
preparedStatement.setString(44, String.valueOf(value.getWtrlSnCreateDate())); // wtrl_sn_create_date
preparedStatement.setString(45, value.getBarCode());
preparedStatement.setString(46, value.getBaleCode());
preparedStatement.setString(47, value.getPalletCode());
preparedStatement.setString(48, value.getSaleOutOrderLineId());
preparedStatement.setString(49, value.getWtrlSnBatchNmbr());
preparedStatement.setString(50, value.getWtrlSnItemId());
preparedStatement.setString(51, value.getWtrlSnItemCode());
preparedStatement.setBigDecimal(52, value.getWtrlSnQty());
preparedStatement.setString(53, value.getUnitStockMainName());
preparedStatement.setObject(54, null==value.getWtrlSnOutCheckState()?null:value.getWtrlSnOutCheckState());
preparedStatement.addBatch();
}
int[] ints = preparedStatement.executeBatch();
System.out.println("Flushed " + ints.length + " rows to ClickHouse");
preparedStatement.clearBatch();
batchList.clear();
} catch (Exception e) {
System.err.println("Error executing batch insert: " + e.getMessage());
// drop the half-built statement batch so a retry does not add duplicate rows,
// keep batchList so the caller (flushWithRetry) can resend it, then rethrow
preparedStatement.clearBatch();
throw e;
}
}
private void flushWithRetry() throws Exception {
int retryCount = 0;
int maxRetries = 3;
while (retryCount < maxRetries) {
try {
flush();
return;
} catch (Exception e) {
retryCount++;
if (retryCount >= maxRetries) {
throw e;
}
Thread.sleep(1000L * retryCount); // back off a little longer after each failed attempt
}
}
}
}
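One limitation of the sink above: rows sitting in batchList between flushes are lost if the job fails before the next flush, because nothing ties the buffer to Flink's checkpoints. A possible refinement (a sketch, not part of the original code) is to have ClickHouseBatchSink also implement CheckpointedFunction and drain the buffer in snapshotState(), reusing the same lock:
java
// Sketch: additions to ClickHouseBatchSink, assuming the class declaration becomes
//   public class ClickHouseBatchSink extends RichSinkFunction<DwdStckXnySalesOtbndOrderWtrlSnDtlwDfPojo>
//           implements CheckpointedFunction
// with these imports added:
//   import org.apache.flink.runtime.state.FunctionInitializationContext;
//   import org.apache.flink.runtime.state.FunctionSnapshotContext;
//   import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;

@Override
public void snapshotState(FunctionSnapshotContext context) throws Exception {
    lock.lock();
    try {
        if (!batchList.isEmpty()) {
            // write out whatever is buffered before the checkpoint completes
            flushWithRetry();
        }
    } finally {
        lock.unlock();
    }
}

@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
    // nothing to restore: the buffer is rebuilt from scratch after a restart
}

Because the target table is a ReplacingMergeTree keyed by the same business columns, rows replayed after a restart are eventually deduplicated, so the pipeline behaves as at-least-once with eventual deduplication.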
5、Utility class
java
package com.tbea.utils;
import com.alibaba.druid.pool.DruidDataSource;
import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
public class ClickHouseConnUtil {
private static volatile DataSource dataSource;
private static String url = "jdbc:clickhouse://10.10.10.104:8123/qualityDB";
private static String username = "default"; // ClickHouse user, default is "default"
private static String password = "111111"; // ClickHouse password (empty by default)
public static Connection getClickHouseConn() throws ClassNotFoundException, SQLException {
Class.forName("ru.yandex.clickhouse.ClickHouseDriver");
// return a fresh connection on every call; the caller is responsible for closing it
return DriverManager.getConnection(url, username, password);
}
public static DataSource getClickHouseDataSource() {
if (dataSource == null) {
synchronized (ClickHouseConnUtil.class) {
if (dataSource == null) {
DruidDataSource druidDataSource = new DruidDataSource();
// basic connection settings
druidDataSource.setDriverClassName("ru.yandex.clickhouse.ClickHouseDriver");
druidDataSource.setUrl(url);
druidDataSource.setUsername(username);
druidDataSource.setPassword(password);
// connection pool settings
druidDataSource.setInitialSize(5);
druidDataSource.setMinIdle(5);
druidDataSource.setMaxActive(20);
druidDataSource.setMaxWait(60000);
druidDataSource.setTimeBetweenEvictionRunsMillis(60000);
druidDataSource.setMinEvictableIdleTimeMillis(300000);
druidDataSource.setTestWhileIdle(true);
druidDataSource.setTestOnBorrow(false);
druidDataSource.setTestOnReturn(false);
dataSource = druidDataSource;
}
}
}
return dataSource;
}
}
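For completeness, a short usage sketch (hypothetical, not in the original post): borrow a pooled connection from the Druid data source, run a quick sanity-check query against the target table, and return the connection to the pool by closing it.
java
package com.tbea.utils;

import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class ClickHouseConnUtilExample {
    public static void main(String[] args) throws Exception {
        DataSource ds = ClickHouseConnUtil.getClickHouseDataSource();
        // closing a Druid pooled connection returns it to the pool rather than closing the socket
        try (Connection conn = ds.getConnection();
             PreparedStatement ps = conn.prepareStatement(
                     "SELECT count() FROM dwd_stck_xny_sales_otbnd_order_wtrl_sn_dtl_w_df WHERE mdfy_date >= ?")) {
            ps.setDate(1, java.sql.Date.valueOf(java.time.LocalDate.now()));
            try (ResultSet rs = ps.executeQuery()) {
                if (rs.next()) {
                    System.out.println("rows written today: " + rs.getLong(1));
                }
            }
        }
    }
}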