Preface
The dedicated compaction task assumes that write-only=true was set in the table DDL, so the writing jobs skip compaction entirely and a separate Flink job is launched just to do the compacting.
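For context, the dedicated job is usually launched by calling the sys.compact procedure from Flink SQL. The sketch below is only an illustrative setup: it assumes a Flink version that supports named procedure arguments (1.18+) and uses placeholder catalog/warehouse/table names; check the Paimon docs of your version for the exact argument names.
java
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class DedicatedCompactJob {
    public static void main(String[] args) {
        // Batch TableEnvironment; the compaction procedure can also run in streaming mode.
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.newInstance().inBatchMode().build());

        // Register a Paimon catalog (warehouse path is a placeholder).
        tEnv.executeSql(
                "CREATE CATALOG paimon WITH ('type' = 'paimon', 'warehouse' = 'file:///tmp/paimon')");
        tEnv.executeSql("USE CATALOG paimon");

        // The writing job keeps 'write-only' = 'true' in the table DDL; this separate job
        // triggers compaction through the procedure whose entry point is CompactProcedure.call().
        // Other procedure arguments (order strategy / order-by columns, table options,
        // where filter, partition idle time, compact strategy) map onto the remaining
        // parameters of CompactProcedure.call() shown below.
        tEnv.executeSql(
                "CALL sys.compact(`table` => 'default.T', partitions => 'p=0')");
    }
}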
II. Dedicated Compaction Task Flow
1.CompactProcedure.call() -- the entry point
java
public String[] call(
ProcedureContext procedureContext,
String tableId,
String partitions,
String orderStrategy,
String orderByColumns,
String tableOptions,
String whereSql,
String partitionIdleTime,
String compactStrategy)
throws Exception {
Map<String, String> catalogOptions = catalog.options();
// Parse the options string passed to the dedicated compaction procedure into a map
Map<String, String> tableConf =
StringUtils.isNullOrWhitespaceOnly(tableOptions)
? Collections.emptyMap()
: ParameterUtils.parseCommaSeparatedKeyValues(tableOptions); // split on ','
Identifier identifier = Identifier.fromString(tableId);
CompactAction action;
String jobName;
// CASE 1: no order-by configured; use CompactAction
if (orderStrategy.isEmpty() && orderByColumns.isEmpty()) {
// create the CompactAction
action =
new CompactAction(
identifier.getDatabaseName(),
identifier.getObjectName(),
catalogOptions,
tableConf);
// configure optional parameters
if (!(StringUtils.isNullOrWhitespaceOnly(partitionIdleTime))) {
action.withPartitionIdleTime(TimeUtils.parseDuration(partitionIdleTime));
}
if (checkCompactStrategy(compactStrategy)) {
action.withFullCompaction(compactStrategy.trim().equalsIgnoreCase(FULL));
}
jobName = "Compact Job";
}
// CASE 2: order-by configured; use SortCompactAction
else if (!orderStrategy.isEmpty() && !orderByColumns.isEmpty()) {
Preconditions.checkArgument(
StringUtils.isNullOrWhitespaceOnly(partitionIdleTime),
"sort compact do not support 'partition_idle_time'.");
action =
new SortCompactAction(
identifier.getDatabaseName(),
identifier.getObjectName(),
catalogOptions,
tableConf)
.withOrderStrategy(orderStrategy)
.withOrderColumns(orderByColumns.split(","));
jobName = "Sort Compact Job";
} else {
throw new IllegalArgumentException(
"You must specify 'order strategy' and 'order by columns' both.");
}
// set the partition filter
if (!(StringUtils.isNullOrWhitespaceOnly(partitions))) {
action.withPartitions(ParameterUtils.getPartitions(partitions.split(";")));
}
// set the where filter
if (!StringUtils.isNullOrWhitespaceOnly(whereSql)) {
action.withWhereSql(whereSql);
}
// execute
return execute(procedureContext, action, jobName);
}
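For reference, the string formats the procedure expects are: tableOptions as a comma-separated key=value list, and partitions as a semicolon-separated list of such key=value specs (one spec per partition). The sketch below is only a conceptual stand-in for ParameterUtils.parseCommaSeparatedKeyValues and ParameterUtils.getPartitions, not Paimon's actual implementation.
java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ParameterFormatSketch {

    // Conceptual equivalent of ParameterUtils.parseCommaSeparatedKeyValues:
    // "sink.parallelism=4,write-buffer-size=512mb" -> {sink.parallelism=4, write-buffer-size=512mb}
    static Map<String, String> parseCommaSeparatedKeyValues(String s) {
        Map<String, String> result = new LinkedHashMap<>();
        for (String kv : s.split(",")) {
            String[] parts = kv.split("=", 2);
            result.put(parts[0].trim(), parts[1].trim());
        }
        return result;
    }

    // Conceptual equivalent of ParameterUtils.getPartitions: each ';'-separated entry
    // describes one partition, e.g. "dt=20240101,hh=10;dt=20240102,hh=11".
    static List<Map<String, String>> getPartitions(String... specs) {
        List<Map<String, String>> partitions = new ArrayList<>();
        for (String spec : specs) {
            partitions.add(parseCommaSeparatedKeyValues(spec));
        }
        return partitions;
    }

    public static void main(String[] args) {
        System.out.println(parseCommaSeparatedKeyValues("sink.parallelism=4,write-buffer-size=512mb"));
        System.out.println(getPartitions("dt=20240101,hh=10;dt=20240102,hh=11".split(";")));
    }
}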
2.The CompactAction class
java
public class CompactAction extends TableActionBase {
private static final Logger LOGGER = LoggerFactory.getLogger(CompactAction.class);
private List<Map<String, String>> partitions;
private String whereSql;
@Nullable private Duration partitionIdleTime = null;
private Boolean fullCompaction;
public CompactAction(
String database,
String tableName,
Map<String, String> catalogConfig,
Map<String, String> tableConf) {
super(database, tableName, catalogConfig);
if (!(table instanceof FileStoreTable)) {
throw new UnsupportedOperationException(
String.format(
"Only FileStoreTable supports compact action. The table type is '%s'.",
table.getClass().getName()));
}
// Force write-only = false for the compaction job: if the compaction job were also write-only, nothing would ever compact
HashMap<String, String> dynamicOptions = new HashMap<>(tableConf);
dynamicOptions.put(CoreOptions.WRITE_ONLY.key(), "false");
table = table.copy(dynamicOptions);
}
// ------------------------------------------------------------------------
// Java API
// ------------------------------------------------------------------------
public CompactAction withPartitions(List<Map<String, String>> partitions) {
this.partitions = partitions;
return this;
}
public CompactAction withWhereSql(String whereSql) {
this.whereSql = whereSql;
return this;
}
public CompactAction withPartitionIdleTime(@Nullable Duration partitionIdleTime) {
this.partitionIdleTime = partitionIdleTime;
return this;
}
public CompactAction withFullCompaction(Boolean fullCompaction) {
this.fullCompaction = fullCompaction;
return this;
}
@Override
public void build() throws Exception {
ReadableConfig conf = env.getConfiguration();
// whether the job runs in streaming execution mode
boolean isStreaming =
conf.get(ExecutionOptions.RUNTIME_MODE) == RuntimeExecutionMode.STREAMING;
FileStoreTable fileStoreTable = (FileStoreTable) table;
switch (fileStoreTable.bucketMode()) {
case BUCKET_UNAWARE: // bucket = -1 takes this branch
{
buildForUnawareBucketCompaction(env, fileStoreTable, isStreaming);
break;
}
case HASH_FIXED:
case HASH_DYNAMIC:
default: // all other bucket modes take this branch
{
buildForTraditionalCompaction(env, fileStoreTable, isStreaming);
}
}
}
private void buildForTraditionalCompaction(
StreamExecutionEnvironment env, FileStoreTable table, boolean isStreaming)
throws Exception {
// Step 1. Determine the execution mode (streaming or batch); for streaming, force asynchronous compaction options
if (fullCompaction == null) {
fullCompaction = !isStreaming; // batch mode defaults to full compaction, streaming mode to non-full
} else {
Preconditions.checkArgument(
!(fullCompaction && isStreaming), // the FULL compact strategy is not allowed in streaming mode
"The full compact strategy is only supported in batch mode. Please add -Dexecution.runtime-mode=BATCH.");
}
/* For streaming compaction, force fully asynchronous compaction options:
num-sorted-run.stop-trigger = 2147483647
sort-spill-threshold = 10
lookup-wait = false
*/
if (isStreaming) {
// for completely asynchronous compaction
HashMap<String, String> dynamicOptions =
new HashMap<String, String>() {
{
put(CoreOptions.NUM_SORTED_RUNS_STOP_TRIGGER.key(), "2147483647");
put(CoreOptions.SORT_SPILL_THRESHOLD.key(), "10");
put(CoreOptions.LOOKUP_WAIT.key(), "false");
}
};
table = table.copy(dynamicOptions);
}
// Step 2. Create the CompactorSourceBuilder and CompactorSinkBuilder
CompactorSourceBuilder sourceBuilder =
new CompactorSourceBuilder(identifier.getFullName(), table);
CompactorSinkBuilder sinkBuilder = new CompactorSinkBuilder(table, fullCompaction);
sourceBuilder.withPartitionPredicate(getPredicate());
// Step 3. Build the DataStreamSource from the CompactorSourceBuilder; it becomes the upstream of the CompactorSinkBuilder
DataStreamSource<RowData> source =
sourceBuilder
.withEnv(env)
.withContinuousMode(isStreaming)
.withPartitionIdleTime(partitionIdleTime)
.build();
sinkBuilder.withInput(source).build();
}
private void buildForUnawareBucketCompaction(
StreamExecutionEnvironment env, FileStoreTable table, boolean isStreaming)
throws Exception {
UnawareBucketCompactionTopoBuilder unawareBucketCompactionTopoBuilder =
new UnawareBucketCompactionTopoBuilder(env, identifier.getFullName(), table);
unawareBucketCompactionTopoBuilder.withPartitionPredicate(getPredicate());
unawareBucketCompactionTopoBuilder.withContinuousMode(isStreaming);
unawareBucketCompactionTopoBuilder.withPartitionIdleTime(partitionIdleTime);
unawareBucketCompactionTopoBuilder.build();
}
// build the filter predicate
protected Predicate getPredicate() throws Exception {
// verify that partitions and where are not used together
Preconditions.checkArgument(
partitions == null || whereSql == null,
"partitions and where cannot be used together.");
Predicate predicate = null;
// CASE 1: the partitions parameter is used
if (partitions != null) {
predicate =
PredicateBuilder.or(
partitions.stream()
.map(
p ->
createPartitionPredicate(
p,
table.rowType(),
((FileStoreTable) table)
.coreOptions()
.partitionDefaultName()))
.toArray(Predicate[]::new));
}
// CASE 2: the where parameter is used
else if (whereSql != null) {
SimpleSqlPredicateConvertor simpleSqlPredicateConvertor =
new SimpleSqlPredicateConvertor(table.rowType());
predicate = simpleSqlPredicateConvertor.convertSqlToPredicate(whereSql);
}
// Check whether the predicate contains a non-partition key.
if (predicate != null) {
LOGGER.info("the partition predicate of compaction is {}", predicate);
PartitionPredicateVisitor partitionPredicateVisitor =
new PartitionPredicateVisitor(table.partitionKeys());
Preconditions.checkArgument(
predicate.visit(partitionPredicateVisitor),
"Only partition key can be specialized in compaction action.");
}
return predicate;
}
// run the action
@Override
public void run() throws Exception {
build(); // build() assembles the Flink DataStream topology
execute("Compact job"); // 提交 Flink Job
}
}
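Putting the Java API above together, a caller would use CompactAction roughly as follows. This is a minimal sketch based only on the constructor and the with* methods shown above; the catalog options and table names are placeholders, and how the StreamExecutionEnvironment gets attached to the action before run() depends on the Paimon/Flink version (it is handled by the action framework and not shown in the excerpt).
java
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import org.apache.paimon.flink.action.CompactAction;

public class CompactActionUsageSketch {
    public static void main(String[] args) throws Exception {
        Map<String, String> catalogOptions = new HashMap<>();
        catalogOptions.put("warehouse", "file:///tmp/paimon"); // placeholder warehouse

        Map<String, String> tableConf = new HashMap<>();
        tableConf.put("sink.parallelism", "4"); // dynamic table option for the compact job

        CompactAction action =
                new CompactAction("default", "T", catalogOptions, tableConf)
                        .withPartitionIdleTime(Duration.ofHours(1)) // only compact idle partitions
                        .withFullCompaction(true);                  // batch-mode full compaction

        // run() calls build() to assemble the DataStream topology and then execute("Compact job").
        action.run();
    }
}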
3.CompactorSourceBuilder -- building the DataStreamSource
java
// constructor
public CompactorSourceBuilder(String tableIdentifier, FileStoreTable table) {
this.tableIdentifier = tableIdentifier;
this.table = table;
}
// build()
public DataStreamSource<RowData> build() {
if (env == null) {
throw new IllegalArgumentException("StreamExecutionEnvironment should not be null.");
}
// Step 1. Scan metadata and build the list of (partition, bucket) pairs that need compaction
// isContinuous here is assigned from isStreaming
CompactBucketsTable compactBucketsTable = new CompactBucketsTable(table, isContinuous);
RowType produceType = compactBucketsTable.rowType();
// Step 2. Build the DataStreamSource
DataStreamSource<RowData> dataStream =
env.fromSource(
buildSource(compactBucketsTable),
WatermarkStrategy.noWatermarks(),
tableIdentifier + "-compact-source",
InternalTypeInfo.of(LogicalTypeConversion.toLogicalType(produceType)));
// Step 3. Apply partition filtering; streaming mode does not support partitionIdleTime
if (isContinuous) {
Preconditions.checkArgument(
partitionIdleTime == null, "Streaming mode does not support partitionIdleTime");
} else if (partitionIdleTime != null) {
// in batch mode, recently modified partitions can be filtered out
Map<BinaryRow, Long> partitionInfo = getPartitionInfo(compactBucketsTable);
long historyMilli =
LocalDateTime.now()
.minus(partitionIdleTime)
.atZone(ZoneId.systemDefault())
.toInstant()
.toEpochMilli();
SingleOutputStreamOperator<RowData> filterStream =
dataStream.filter(
rowData -> {
BinaryRow partition = deserializeBinaryRow(rowData.getBinary(1));
return partitionInfo.get(partition) <= historyMilli;
});
dataStream = new DataStreamSource<>(filterStream);
}
// Step 4. Set the source parallelism
Integer parallelism =
Options.fromMap(table.options()).get(FlinkConnectorOptions.SCAN_PARALLELISM);
if (parallelism != null) {
dataStream.setParallelism(parallelism);
}
return dataStream;
}
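Step 3 above is the only place where partitionIdleTime matters: in batch mode a partition is compacted only if its last modification is at least partitionIdleTime in the past. The self-contained sketch below (with made-up timestamps) reproduces the same cutoff computation.
java
import java.time.Duration;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.HashMap;
import java.util.Map;

public class PartitionIdleTimeSketch {
    public static void main(String[] args) {
        Duration partitionIdleTime = Duration.ofHours(1);

        // last-modified epoch millis per partition (hypothetical values)
        Map<String, Long> partitionInfo = new HashMap<>();
        partitionInfo.put("dt=20240101", System.currentTimeMillis() - 2 * 60 * 60 * 1000); // idle for 2h
        partitionInfo.put("dt=20240102", System.currentTimeMillis() - 5 * 60 * 1000);      // written 5min ago

        // Same cutoff computation as step 3 in CompactorSourceBuilder.build()
        long historyMilli =
                LocalDateTime.now()
                        .minus(partitionIdleTime)
                        .atZone(ZoneId.systemDefault())
                        .toInstant()
                        .toEpochMilli();

        partitionInfo.forEach(
                (partition, lastModified) ->
                        System.out.println(partition + " -> compact=" + (lastModified <= historyMilli)));
    }
}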
4.CompactorSinkBuilder -- building the DataStreamSink
java
public class CompactorSinkBuilder {
private final FileStoreTable table; // the bound table
private DataStream<RowData> input; // the input stream
private final boolean fullCompaction; // true if compactStrategy is full, or the job runs in batch mode
public CompactorSinkBuilder(FileStoreTable table, boolean fullCompaction) {
this.table = table;
this.fullCompaction = fullCompaction;
}
public CompactorSinkBuilder withInput(DataStream<RowData> input) {
this.input = input;
return this;
}
public DataStreamSink<?> build() {
// bucket = -1 (BUCKET_UNAWARE) is not supported here
BucketMode bucketMode = table.bucketMode();
switch (bucketMode) {
case HASH_FIXED:
case HASH_DYNAMIC:
return buildForBucketAware();
case BUCKET_UNAWARE:
default:
throw new UnsupportedOperationException("Unsupported bucket mode: " + bucketMode);
}
}
private DataStreamSink<?> buildForBucketAware() {
Integer parallelism =
Optional.ofNullable(
table.options().get(FlinkConnectorOptions.SINK_PARALLELISM.key()))
.map(Integer::valueOf)
.orElse(null);
DataStream<RowData> partitioned =
partition(input, new BucketsRowChannelComputer(), parallelism);
// build the CompactorSink
return new CompactorSink(table, fullCompaction).sinkFrom(partitioned);
}
}
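buildForBucketAware() repartitions the source stream before the sink so that all records for the same (partition, bucket) reach the same sink subtask. The snippet below only illustrates that idea with a hash-modulo channel computation; it is not the actual BucketsRowChannelComputer.
java
import java.util.Objects;

public class ChannelComputerSketch {
    // Records of the same (partition, bucket) must land on the same sink channel,
    // so the channel index is derived from a hash of the pair modulo the parallelism.
    static int channel(String partition, int bucket, int numChannels) {
        return Math.floorMod(Objects.hash(partition, bucket), numChannels);
    }

    public static void main(String[] args) {
        int sinkParallelism = 4;
        System.out.println(channel("dt=20240101", 0, sinkParallelism));
        System.out.println(channel("dt=20240101", 1, sinkParallelism));
        System.out.println(channel("dt=20240102", 0, sinkParallelism));
    }
}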
5.CompactorSink
java
public class CompactorSink extends FlinkSink<RowData> {
private static final long serialVersionUID = 1L;
private final boolean fullCompaction; // true if compactStrategy is full, or the job runs in batch mode
public CompactorSink(FileStoreTable table, boolean fullCompaction) {
super(table, false);
this.fullCompaction = fullCompaction;
}
// create the factory for the write operator
@Override
protected OneInputStreamOperatorFactory<RowData, Committable> createWriteOperatorFactory(
StoreSinkWrite.Provider writeProvider, String commitUser) {
// create the StoreCompactOperator.Factory
return new StoreCompactOperator.Factory(table, writeProvider, commitUser, fullCompaction);
}
// create the factory for the committer operator
@Override
protected Committer.Factory<Committable, ManifestCommittable> createCommitterFactory() {
return context -> new StoreCommitter(table, table.newCommit(context.commitUser()), context);
}
// create the CommittableStateManager
@Override
protected CommittableStateManager<ManifestCommittable> createCommittableStateManager() {
return new NoopCommittableStateManager();
}
}
6.StoreCompactOperator
(1) Constructor
java
private StoreCompactOperator(
StreamOperatorParameters<Committable> parameters,
FileStoreTable table,
StoreSinkWrite.Provider storeSinkWriteProvider,
String initialCommitUser,
boolean fullCompaction) {
super(parameters, Options.fromMap(table.options()));
// Verify that write-only is false: if the compaction job itself were write-only, nothing would ever compact
// (the CompactAction constructor has already forced write-only=false)
Preconditions.checkArgument(
!table.coreOptions().writeOnly(),
CoreOptions.WRITE_ONLY.key() + " should not be true for StoreCompactOperator.");
this.table = table;
this.storeSinkWriteProvider = storeSinkWriteProvider;
this.initialCommitUser = initialCommitUser;
this.fullCompaction = fullCompaction;
}
(2) processElement()
java
@Override
public void processElement(StreamRecord<RowData> element) throws Exception {
RowData record = element.getValue();
// parse the record
long snapshotId = record.getLong(0); // snapshot id
BinaryRow partition = deserializeBinaryRow(record.getBinary(1)); // partition key
int bucket = record.getInt(2); // bucket number
byte[] serializedFiles = record.getBinary(3); // serialized file list
List<DataFileMeta> files = dataFileMetaSerializer.deserializeList(serializedFiles);
// streaming mode: call notifyNewFiles() to announce the new files
if (write.streamingMode()) {
write.notifyNewFiles(snapshotId, partition, bucket, files);
}
// batch mode: verify that the file list is empty
else {
Preconditions.checkArgument(
files.isEmpty(),
"Batch compact job does not concern what files are compacted. "
+ "They only need to know what buckets are compacted.");
}
// record the (partition, bucket) to compact; its files are compacted later in prepareCommit()
waitToCompact.add(Pair.of(partition, bucket));
}
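For readability, the four fields the operator reads from each RowData can be thought of as the value class below. This is purely a hypothetical illustration of the row layout; the real operator reads the positions directly from RowData.
java
// Hypothetical value class restating the row layout consumed by processElement():
// (snapshotId BIGINT, partition BYTES, bucket INT, files BYTES).
public class CompactionTaskRow {
    final long snapshotId;            // field 0: snapshot that produced the files
    final byte[] serializedPartition; // field 1: serialized BinaryRow partition key
    final int bucket;                 // field 2: bucket number
    final byte[] serializedFiles;     // field 3: serialized List<DataFileMeta>, empty in batch mode

    CompactionTaskRow(long snapshotId, byte[] serializedPartition, int bucket, byte[] serializedFiles) {
        this.snapshotId = snapshotId;
        this.serializedPartition = serializedPartition;
        this.bucket = bucket;
        this.serializedFiles = serializedFiles;
    }
}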
(3) prepareCommit() -- where compaction is actually triggered
java
@Override
protected List<Committable> prepareCommit(boolean waitCompaction, long checkpointId)
throws IOException {
try {
// Iterate over all pending <partition, bucket> pairs; this eventually calls compact() on a FileStoreWrite implementation such as AbstractFileStoreWrite
for (Pair<BinaryRow, Integer> partitionBucket : waitToCompact) {
write.compact(partitionBucket.getKey(), partitionBucket.getRight(), fullCompaction);
}
} catch (Exception e) {
throw new RuntimeException("Exception happens while executing compaction.", e);
}
// clear the pending set
waitToCompact.clear();
// delegate to the StoreSinkWrite implementation's prepareCommit()
return write.prepareCommit(waitCompaction, checkpointId);
}
<1> Callee: StoreSinkWriteImpl.compact()
java
@Override
public void compact(BinaryRow partition, int bucket, boolean fullCompaction) throws Exception {
write.compact(partition, bucket, fullCompaction);
}
<2> Callee: TableWriteImpl.compact()
java
@Override
public void compact(BinaryRow partition, int bucket, boolean fullCompaction) throws Exception {
// here write is a FileStoreWrite implementation, e.g. AbstractFileStoreWrite
write.compact(partition, bucket, fullCompaction);
}
<3> Callee: AbstractFileStoreWrite.compact()
java
@Override
public void compact(BinaryRow partition, int bucket, boolean fullCompaction) throws Exception {
// 1. getWriterWrapper() fetches the WriterContainer for this partition/bucket
// 2. its writer (e.g. a MergeTreeWriter) runs the compaction, passing along whether a full compaction is required
getWriterWrapper(partition, bucket).writer.compact(fullCompaction);
}
<4> Callee: the RecordWriter implementation MergeTreeWriter.compact()
java
@Override
public void compact(boolean fullCompaction) throws Exception {
// delegate to flushWriteBuffer(true, fullCompaction)
flushWriteBuffer(true, fullCompaction);
}
The rest of the flow is covered in the earlier posts "Paimon源码解读 -- Compaction-7.FULL_COMPACTION_DELTA_COMMITS" and "Paimon源码解读 -- Compaction-4.KeyValueFileStoreWrite".
III. Summary
sequenceDiagram
    participant User as User SQL
    participant CP as CompactProcedure
    participant PU as ParameterUtils
    participant CA as CompactAction
    participant CSB as CompactorSourceBuilder
    participant CBS as CompactorSinkBuilder
    participant CBT as CompactBucketsTable
    participant SCO as StoreCompactOperator
    participant SW as StoreSinkWrite
    participant MCM as MergeTreeCompactManager
    participant UC as UniversalCompaction
    User->>CP: CALL sys.compact(...)
    Note over CP: Step 1: parse parameters
    CP->>PU: parseCommaSeparatedKeyValues('sink.parallelism=4')
    PU-->>CP: Map{"sink.parallelism": "4"}
    CP->>PU: getPartitions(['p=0'])
    PU->>PU: parseCommaSeparatedKeyValues('p=0')
    PU-->>CP: List[Map{"p": "0"}]
    Note over CP: Step 2: create the CompactAction
    CP->>CA: new CompactAction("default", "T", catalogOptions, tableConf)
    Note over CA: Key point: force write-only=false
    CA->>CA: dynamicOptions.put("write-only", "false")
    CA->>CA: table = table.copy(dynamicOptions)
    Note over CP: Step 3: configure filters
    CP->>CA: withPartitions([{"p": "0"}])
    Note over CP,CA: or withWhereSql("dt>10 and h<20"), mutually exclusive with partitions
    Note over CP: Step 4: run the action
    CP->>CA: run()
    CA->>CA: build()
    Note over CA: Step 5: create the source
    CA->>CSB: new CompactorSourceBuilder(table)
    CSB->>CBT: new CompactBucketsTable(table)
    Note over CBT: scan metadata and produce the (partition, bucket) list to compact
    CSB->>CSB: withPartitionPredicate(p=0)
    CSB->>CSB: build()
    CSB-->>CA: DataStreamSource
    Note over CA: Step 6: create the sink
    CA->>CBS: new CompactorSinkBuilder(table, false)
    CBS->>CBS: withInput(source)
    CBS->>CBS: build()
    CBS->>SCO: new StoreCompactOperator(...)
    Note over SCO: check: write-only must be false
    Note over CA: Step 7: execute the Flink job
    CA->>CA: execute("Compact job")
    loop for each compaction task record
        SCO->>SCO: processElement(record)
        Note over SCO: record layout: (snapshotId, partition, bucket, files)
        SCO->>SCO: parse: partition=BinaryRow{p=0}, bucket=0
        SCO->>SCO: waitToCompact.add((partition, bucket))
    end
    Note over SCO: checkpoint triggers
    SCO->>SCO: prepareCommit(checkpointId)
    loop for each (partition, bucket) in waitToCompact
        SCO->>SW: compact(partition=p0, bucket=0, fullCompaction=false)
        SW->>MCM: triggerCompaction(false)
        MCM->>UC: pick(numLevels, runs)
        Note over UC: apply the compaction strategy: 1. time interval 2. space amplification 3. size ratio 4. file count
        UC-->>MCM: CompactUnit
        MCM->>MCM: submitCompaction(unit)
        Note over MCM: compaction runs asynchronously
        MCM-->>SW: CompactResult
        SW-->>SCO: List&lt;Committable&gt;
    end
    SCO->>SCO: commit the Committables
    SCO-->>User: compaction finished
"default", "T",
catalogOptions,
tableConf) Note over CA: 关键: 强制设置 write-only=false CA->>CA: dynamicOptions.put("write-only", "false")
table = table.copy(dynamicOptions) Note over CP: 步骤 3: 配置过滤条件 CP->>CA: withPartitions([{"p": "0"}]) CP->>CA: withWhereSql("dt>10 and h<20") Note over CP: 步骤 4: 执行任务 CP->>CA: run() CA->>CA: build() Note over CA: 步骤 5: 创建 Source CA->>CSB: new CompactorSourceBuilder(table) CSB->>CBT: new CompactBucketsTable(table) Note over CBT: 扫描元数据,
生成需要压缩的
(partition, bucket) 列表 CSB->>CSB: withPartitionPredicate(p=0) CSB->>CSB: build() CSB-->>CA: DataStreamSource
(snapshotId, partition, bucket, files) SCO->>SCO: 解析: partition=BinaryRow{p=0}
bucket=0 SCO->>SCO: waitToCompact.add((partition, bucket)) end Note over SCO: Checkpoint 触发 SCO->>SCO: prepareCommit(checkpointId) loop waitToCompact 中的每个 (partition, bucket) SCO->>SW: compact(partition=p0, bucket=0, fullCompaction=false) SW->>MCM: triggerCompaction(false) MCM->>UC: pick(numLevels, runs) Note over UC: 应用压缩策略:
1. 时间间隔检查
2. 空间放大检查
3. 大小比率检查
4. 文件数量检查 UC-->>MCM: CompactUnit MCM->>MCM: submitCompaction(unit) Note over MCM: 异步执行压缩任务 MCM-->>SW: CompactResult SW-->>SCO: List