SeaTunnel扩展Transform插件,自定义转换插件

代码结构

在seatunnel-transforms-v2中新建数据包名,新建XXXTransform,XXXTransformConfig,XXXTransformFactory三个类

自定义转换插件功能说明

这是个适配KafkaSource的转换插件,接收到的原文格式为:

{"path":"xxx.log.gz","code":"011","cont":"{\"ID\":\"1\",\"NAME\":\"zhangsan\",\"TABLE\":\"USER\",\"create_time\":\"20230904\"}","timestamp":"20230823160246"}

需要转换为只保留cont里面的数据

{"create_time":"20230904","NAME":"zhangsan","TABLE":"USER","ID":"999"}

任务配置文件

env {

  # You can set engine configuration here STREAMING BATCH

  execution.parallelism = 1

  job.mode = "STREAMING"



  #execution.checkpoint.data-uri = "hdfs://localhost:9000/checkpoint"

}



source {

  # This is a example source plugin **only for test and demonstrate the feature source plugin**

   Kafka {

            bootstrap.servers = "xxxxx:9092"

            topic = "test_in2"

            consumer.group = "167321237613

            format="text"

            result_table_name="kafka"

        }

}



transform {

    ExtractFromCJ {

    source_table_name="kafka"

    result_table_name="kafka1"

    schema = {

        fields {

                NAME = "string"

                TABLE = "string"

                create_time = "string"

                ID="string"

            }

        }

    }

}



sink {

  kafka {

      source_table_name="kafka1"

      topic = "test_out2"

      bootstrap.servers = "xxxx:9092"

      kafka.request.timeout.ms = 60000

      semantics = EXACTLY_ONCE

  }

}

代码说明

XXXConfig代码,这个类主要用来保存transform的配置项

package org.apache.seatunnel.transform.extract;



import lombok.Getter;

import lombok.Setter;

import org.apache.seatunnel.api.configuration.Option;

import org.apache.seatunnel.api.configuration.Options;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;



import java.io.Serializable;

import java.util.Map;



@Getter

@Setter

public class ExtractFromCJTransformConfig implements Serializable {



    public static final Option<Map<String, String>> SCHEMA =

            Options.key("schema.fields")

                    .mapType()

                    .noDefaultValue()

                    .withDescription(

                            "Specify the field mapping relationship between input and output");



    private Map<String, String> fieldColumns;

    public static ExtractFromCJTransformConfig of(ReadonlyConfig config) {

        ExtractFromCJTransformConfig extractFromCJTransformConfig = new ExtractFromCJTransformConfig();

        Map<String, String> fieldColumns = config.get(SCHEMA);

        extractFromCJTransformConfig.setFieldColumns(fieldColumns);

        return extractFromCJTransformConfig;

    }

}

XXXTransformFactory说明,工厂类,主要用来初始化具体的转换类

package org.apache.seatunnel.transform.extract;



import com.google.auto.service.AutoService;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;

import org.apache.seatunnel.api.configuration.util.OptionRule;

import org.apache.seatunnel.api.table.catalog.CatalogTable;

import org.apache.seatunnel.api.table.connector.TableTransform;

import org.apache.seatunnel.api.table.factory.Factory;

import org.apache.seatunnel.api.table.factory.TableFactoryContext;

import org.apache.seatunnel.api.table.factory.TableTransformFactory;



@AutoService(Factory.class)

public class ExtractFromCJTransformFactory implements TableTransformFactory {

    @Override

    public String factoryIdentifier() {

        return  "ExtractFromCJ";

    }



    @Override

    public OptionRule optionRule() {

        return OptionRule.builder().optional(ExtractFromCJTransformConfig.SCHEMA).build();

    }



    @Override

    public TableTransform createTransform(TableFactoryContext context) {

        CatalogTable catalogTable = context.getCatalogTable();

        ReadonlyConfig options = context.getOptions();

        ExtractFromCJTransformConfig extractFromCJTransformConfig =

                ExtractFromCJTransformConfig.of(options);

        return () -> new ExtractFromCJTransform(extractFromCJTransformConfig, catalogTable);

    }

}

XXXXTransform,具体的转换类,主要用于对source数据的处理,还有数据结构类型的保存

package org.apache.seatunnel.transform.extract;



import cn.hutool.core.collection.CollUtil;

import cn.hutool.json.JSONObject;

import cn.hutool.json.JSONUtil;

import com.google.auto.service.AutoService;

import lombok.NoArgsConstructor;

import lombok.NonNull;

import lombok.extern.slf4j.Slf4j;

import org.apache.seatunnel.api.configuration.ReadonlyConfig;

import org.apache.seatunnel.api.configuration.util.ConfigValidator;

import org.apache.seatunnel.api.table.catalog.CatalogTable;

import org.apache.seatunnel.api.table.catalog.CatalogTableUtil;

import org.apache.seatunnel.api.table.catalog.Column;

import org.apache.seatunnel.api.table.catalog.ConstraintKey;

import org.apache.seatunnel.api.table.catalog.PhysicalColumn;

import org.apache.seatunnel.api.table.catalog.PrimaryKey;

import org.apache.seatunnel.api.table.catalog.TableIdentifier;

import org.apache.seatunnel.api.table.catalog.TableSchema;

import org.apache.seatunnel.api.table.type.SeaTunnelDataType;

import org.apache.seatunnel.api.table.type.SeaTunnelRow;

import org.apache.seatunnel.api.table.type.SeaTunnelRowType;

import org.apache.seatunnel.api.transform.SeaTunnelTransform;

import org.apache.seatunnel.shade.com.typesafe.config.Config;

import org.apache.seatunnel.transform.common.AbstractCatalogSupportTransform;



import java.util.ArrayList;

import java.util.List;

import java.util.stream.Collectors;



@AutoService(SeaTunnelTransform.class)

@NoArgsConstructor

@Slf4j

public class ExtractFromCJTransform extends AbstractCatalogSupportTransform {



    private ExtractFromCJTransformConfig config;

    protected SeaTunnelRowType inputRowType;

    @Override

    public String getPluginName() {

        return "ExtractFromCJ";

    }



    public ExtractFromCJTransform(

            @NonNull ExtractFromCJTransformConfig config, @NonNull CatalogTable catalogTable) {

        super(catalogTable);

        this.config = config;

    }

    @Override

    protected void setConfig(Config pluginConfig) {

        ConfigValidator.of(ReadonlyConfig.fromConfig(pluginConfig))

                .validate(new ExtractFromCJTransformFactory().optionRule());

        this.config = ExtractFromCJTransformConfig.of(ReadonlyConfig.fromConfig(pluginConfig));

    }



    @Override

    protected SeaTunnelRowType transformRowType(SeaTunnelRowType inputRowType) {

        return inputRowType;

    }



    @Override

    protected SeaTunnelRow transformRow(SeaTunnelRow inputRow) {

        Object content = inputRow.getFields()[0];

        String data = content.toString();

        Object[] outputDataArray = new Object[0];

        if (JSONUtil.isJson(data)) {

            JSONObject cont = JSONUtil.parseObj(data).getJSONObject("cont");

            if (!cont.isEmpty()) {

                if (!CollUtil.isEmpty(this.config.getFieldColumns())) {

                    outputDataArray = new Object[this.config.getFieldColumns().size()];

                    int t = 0;

                    for (String key : this.config.getFieldColumns().keySet()) {

                        String value = cont.getStr(key);

                        outputDataArray[t] = value;

                        t++;

                    }

                } else {

                    outputDataArray = new Object[1];

                    outputDataArray[0] = JSONUtil.toJsonStr(cont);

                }

            }

        }

        SeaTunnelRow outputRow = new SeaTunnelRow(outputDataArray);

        outputRow.setRowKind(inputRow.getRowKind());

        outputRow.setTableId(inputRow.getTableId());

        return outputRow;

    }



    @Override

    protected TableSchema transformTableSchema() {

        List<Column> inputColumns = inputCatalogTable.getTableSchema().getColumns();

        List<ConstraintKey> outputConstraintKeys =

                inputCatalogTable.getTableSchema().getConstraintKeys().stream()

                        .map(ConstraintKey::copy)

                        .collect(Collectors.toList());

        PrimaryKey copiedPrimaryKey =

                inputCatalogTable.getTableSchema().getPrimaryKey() == null

                        ? null

                        : inputCatalogTable.getTableSchema().getPrimaryKey().copy();



        if (CollUtil.isEmpty(this.config.getFieldColumns())) {

            return TableSchema.builder()

                    .primaryKey(copiedPrimaryKey)

                    .columns(inputColumns)

                    .constraintKey(outputConstraintKeys)

                    .build();

        } else {

            List<Column> transformColumns = new ArrayList<>();

            for (String key : this.config.getFieldColumns().keySet()) {

                SeaTunnelDataType<?> dataType = CatalogTableUtil.parseDataType(this.config.getFieldColumns().get(key));

                transformColumns.add(PhysicalColumn.of(key, dataType, 0, true, null, null));

            }

            return TableSchema.builder()

                    .primaryKey(copiedPrimaryKey)

                    .columns(transformColumns)

                    .constraintKey(outputConstraintKeys)

                    .build();

        }

    }



    @Override

    protected TableIdentifier transformTableIdentifier() {

        return inputCatalogTable.getTableId().copy();

    }

}

文中的转换实现的是AbstractCatalogSupportTransform类,Seatunel还提供SingleFieldOutputTransform和MultipleFieldOutputTransform,分别对应单字段和多字段的数据处理,具体扩展可根据需求来实现对应的类

执行结果

来源消息

结果消息

以上就是对转换插件的扩展分享,有需求的小伙伴可以参考,也欢迎大家一起评论沟通~

相关推荐
Coovally AI模型快速验证30 分钟前
MMYOLO:打破单一模式限制,多模态目标检测的革命性突破!
人工智能·算法·yolo·目标检测·机器学习·计算机视觉·目标跟踪
可为测控1 小时前
图像处理基础(4):高斯滤波器详解
人工智能·算法·计算机视觉
Milk夜雨2 小时前
头歌实训作业 算法设计与分析-贪心算法(第3关:活动安排问题)
算法·贪心算法
BoBoo文睡不醒2 小时前
动态规划(DP)(细致讲解+例题分析)
算法·动态规划
apz_end2 小时前
埃氏算法C++实现: 快速输出质数( 素数 )
开发语言·c++·算法·埃氏算法
仟濹3 小时前
【贪心算法】洛谷P1106 - 删数问题
c语言·c++·算法·贪心算法
CM莫问4 小时前
python实战(十五)——中文手写体数字图像CNN分类
人工智能·python·深度学习·算法·cnn·图像分类·手写体识别
sz66cm4 小时前
LeetCode刷题 -- 45.跳跃游戏 II
算法·leetcode
Amor风信子4 小时前
华为OD机试真题---战场索敌
java·开发语言·算法·华为od·华为
old_power5 小时前
【PCL】Segmentation 模块—— 基于图割算法的点云分割(Min-Cut Based Segmentation)
c++·算法·计算机视觉·3d