Flink数据源的读写接入体系

Flink连接器

Flink 连接器包含数据源输入与汇聚输出两部分。

**读取:**Flink自身内置了一些基础的连接器,数据源输入支持文件、目录、Socket,以及从 collections 和 iterators 中读取数据;

**写出:**汇聚输出支持把数据写入文件、标准输出(stdout)、标准错误输出(stderr)和 socket。
以下是官方提供的第三方接入连接器:(source代表读取,sink代表写出)
Apache Kafka (source/sink)
Apache Cassandra (sink)
Amazon Kinesis Streams (source/sink)
Elasticsearch (sink)
Hadoop FileSystem (sink)
RabbitMQ (source/sink)
Apache NiFi (source/sink)
Twitter Streaming API (source)
Google PubSub (source/sink)
JDBC (sink)

JDBC读写

创建数据表

sql 复制代码
-- Access-log table used by the JDBC examples: surrogate key plus four text columns.
CREATE TABLE `t_access_log` (
    `id`   BIGINT(20)   NOT NULL AUTO_INCREMENT COMMENT '主键ID',
    `ip`   VARCHAR(32)  NOT NULL COMMENT 'IP地址',
    `time` VARCHAR(255) NULL DEFAULT NULL COMMENT '访问时间',
    `type` VARCHAR(32)  NOT NULL COMMENT '请求类型',
    `api`  VARCHAR(32)  NOT NULL COMMENT 'API地址',
    PRIMARY KEY (`id`)
) ENGINE = InnoDB AUTO_INCREMENT = 1;

AccessLog实体类

java 复制代码
/**
 * Plain data holder for one access-log record.
 *
 * <p>All four attributes are kept as text. Getters, setters and the usual
 * value-object methods come from Lombok's {@code @Data}.
 */
@Data
public class AccessLog {

    /** Address the request came from. */
    private String ip;

    /** Access time, stored as a string. */
    private String time;

    /** Request type. */
    private String type;

    /** API that was called. */
    private String api;
}

1、从ArrayList读取数据,插入到MySQL

java 复制代码
/**
 * Demo job: reads tab-separated access-log lines from an in-memory list and
 * inserts them into {@code t_access_log} through {@code JdbcSink}.
 */
public class JdbcConnectorsApplication {

    /**
     * Builds the collection-to-JDBC pipeline and runs it.
     *
     * @param args not used
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // Execution environment for the streaming job.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Sample input; each line holds ip, time, type and api separated by tabs.
        List<String> records = new ArrayList<>();
        records.add("10.10.20.101\t1601297294548\tPOST\taddOrder");
        records.add("10.10.20.102\t1601297296549\tGET\tgetOrder");

        // Collection source -> JDBC sink.
        env.fromCollection(records).addSink(JdbcSink.sink(
                "insert into t_access_log(ip,time,type,api) values(?,?,?,?)",
                (statement, record) -> {
                    System.out.println("data:" + record);
                    // Bind the k-th tab-separated column to the k-th placeholder (JDBC indexes from 1).
                    int position = 1;
                    for (String column : String.valueOf(record).split("\t")) {
                        statement.setString(position, column);
                        position++;
                    }
                },
                new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                        .withUrl("jdbc:mysql://127.0.0.1:3306/flink_demo?useSSL=false")
                        .withDriverName("com.mysql.jdbc.Driver")
                        .withUsername("root")
                        .withPassword("123456")
                        .build()
        ));

        // Submit the job.
        env.execute("jdbc-job");
    }
}

2、 自定义写入MySQL

java 复制代码
/**
 * Demo job: reads comma-separated lines from a socket, converts each one to an
 * {@code AccessLog} and hands it to the custom {@code MySQLSinkFunction}.
 */
public class CustomSinkApplication {

    /**
     * Builds the socket-to-MySQL pipeline and runs it.
     *
     * @param args not used
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Socket source: host, port, record delimiter.
        DataStreamSource<String> lines = env.socketTextStream("localhost", 9999, "\n");

        // Parse each line into an AccessLog; columns beyond the fourth are ignored,
        // and missing columns leave the corresponding attribute unset.
        SingleOutputStreamOperator<AccessLog> accessLogs = lines.map(new MapFunction<String, AccessLog>() {
            @Override
            public AccessLog map(String line) {
                String[] columns = line.split(",");
                int available = columns.length;

                AccessLog parsed = new AccessLog();
                if (available > 0) {
                    parsed.setIp(columns[0]);
                }
                if (available > 1) {
                    parsed.setTime(columns[1]);
                }
                if (available > 2) {
                    parsed.setType(columns[2]);
                }
                if (available > 3) {
                    parsed.setApi(columns[3]);
                }
                return parsed;
            }
        });

        // Custom sink that writes every AccessLog to MySQL.
        accessLogs.addSink(new MySQLSinkFunction());

        // Submit the job.
        env.execute("CustomSinkApplication");
    }
}

其中,在将outputStream写入到mysql时,需要自定义MySQL写入的function

复制代码
outputStream.addSink(new MySQLSinkFunction());
java 复制代码
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

/**
 * Flink sink that inserts each {@code AccessLog} into the {@code t_access_log}
 * table over a plain JDBC connection.
 *
 * <p>The connection and statement are created in {@link #open(Configuration)} and
 * released in {@link #close()}.
 */
public class MySQLSinkFunction extends RichSinkFunction<AccessLog> {

    // JDBC handles are created in open(); transient keeps them out of the serialized function.
    private transient Connection connection;
    private transient PreparedStatement preparedStatement;

    /**
     * Opens the JDBC connection and prepares the insert statement.
     *
     * @param parameters configuration passed through to the superclass
     * @throws Exception if the connection or statement cannot be created
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);

        // Connection settings.
        String url = "jdbc:mysql://127.0.0.1:3306/flink_demo?useSSL=false";
        String user = "root";
        String password = "123456";

        connection = DriverManager.getConnection(url, user, password);

        // Prepared once and reused for every record.
        String sql = "INSERT INTO t_access_log (ip, time, type, api) VALUES (?, ?, ?, ?)";
        preparedStatement = connection.prepareStatement(sql);
    }

    /**
     * Inserts one record.
     *
     * @param data    record to insert
     * @param context sink context (not used)
     * @throws Exception if the insert fails
     */
    @Override
    public void invoke(AccessLog data, Context context) throws Exception {
        preparedStatement.setString(1, data.getIp());
        preparedStatement.setString(2, data.getTime());
        preparedStatement.setString(3, data.getType());
        preparedStatement.setString(4, data.getApi());

        preparedStatement.executeUpdate();
    }

    /**
     * Releases the statement and the connection.
     *
     * <p>The connection is closed in a {@code finally} block so that a failure while
     * closing the statement does not leak it.
     *
     * @throws Exception if closing either resource fails
     */
    @Override
    public void close() throws Exception {
        super.close();

        try {
            if (preparedStatement != null) {
                preparedStatement.close();
            }
        } finally {
            preparedStatement = null;
            if (connection != null) {
                try {
                    connection.close();
                } finally {
                    connection = null;
                }
            }
        }
    }
}

3、自定义从MySQL读入数据(官方只提供写入到MySQL的连接器)

java 复制代码
/**
 * Demo job: reads rows through the custom {@code MySQLSourceFunction} and prints them.
 */
public class CustomSourceApplication {

    /**
     * Builds the MySQL-source-to-stdout pipeline and runs it.
     *
     * @param args not used
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

        // Custom MySQL source feeding a print sink that runs with parallelism 1.
        environment
                .addSource(new MySQLSourceFunction())
                .print()
                .setParallelism(1);

        // Submit the job.
        environment.execute("custom jdbc source.");
    }

}

读入function

java 复制代码
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

/**
 * Flink source that runs one {@code SELECT} against {@code t_access_log} and emits
 * every row as an {@code AccessLog}.
 *
 * <p>The connection and statement are created in {@link #open(Configuration)} and
 * released in {@link #close()}. {@link #cancel()} stops the emit loop.
 */
public class MySQLSourceFunction extends RichSourceFunction<AccessLog> {

    // JDBC handles are created in open(); transient keeps them out of the serialized function.
    private transient Connection connection;
    private transient PreparedStatement preparedStatement;

    // Cleared by cancel(); volatile because cancel() is expected to run on a different
    // thread than run(). Deliberately not transient so the initial 'true' survives serialization.
    private volatile boolean running = true;

    /**
     * Opens the JDBC connection and prepares the query.
     *
     * @param parameters configuration passed through to the superclass
     * @throws Exception if the connection or statement cannot be created
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);

        // Connection settings.
        String url = "jdbc:mysql://127.0.0.1:3306/flink_demo?useSSL=false";
        String user = "root";
        String password = "123456";

        connection = DriverManager.getConnection(url, user, password);

        String sql = "SELECT ip, time, type, api FROM t_access_log";
        preparedStatement = connection.prepareStatement(sql);
    }

    /**
     * Executes the query and emits one {@code AccessLog} per row until the result
     * set is exhausted or the source is cancelled.
     *
     * @param ctx context used to emit records downstream
     * @throws Exception if the query or reading a row fails
     */
    @Override
    public void run(SourceContext<AccessLog> ctx) throws Exception {
        // try-with-resources closes the ResultSet even when emitting fails or is cancelled.
        try (ResultSet resultSet = preparedStatement.executeQuery()) {
            while (running && resultSet.next()) {
                AccessLog accessLog = new AccessLog();
                accessLog.setIp(resultSet.getString("ip"));
                accessLog.setApi(resultSet.getString("api"));
                accessLog.setType(resultSet.getString("type"));
                accessLog.setTime(resultSet.getString("time"));

                ctx.collect(accessLog);
            }
        }
    }

    /**
     * Releases the statement and the connection.
     *
     * <p>The connection is closed in a {@code finally} block so that a failure while
     * closing the statement does not leak it.
     *
     * @throws Exception if closing either resource fails
     */
    @Override
    public void close() throws Exception {
        super.close();

        try {
            if (preparedStatement != null) {
                preparedStatement.close();
            }
        } finally {
            preparedStatement = null;
            if (connection != null) {
                try {
                    connection.close();
                } finally {
                    connection = null;
                }
            }
        }
    }

    /**
     * Asks {@link #run(SourceContext)} to stop after the row it is currently handling.
     */
    @Override
    public void cancel() {
        running = false;
    }
}

基于Netty的数据模拟

创建一个新的Maven模块

XML 复制代码
    <dependencies>
        <!-- Netty (netty-all bundle) -->
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-all</artifactId>
            <version>4.1.16.Final</version>
        </dependency>
        <!-- Spring Boot web starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <version>${spring.boot.version}</version>
        </dependency>
        <!-- Spring Data JPA starter -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-jpa</artifactId>
            <version>${spring.boot.version}</version>
        </dependency>
        <!-- MySQL JDBC driver (mysql-connector-java) -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.jdbc.version}</version>
        </dependency>
        <!-- Spring Data Redis starter. NOTE(review): version is hard-coded to 2.1.1.RELEASE while the other starters use ${spring.boot.version}; confirm this is intentional -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-redis</artifactId>
            <version>2.1.1.RELEASE</version>
        </dependency>
    </dependencies>

SocketSourceApplication

java 复制代码
package com.lcc.flink.sources.socket;

import com.lcc.flink.sources.socket.netty.NettyServerHandler;
import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.*;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioServerSocketChannel;
import io.netty.handler.codec.string.StringDecoder;
import io.netty.handler.codec.string.StringEncoder;
import io.netty.handler.logging.LogLevel;
import io.netty.handler.logging.LoggingHandler;

import java.util.Random;

/**
 * Netty TCP server that simulates an access-log feed: once per second it generates
 * a tab-separated record and writes it to every connected client.
 */
public class SocketSourceApplication {

    /** Candidate client IPs for generated records. */
    private static final String[] accessIps = new String[]{"10.10.20.101", "10.10.20.102", "10.10.20.103"};

    /** Candidate request types for generated records. */
    private static final String[] accessTypes = new String[] {"GET", "POST", "PUT"};

    /** Candidate API names for generated records. */
    private static final String[] accessApis = new String[] {"addOrder", "getAccount", "getOrder"};

    /** Shared generator, so that a new Random is not allocated for every field. */
    private static final Random RANDOM = new Random();

    /** Port the server listens on. */
    private final int port;

    /**
     * Creates a server for the given port.
     *
     * @param port port to bind when {@link #runServer()} is called
     */
    public SocketSourceApplication(int port) {
        this.port = port;
    }

    /**
     * Binds the server, starts the broadcaster and blocks until the server channel closes.
     *
     * <p>Failures are printed rather than rethrown, as before. Both event loop groups
     * are always shut down and the broadcaster is stopped before this method returns.
     *
     * @throws Exception declared for compatibility with existing callers
     */
    public void runServer() throws Exception  {
        // Boss group accepts connections; worker group serves the accepted channels.
        EventLoopGroup bossGroup = new NioEventLoopGroup();
        EventLoopGroup workerGroup = new NioEventLoopGroup();
        Thread broadcaster = null;
        try {
            ServerBootstrap sbs = new ServerBootstrap();
            sbs.group(bossGroup, workerGroup)
                    .channel(NioServerSocketChannel.class)
                    // Requested backlog for pending connections.
                    .option(ChannelOption.SO_BACKLOG, 128)
                    // TCP keep-alive on accepted connections.
                    .childOption(ChannelOption.SO_KEEPALIVE, true)
                    // Log server-channel events at INFO.
                    .handler(new LoggingHandler(LogLevel.INFO))
                    .childHandler(new ChannelInitializer<SocketChannel>() {
                        @Override
                        protected void initChannel(SocketChannel socketChannel) throws Exception {
                            ChannelPipeline pipeline = socketChannel.pipeline();
                            // Inbound bytes -> String.
                            pipeline.addLast("decode", new StringDecoder());
                            // Outbound String -> bytes.
                            pipeline.addLast("encode", new StringEncoder());
                            // Tracks connected channels and prints received messages.
                            pipeline.addLast(new NettyServerHandler());
                        }
                    });

            // Bind before starting the broadcaster so a failed bind leaves no thread behind.
            ChannelFuture cf = sbs.bind(port).sync();
            System.err.println("-------server 启动------");

            broadcaster = startBroadcaster();

            // Block until the server channel is closed.
            cf.channel().closeFuture().sync();

        } catch (InterruptedException e) {
            // Restore the interrupt flag for the caller.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (broadcaster != null) {
                broadcaster.interrupt();
            }
            workerGroup.shutdownGracefully();
            bossGroup.shutdownGracefully();
        }
    }

    /**
     * Starts the daemon thread that sends one generated record per second to every
     * channel in {@code NettyServerHandler.channelList}.
     *
     * @return the started thread; interrupt it to stop broadcasting
     */
    private Thread startBroadcaster() {
        Thread broadcaster = new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    while (!Thread.currentThread().isInterrupted()) {
                        String accessLog = getAccessLog();
                        System.out.println("broadcast (" + NettyServerHandler.channelList.size() + ") ==> " + accessLog);
                        for (Channel channel : NettyServerHandler.channelList) {
                            channel.writeAndFlush(accessLog);
                        }
                        Thread.sleep(1000);
                    }
                } catch (InterruptedException e) {
                    // Normal shutdown path: keep the flag set and exit.
                    Thread.currentThread().interrupt();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }, "access-log-broadcaster");
        // Daemon, so this thread alone never keeps the JVM alive.
        broadcaster.setDaemon(true);
        broadcaster.start();
        return broadcaster;
    }

    /**
     * Builds one simulated record: ip, current time in milliseconds, request type and
     * API name, each followed by a tab, terminated by a newline.
     *
     * @return the generated record
     */
    private String getAccessLog() {
        StringBuilder strBuilder = new StringBuilder();
        strBuilder.append(accessIps[RANDOM.nextInt(accessIps.length)]).append("\t")
                .append(System.currentTimeMillis()).append("\t")
                .append(accessTypes[RANDOM.nextInt(accessTypes.length)]).append("\t")
                .append(accessApis[RANDOM.nextInt(accessApis.length)]).append("\t\n");
        return strBuilder.toString();
    }

    /**
     * Starts the server on port 9911.
     *
     * @param args not used
     * @throws Exception declared by {@link #runServer()}
     */
    public static void main(String[] args) throws Exception{
        new SocketSourceApplication(9911).runServer();
    }


}

NettyHandler

java 复制代码
package com.lcc.flink.sources.socket.netty;

import io.netty.channel.Channel;
import io.netty.channel.ChannelHandler;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInboundHandlerAdapter;
import io.netty.util.concurrent.EventExecutorGroup;

import java.util.ArrayList;
import java.util.List;

/**
 * Channel handler that keeps a registry of connected client channels and prints
 * every message it receives.
 */
public class NettyServerHandler extends ChannelInboundHandlerAdapter {

    /**
     * Currently connected client channels.
     *
     * <p>This list is modified from the handler callbacks below and is exposed for
     * iteration by other code. A copy-on-write list lets that iteration run
     * concurrently with additions and removals without a
     * {@code ConcurrentModificationException}.
     */
    public static final List<Channel> channelList = new java.util.concurrent.CopyOnWriteArrayList<>();

    /**
     * Registers a newly active channel.
     *
     * @param ctx context of the channel that became active
     * @throws Exception if the superclass handler fails
     */
    @Override
    public void channelActive(ChannelHandlerContext ctx) throws Exception {
        System.out.println("Server---连接已建立: " + ctx);
        super.channelActive(ctx);
        channelList.add(ctx.channel());
    }

    /**
     * Prints the received message; it is not passed further down the pipeline.
     *
     * @param ctx context of the receiving channel
     * @param msg the received message
     * @throws Exception never thrown by this implementation
     */
    @Override
    public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception {
        System.out.println("Server---收到的消息: " + msg);
    }

    /**
     * Prints the failure and closes the channel.
     *
     * @param ctx   context of the failing channel
     * @param cause the error that was raised
     * @throws Exception never thrown by this implementation
     */
    @Override
    public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
        System.out.println("server--读取数据出现异常");
        cause.printStackTrace();
        ctx.close();
    }

    /**
     * Drops the channel from the registry when it is unregistered.
     *
     * @param ctx context of the unregistered channel
     * @throws Exception if the superclass handler fails
     */
    @Override
    public void channelUnregistered(ChannelHandlerContext ctx) throws Exception {
        super.channelUnregistered(ctx);
        channelList.remove(ctx.channel());
    }

    /**
     * Drops the channel from the registry when it becomes inactive.
     *
     * @param ctx context of the inactive channel
     * @throws Exception if the superclass handler fails
     */
    @Override
    public void channelInactive(ChannelHandlerContext ctx) throws Exception {
        super.channelInactive(ctx);
        channelList.remove(ctx.channel());
    }
}

HDFS读写

Hadoop安装教程

详细教程-Linux上安装单机版的Hadoop-CSDN博客

写出数据到指定目录

引入坐标

XML 复制代码
    <dependencies>
        <!-- Flink filesystem connector -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-filesystem_2.11</artifactId>
            <version>1.11.2</version>
        </dependency>
        <!-- Hadoop client -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.8.1</version>
        </dependency>
    </dependencies>
java 复制代码
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink;
import org.apache.flink.streaming.connectors.fs.bucketing.DateTimeBucketer;
import org.apache.flink.streaming.connectors.fs.bucketing.StringWriter;

/**
 * Demo job: reads lines from a socket and writes them into time-bucketed files
 * through {@code BucketingSink}.
 */
public class HdfsSinkApplication {

    /**
     * Builds the socket-to-file pipeline and runs it.
     *
     * @param args not used
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Socket source: host, port, record delimiter.
        DataStreamSource<String> lines = env.socketTextStream("localhost", 9911, "\n");

        // Sink rooted at the base output directory (a local Windows path in this example).
        BucketingSink<String> bucketingSink = new BucketingSink<>("C:\\Users\\hp-pc\\Desktop\\hdfs");

        // One bucket per minute, named with the "yyyy-MM-dd-HHmm" pattern.
        bucketingSink.setBucketer(new DateTimeBucketer<>("yyyy-MM-dd-HHmm"));

        // Write records as plain strings.
        bucketingSink.setWriter(new StringWriter());
        // Batch size in bytes.
        bucketingSink.setBatchSize(5 * 1024);
        // Roll-over interval for a batch, in milliseconds.
        bucketingSink.setBatchRolloverInterval(5 * 1000);
        // Interval, in milliseconds, between checks for inactive buckets.
        bucketingSink.setInactiveBucketCheckInterval(30 * 1000);
        // Inactivity threshold for a bucket, in milliseconds.
        bucketingSink.setInactiveBucketThreshold(60 * 1000);

        // Attach the sink with parallelism 1.
        lines.addSink(bucketingSink).setParallelism(1);

        // Submit the job.
        env.execute("job");
    }
}

从Hdfs读取数据

java 复制代码
/**
 * Demo job: reads a text file from HDFS and prints every line.
 */
public class HdfsSourceApplication {

    /**
     * Builds the HDFS-to-stdout pipeline and runs it.
     *
     * @param args not used
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Text-file source on HDFS feeding a print sink that runs with parallelism 1.
        env.readTextFile("hdfs://192.168.149.128:9090/hadoop-env.sh")
                .print()
                .setParallelism(1);

        // Submit the job.
        env.execute("job");
    }

}

输出结果

ES读写

ES的单机版安装教程

Linux上安装单机版ElasticSearch6.8.1-CSDN博客

flink将数据写入到ES的代码实现

引入依赖

XML 复制代码
    <dependencies>
        <!-- Flink Elasticsearch 6 connector. NOTE(review): version 1.7.2 differs from the 1.11.2 used for the other Flink connector artifacts in this document; confirm the versions are compatible -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch6_2.11</artifactId>
            <version>1.7.2</version>
        </dependency>
    </dependencies>
java 复制代码
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.xcontent.XContentType;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSink;
import org.apache.flink.streaming.connectors.elasticsearch7.ElasticsearchSink.Builder;
import org.apache.flink.streaming.connectors.elasticsearch.util.RetryRejectedExecutionFailureHandler;
import org.apache.flink.api.common.functions.RuntimeContext;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Demo job: reads tab-separated access-log lines from a socket, indexes each one
 * into Elasticsearch and also prints it.
 *
 * <p>NOTE(review): this snippet's import list references an {@code elasticsearch7}
 * package while the declared Maven artifact is the Elasticsearch 6 connector, and it
 * does not import {@code ElasticsearchSinkFunction} or {@code RequestIndexer}.
 * Confirm the imports before compiling.
 */
public class Main {

    /**
     * Builds the socket-to-Elasticsearch pipeline and runs it.
     *
     * @param args not used
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Socket source: host, port, record delimiter.
        DataStreamSource<String> lines = env.socketTextStream("localhost", 9911, "\n");

        // Elasticsearch endpoint(s).
        List<HttpHost> esHosts = new ArrayList<>();
        esHosts.add(new HttpHost("192.168.149.128", 9200, "http"));

        // Sink builder with the function that turns one line into one index request.
        ElasticsearchSink.Builder<String> sinkBuilder = new ElasticsearchSink.Builder<>(
                esHosts,
                new ElasticsearchSinkFunction<String>() {
                    @Override
                    public void process(String line, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
                        // Column order of the incoming record; extra columns are ignored.
                        String[] fieldNames = {"ip", "time", "type", "api"};
                        String[] values = line.split("\t");

                        Map<String, String> document = new HashMap<>();
                        int usable = Math.min(values.length, fieldNames.length);
                        for (int column = 0; column < usable; column++) {
                            document.put(fieldNames[column], values[column]);
                        }

                        // Index "flink-es", type "access-log", document sent as JSON.
                        IndexRequest request = Requests.indexRequest()
                                .index("flink-es")
                                .type("access-log")
                                .source(document, XContentType.JSON);

                        requestIndexer.add(request);
                    }
                }
        );

        // Flush after every single action.
        sinkBuilder.setBulkFlushMaxActions(1);
        // REST client: maximum retry timeout of 5000 ms.
        sinkBuilder.setRestClientFactory(
                restClientBuilder -> restClientBuilder.setMaxRetryTimeoutMillis(5000)
        );

        // Attach the Elasticsearch sink.
        lines.addSink(sinkBuilder.build());

        // Also print the stream with parallelism 1 (debugging aid).
        lines.print().setParallelism(1);

        // Submit the job.
        env.execute("ES-job");
    }
}

Kafka读写

Kafka的单机版安装教程

Linux上安装单机版Kafka-CSDN博客

引入坐标

XML 复制代码
    <dependencies>
        <!-- Flink Kafka connector -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>1.11.2</version>
        </dependency>
    </dependencies>
java 复制代码
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import java.util.Properties;

/**
 * Demo job: consumes string messages from the Kafka topic {@code flink-source}
 * and prints them.
 */
public class KafkaSourceApplication {

    /**
     * Builds the Kafka-to-stdout pipeline and runs it.
     *
     * @param args not used
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka client settings: broker address and consumer group.
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "192.168.149.128:9092");
        kafkaProps.setProperty("group.id", "flink_group");

        // Consumer for the topic, decoding each message as a plain string.
        FlinkKafkaConsumer<String> consumer =
                new FlinkKafkaConsumer<>("flink-source", new SimpleStringSchema(), kafkaProps);

        // Start from the earliest available record. Alternatives offered by the consumer:
        // setStartFromLatest(), setStartFromTimestamp(...), setStartFromGroupOffsets().
        consumer.setStartFromEarliest();

        // Kafka source feeding a print sink that runs with parallelism 1.
        DataStreamSource<String> messages = env.addSource(consumer);
        messages.print().setParallelism(1);

        // Submit the job.
        env.execute("kafka-job");
    }
}

从Flink写入数据到Kafka

java 复制代码
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.connectors.kafka.internals.KeyedSerializationSchemaWrapper;
import java.util.Properties;

/**
 * Demo job: reads lines from a socket, publishes them to the Kafka topic
 * {@code flink-sink-topic} and also prints them.
 */
public class KafkaSinkApplication {

    /**
     * Builds the socket-to-Kafka pipeline and runs it.
     *
     * @param args not used
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka client settings: broker address.
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "192.168.149.128:9092");

        // Socket source: host, port, record delimiter.
        DataStreamSource<String> lines = env.socketTextStream("localhost", 9911, "\n");

        // Producer that writes each line as a string with at-least-once semantics,
        // so duplicate messages are possible.
        FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<>(
            "flink-sink-topic",
            new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
            kafkaProps,
            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE
        );

        // Attach the Kafka sink.
        lines.addSink(producer);

        // Also print the stream with parallelism 1 (debugging aid).
        lines.print().setParallelism(1);

        // Submit the job.
        env.execute("kafka-producer-job");
    }
}

自定义序列化方式,实现flink对kafka的读写

在实际应用场景中, 会存在各种复杂传输对象,同时要求较高的传输处理性能, 这就需要采用自定义的序列化方式做相应实现, 这里以Protobuf为例做讲解。
功能: kafka 对同一 Topic 的生产与消费,采用 Protobuf 做序列化与反序列化传输,验证能否正常解析数据。

protobuf的用法

1. 通过protobuf脚本生成JAVA文件

在resource目录下创建proto文件

AccessLog.proto

java 复制代码
syntax = "proto3";

option java_package = "com.lcc.flink.connectors.kafka.proto";
option java_outer_classname = "AccessLogProto";

// Access-log message; every field is carried as a string.
message AccessLog {
  string ip = 1;
  string time = 2;
  string type = 3;
  string api = 4;
  string num = 5;
}

指定protoc脚本生成Java类

protoc.exe

链接:https://pan.baidu.com/s/1bE2ZpwSA1A0TZOjJsR24lA

提取码:96sr

创建protoc_flink.bat文件,内容如下

bash 复制代码
@echo off
REM For every .proto file matched by proto/*.proto, run protoc with ./proto as the
REM import root and write the generated Java sources under ../java.
REM The success message is echoed after each file without checking protoc's exit code.
for %%i in (proto/*.proto) do (
 D:/dev_directory/proto/protoc.exe --proto_path=./proto --java_out=../java ./proto/%%i
 echo generate %%i to java file successfully!
)

执行这个bat脚本,我们就在指定包下生成了proto的java类

java 复制代码
public final class AccessLogProto {
  /** Not instantiable; the constructor is private. */
  private AccessLogProto() {
  }

  /**
   * Registers extensions with the given lite registry. The body is empty, so
   * nothing is registered.
   *
   * @param registry registry that would receive extensions
   */
  public static void registerAllExtensions(com.google.protobuf.ExtensionRegistryLite registry) {
    // Nothing to register.
  }

  /**
   * Registers extensions with the given full registry by delegating to the
   * lite overload.
   *
   * @param registry registry that would receive extensions
   */
  public static void registerAllExtensions(com.google.protobuf.ExtensionRegistry registry) {
    final com.google.protobuf.ExtensionRegistryLite liteView =
        (com.google.protobuf.ExtensionRegistryLite) registry;
    registerAllExtensions(liteView);
  }
  /**
   * Read accessors for the five string fields of {@code AccessLog}. Each field
   * is available both as a {@code String} and as a {@code ByteString}.
   */
  public interface AccessLogOrBuilder extends
      // @@protoc_insertion_point(interface_extends:AccessLog)
      com.google.protobuf.MessageOrBuilder {

    /** Returns <code>string ip = 1;</code> as a string. */
    java.lang.String getIp();

    /** Returns <code>string ip = 1;</code> as bytes. */
    com.google.protobuf.ByteString getIpBytes();

    /** Returns <code>string time = 2;</code> as a string. */
    java.lang.String getTime();

    /** Returns <code>string time = 2;</code> as bytes. */
    com.google.protobuf.ByteString getTimeBytes();

    /** Returns <code>string type = 3;</code> as a string. */
    java.lang.String getType();

    /** Returns <code>string type = 3;</code> as bytes. */
    com.google.protobuf.ByteString getTypeBytes();

    /** Returns <code>string api = 4;</code> as a string. */
    java.lang.String getApi();

    /** Returns <code>string api = 4;</code> as bytes. */
    com.google.protobuf.ByteString getApiBytes();

    /** Returns <code>string num = 5;</code> as a string. */
    java.lang.String getNum();

    /** Returns <code>string num = 5;</code> as bytes. */
    com.google.protobuf.ByteString getNumBytes();
  }
  /**
   * <pre>
   * 消息结构定义
   * </pre>
   *
   * Protobuf type {@code AccessLog}
   */
  public  static final class AccessLog extends
      com.google.protobuf.GeneratedMessageV3 implements
      // @@protoc_insertion_point(message_implements:AccessLog)
      AccessLogOrBuilder {
  private static final long serialVersionUID = 0L;
    /**
     * Creates a message from a builder. Use {@code AccessLog.newBuilder()} to
     * construct instances.
     *
     * @param source builder handed to the superclass constructor
     */
    private AccessLog(com.google.protobuf.GeneratedMessageV3.Builder<?> source) {
      super(source);
    }

    /** Creates a message whose five string fields are all the empty string. */
    private AccessLog() {
      num_ = "";
      api_ = "";
      type_ = "";
      time_ = "";
      ip_ = "";
    }

    /**
     * Returns a fresh, empty {@code AccessLog}.
     *
     * @param unused marker parameter; its value is not read
     */
    @java.lang.Override
    @SuppressWarnings({"unused"})
    protected java.lang.Object newInstance(UnusedPrivateParameter unused) {
      final AccessLog fresh = new AccessLog();
      return fresh;
    }

    /** Returns the unknown-field set stored on this message. */
    @java.lang.Override
    public final com.google.protobuf.UnknownFieldSet getUnknownFields() {
      return unknownFields;
    }
    private AccessLog(
        com.google.protobuf.CodedInputStream input,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      this();
      if (extensionRegistry == null) {
        throw new java.lang.NullPointerException();
      }
      com.google.protobuf.UnknownFieldSet.Builder unknownFields =
          com.google.protobuf.UnknownFieldSet.newBuilder();
      try {
        boolean done = false;
        while (!done) {
          int tag = input.readTag();
          switch (tag) {
            case 0:
              done = true;
              break;
            case 10: {
              java.lang.String s = input.readStringRequireUtf8();

              ip_ = s;
              break;
            }
            case 18: {
              java.lang.String s = input.readStringRequireUtf8();

              time_ = s;
              break;
            }
            case 26: {
              java.lang.String s = input.readStringRequireUtf8();

              type_ = s;
              break;
            }
            case 34: {
              java.lang.String s = input.readStringRequireUtf8();

              api_ = s;
              break;
            }
            case 42: {
              java.lang.String s = input.readStringRequireUtf8();

              num_ = s;
              break;
            }
            default: {
              if (!parseUnknownField(
                  input, unknownFields, extensionRegistry, tag)) {
                done = true;
              }
              break;
            }
          }
        }
      } catch (com.google.protobuf.InvalidProtocolBufferException e) {
        throw e.setUnfinishedMessage(this);
      } catch (java.io.IOException e) {
        throw new com.google.protobuf.InvalidProtocolBufferException(
            e).setUnfinishedMessage(this);
      } finally {
        this.unknownFields = unknownFields.build();
        makeExtensionsImmutable();
      }
    }
    /** Returns the descriptor held in {@code internal_static_AccessLog_descriptor}. */
    public static final com.google.protobuf.Descriptors.Descriptor getDescriptor() {
      return AccessLogProto.internal_static_AccessLog_descriptor;
    }

    /**
     * Returns the field accessor table after ensuring it is initialized for
     * the message class and its builder class.
     */
    @java.lang.Override
    protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
        internalGetFieldAccessorTable() {
      return AccessLogProto.internal_static_AccessLog_fieldAccessorTable
          .ensureFieldAccessorsInitialized(AccessLog.class, AccessLog.Builder.class);
    }

    /** Field number of {@code ip}. */
    public static final int IP_FIELD_NUMBER = 1;
    // Holds either a String or a ByteString; each getter stores back the form it produced.
    private volatile java.lang.Object ip_;

    /**
     * <code>string ip = 1;</code>
     *
     * @return the value as a string; a value held as bytes is decoded and stored back
     */
    public java.lang.String getIp() {
      final java.lang.Object current = ip_;
      if (!(current instanceof java.lang.String)) {
        final java.lang.String decoded =
            ((com.google.protobuf.ByteString) current).toStringUtf8();
        ip_ = decoded;
        return decoded;
      }
      return (java.lang.String) current;
    }

    /**
     * <code>string ip = 1;</code>
     *
     * @return the value as bytes; a value held as a string is encoded and stored back
     */
    public com.google.protobuf.ByteString getIpBytes() {
      final java.lang.Object current = ip_;
      if (!(current instanceof java.lang.String)) {
        return (com.google.protobuf.ByteString) current;
      }
      final com.google.protobuf.ByteString encoded =
          com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
      ip_ = encoded;
      return encoded;
    }

    /** Field number of {@code time}. */
    public static final int TIME_FIELD_NUMBER = 2;
    // Holds either a String or a ByteString; each getter stores back the form it produced.
    private volatile java.lang.Object time_;

    /**
     * <code>string time = 2;</code>
     *
     * @return the value as a string; a value held as bytes is decoded and stored back
     */
    public java.lang.String getTime() {
      final java.lang.Object current = time_;
      if (!(current instanceof java.lang.String)) {
        final java.lang.String decoded =
            ((com.google.protobuf.ByteString) current).toStringUtf8();
        time_ = decoded;
        return decoded;
      }
      return (java.lang.String) current;
    }

    /**
     * <code>string time = 2;</code>
     *
     * @return the value as bytes; a value held as a string is encoded and stored back
     */
    public com.google.protobuf.ByteString getTimeBytes() {
      final java.lang.Object current = time_;
      if (!(current instanceof java.lang.String)) {
        return (com.google.protobuf.ByteString) current;
      }
      final com.google.protobuf.ByteString encoded =
          com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
      time_ = encoded;
      return encoded;
    }

    /** Field number of {@code type}. */
    public static final int TYPE_FIELD_NUMBER = 3;
    // Holds either a String or a ByteString; each getter stores back the form it produced.
    private volatile java.lang.Object type_;

    /**
     * <code>string type = 3;</code>
     *
     * @return the value as a string; a value held as bytes is decoded and stored back
     */
    public java.lang.String getType() {
      final java.lang.Object current = type_;
      if (!(current instanceof java.lang.String)) {
        final java.lang.String decoded =
            ((com.google.protobuf.ByteString) current).toStringUtf8();
        type_ = decoded;
        return decoded;
      }
      return (java.lang.String) current;
    }

    /**
     * <code>string type = 3;</code>
     *
     * @return the value as bytes; a value held as a string is encoded and stored back
     */
    public com.google.protobuf.ByteString getTypeBytes() {
      final java.lang.Object current = type_;
      if (!(current instanceof java.lang.String)) {
        return (com.google.protobuf.ByteString) current;
      }
      final com.google.protobuf.ByteString encoded =
          com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
      type_ = encoded;
      return encoded;
    }

    /** Field number of {@code api}. */
    public static final int API_FIELD_NUMBER = 4;
    // Holds either a String or a ByteString; each getter stores back the form it produced.
    private volatile java.lang.Object api_;

    /**
     * <code>string api = 4;</code>
     *
     * @return the value as a string; a value held as bytes is decoded and stored back
     */
    public java.lang.String getApi() {
      final java.lang.Object current = api_;
      if (!(current instanceof java.lang.String)) {
        final java.lang.String decoded =
            ((com.google.protobuf.ByteString) current).toStringUtf8();
        api_ = decoded;
        return decoded;
      }
      return (java.lang.String) current;
    }

    /**
     * <code>string api = 4;</code>
     *
     * @return the value as bytes; a value held as a string is encoded and stored back
     */
    public com.google.protobuf.ByteString getApiBytes() {
      final java.lang.Object current = api_;
      if (!(current instanceof java.lang.String)) {
        return (com.google.protobuf.ByteString) current;
      }
      final com.google.protobuf.ByteString encoded =
          com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
      api_ = encoded;
      return encoded;
    }

    /** Field number of {@code num}. */
    public static final int NUM_FIELD_NUMBER = 5;
    // Holds either a String or a ByteString; each getter stores back the form it produced.
    private volatile java.lang.Object num_;

    /**
     * <code>string num = 5;</code>
     *
     * @return the value as a string; a value held as bytes is decoded and stored back
     */
    public java.lang.String getNum() {
      final java.lang.Object current = num_;
      if (!(current instanceof java.lang.String)) {
        final java.lang.String decoded =
            ((com.google.protobuf.ByteString) current).toStringUtf8();
        num_ = decoded;
        return decoded;
      }
      return (java.lang.String) current;
    }

    /**
     * <code>string num = 5;</code>
     *
     * @return the value as bytes; a value held as a string is encoded and stored back
     */
    public com.google.protobuf.ByteString getNumBytes() {
      final java.lang.Object current = num_;
      if (!(current instanceof java.lang.String)) {
        return (com.google.protobuf.ByteString) current;
      }
      final com.google.protobuf.ByteString encoded =
          com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
      num_ = encoded;
      return encoded;
    }

    // Cached answer of isInitialized(): 1 means true, 0 means false, anything else means not cached.
    private byte memoizedIsInitialized = -1;

    /**
     * Returns the cached initialization state when one is stored; otherwise
     * caches and returns {@code true}.
     */
    @java.lang.Override
    public final boolean isInitialized() {
      switch (memoizedIsInitialized) {
        case 1:
          return true;
        case 0:
          return false;
        default:
          memoizedIsInitialized = 1;
          return true;
      }
    }

    /**
     * Writes every non-empty string field to {@code output} in field-number
     * order, followed by the unknown fields.
     *
     * @param output destination stream
     * @throws java.io.IOException if writing fails
     */
    @java.lang.Override
    public void writeTo(com.google.protobuf.CodedOutputStream output)
        throws java.io.IOException {
      // Empty strings are skipped rather than written.
      if (!getIpBytes().isEmpty()) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, IP_FIELD_NUMBER, ip_);
      }
      if (!getTimeBytes().isEmpty()) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, TIME_FIELD_NUMBER, time_);
      }
      if (!getTypeBytes().isEmpty()) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, TYPE_FIELD_NUMBER, type_);
      }
      if (!getApiBytes().isEmpty()) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, API_FIELD_NUMBER, api_);
      }
      if (!getNumBytes().isEmpty()) {
        com.google.protobuf.GeneratedMessageV3.writeString(output, NUM_FIELD_NUMBER, num_);
      }
      unknownFields.writeTo(output);
    }

    /**
     * Returns the serialized size: the sum over the non-empty string fields
     * plus the unknown fields. The result is cached in {@code memoizedSize}.
     */
    @java.lang.Override
    public int getSerializedSize() {
      final int cached = memoizedSize;
      if (cached != -1) {
        return cached;
      }

      int total = 0;
      if (!getIpBytes().isEmpty()) {
        total += com.google.protobuf.GeneratedMessageV3.computeStringSize(IP_FIELD_NUMBER, ip_);
      }
      if (!getTimeBytes().isEmpty()) {
        total += com.google.protobuf.GeneratedMessageV3.computeStringSize(TIME_FIELD_NUMBER, time_);
      }
      if (!getTypeBytes().isEmpty()) {
        total += com.google.protobuf.GeneratedMessageV3.computeStringSize(TYPE_FIELD_NUMBER, type_);
      }
      if (!getApiBytes().isEmpty()) {
        total += com.google.protobuf.GeneratedMessageV3.computeStringSize(API_FIELD_NUMBER, api_);
      }
      if (!getNumBytes().isEmpty()) {
        total += com.google.protobuf.GeneratedMessageV3.computeStringSize(NUM_FIELD_NUMBER, num_);
      }
      total += unknownFields.getSerializedSize();
      memoizedSize = total;
      return total;
    }

    /**
     * Compares this message with {@code obj}. Two {@code AccessLog} messages
     * are equal when all five string fields and the unknown fields are equal;
     * any other argument type is handled by the superclass.
     */
    @java.lang.Override
    public boolean equals(final java.lang.Object obj) {
      if (obj == this) {
        return true;
      }
      if (obj instanceof AccessLog) {
        final AccessLog that = (AccessLog) obj;
        return getIp().equals(that.getIp())
            && getTime().equals(that.getTime())
            && getType().equals(that.getType())
            && getApi().equals(that.getApi())
            && getNum().equals(that.getNum())
            && unknownFields.equals(that.unknownFields);
      }
      return super.equals(obj);
    }

    /**
     * Returns a hash built from the descriptor, each field number with its
     * value, and the unknown fields. A non-zero result is cached in
     * {@code memoizedHashCode}.
     */
    @java.lang.Override
    public int hashCode() {
      if (memoizedHashCode == 0) {
        int h = (19 * 41) + getDescriptor().hashCode();
        h = 53 * ((37 * h) + IP_FIELD_NUMBER) + getIp().hashCode();
        h = 53 * ((37 * h) + TIME_FIELD_NUMBER) + getTime().hashCode();
        h = 53 * ((37 * h) + TYPE_FIELD_NUMBER) + getType().hashCode();
        h = 53 * ((37 * h) + API_FIELD_NUMBER) + getApi().hashCode();
        h = 53 * ((37 * h) + NUM_FIELD_NUMBER) + getNum().hashCode();
        h = (29 * h) + unknownFields.hashCode();
        memoizedHashCode = h;
      }
      return memoizedHashCode;
    }

    /** Parses a message from a {@code ByteBuffer}. */
    public static AccessLog parseFrom(java.nio.ByteBuffer data)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data);
    }

    /** Parses a message from a {@code ByteBuffer} using the given extension registry. */
    public static AccessLog parseFrom(
        java.nio.ByteBuffer data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data, extensionRegistry);
    }

    /** Parses a message from a {@code ByteString}. */
    public static AccessLog parseFrom(com.google.protobuf.ByteString data)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data);
    }

    /** Parses a message from a {@code ByteString} using the given extension registry. */
    public static AccessLog parseFrom(
        com.google.protobuf.ByteString data,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data, extensionRegistry);
    }

    /** Parses a message from a byte array. */
    public static AccessLog parseFrom(byte[] data)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data);
    }

    /** Parses a message from a byte array using the given extension registry. */
    public static AccessLog parseFrom(
        byte[] data, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws com.google.protobuf.InvalidProtocolBufferException {
      return PARSER.parseFrom(data, extensionRegistry);
    }

    /** Parses a message from an {@code InputStream}. */
    public static AccessLog parseFrom(java.io.InputStream input) throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
    }

    /** Parses a message from an {@code InputStream} using the given extension registry. */
    public static AccessLog parseFrom(
        java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(
          PARSER, input, extensionRegistry);
    }

    /** Parses a delimited message from an {@code InputStream}. */
    public static AccessLog parseDelimitedFrom(java.io.InputStream input)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(PARSER, input);
    }

    /** Parses a delimited message from an {@code InputStream} using the given extension registry. */
    public static AccessLog parseDelimitedFrom(
        java.io.InputStream input, com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseDelimitedWithIOException(
          PARSER, input, extensionRegistry);
    }

    /** Parses a message from a {@code CodedInputStream}. */
    public static AccessLog parseFrom(com.google.protobuf.CodedInputStream input)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(PARSER, input);
    }

    /** Parses a message from a {@code CodedInputStream} using the given extension registry. */
    public static AccessLog parseFrom(
        com.google.protobuf.CodedInputStream input,
        com.google.protobuf.ExtensionRegistryLite extensionRegistry)
        throws java.io.IOException {
      return com.google.protobuf.GeneratedMessageV3.parseWithIOException(
          PARSER, input, extensionRegistry);
    }

    /** Returns a new builder; equivalent to {@link #newBuilder()}. */
    @java.lang.Override
    public Builder newBuilderForType() {
      return newBuilder();
    }

    /** Returns a new builder obtained from the default instance. */
    public static Builder newBuilder() {
      return DEFAULT_INSTANCE.toBuilder();
    }

    /**
     * Returns a new builder with {@code prototype} merged into it.
     *
     * @param prototype message whose fields are merged into the new builder
     */
    public static Builder newBuilder(AccessLog prototype) {
      final Builder builder = DEFAULT_INSTANCE.toBuilder();
      return builder.mergeFrom(prototype);
    }

    /**
     * Returns a builder seeded from this message. The default instance yields
     * an untouched builder; any other message is merged into it.
     */
    @java.lang.Override
    public Builder toBuilder() {
      final Builder builder = new Builder();
      if (this != DEFAULT_INSTANCE) {
        builder.mergeFrom(this);
      }
      return builder;
    }

    /**
     * Returns a new builder attached to {@code parent}.
     *
     * @param parent parent handed to the builder constructor
     */
    @java.lang.Override
    protected Builder newBuilderForType(
        com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
      return new Builder(parent);
    }
    /**
     * <pre>
     * 消息结构定义
     * </pre>
     *
     * Protobuf type {@code AccessLog}
     */
    public static final class Builder extends
        com.google.protobuf.GeneratedMessageV3.Builder<Builder> implements
        // @@protoc_insertion_point(builder_implements:AccessLog)
        com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLogOrBuilder {
      public static final com.google.protobuf.Descriptors.Descriptor
          getDescriptor() {
        return com.lcc.flink.connectors.kafka.proto.AccessLogProto.internal_static_AccessLog_descriptor;
      }

      @java.lang.Override
      protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
          internalGetFieldAccessorTable() {
        return com.lcc.flink.connectors.kafka.proto.AccessLogProto.internal_static_AccessLog_fieldAccessorTable
            .ensureFieldAccessorsInitialized(
                com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.class, com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.Builder.class);
      }

      // Construct using com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.newBuilder()
      private Builder() {
        maybeForceBuilderInitialization();
      }

      private Builder(
          com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
        super(parent);
        maybeForceBuilderInitialization();
      }
      private void maybeForceBuilderInitialization() {
        if (com.google.protobuf.GeneratedMessageV3
                .alwaysUseFieldBuilders) {
        }
      }
      @java.lang.Override
      public Builder clear() {
        super.clear();
        ip_ = "";

        time_ = "";

        type_ = "";

        api_ = "";

        num_ = "";

        return this;
      }

      @java.lang.Override
      public com.google.protobuf.Descriptors.Descriptor
          getDescriptorForType() {
        return com.lcc.flink.connectors.kafka.proto.AccessLogProto.internal_static_AccessLog_descriptor;
      }

      @java.lang.Override
      public com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog getDefaultInstanceForType() {
        return com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.getDefaultInstance();
      }

      @java.lang.Override
      public com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog build() {
        com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog result = buildPartial();
        if (!result.isInitialized()) {
          throw newUninitializedMessageException(result);
        }
        return result;
      }

      @java.lang.Override
      public com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog buildPartial() {
        com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog result = new com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog(this);
        result.ip_ = ip_;
        result.time_ = time_;
        result.type_ = type_;
        result.api_ = api_;
        result.num_ = num_;
        onBuilt();
        return result;
      }

      @java.lang.Override
      public Builder clone() {
        return super.clone();
      }
      @java.lang.Override
      public Builder setField(
          com.google.protobuf.Descriptors.FieldDescriptor field,
          java.lang.Object value) {
        return super.setField(field, value);
      }
      @java.lang.Override
      public Builder clearField(
          com.google.protobuf.Descriptors.FieldDescriptor field) {
        return super.clearField(field);
      }
      @java.lang.Override
      public Builder clearOneof(
          com.google.protobuf.Descriptors.OneofDescriptor oneof) {
        return super.clearOneof(oneof);
      }
      @java.lang.Override
      public Builder setRepeatedField(
          com.google.protobuf.Descriptors.FieldDescriptor field,
          int index, java.lang.Object value) {
        return super.setRepeatedField(field, index, value);
      }
      @java.lang.Override
      public Builder addRepeatedField(
          com.google.protobuf.Descriptors.FieldDescriptor field,
          java.lang.Object value) {
        return super.addRepeatedField(field, value);
      }
      @java.lang.Override
      public Builder mergeFrom(com.google.protobuf.Message other) {
        if (other instanceof com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog) {
          return mergeFrom((com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog)other);
        } else {
          super.mergeFrom(other);
          return this;
        }
      }

      public Builder mergeFrom(com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog other) {
        if (other == com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.getDefaultInstance()) return this;
        if (!other.getIp().isEmpty()) {
          ip_ = other.ip_;
          onChanged();
        }
        if (!other.getTime().isEmpty()) {
          time_ = other.time_;
          onChanged();
        }
        if (!other.getType().isEmpty()) {
          type_ = other.type_;
          onChanged();
        }
        if (!other.getApi().isEmpty()) {
          api_ = other.api_;
          onChanged();
        }
        if (!other.getNum().isEmpty()) {
          num_ = other.num_;
          onChanged();
        }
        this.mergeUnknownFields(other.unknownFields);
        onChanged();
        return this;
      }

      @java.lang.Override
      public final boolean isInitialized() {
        return true;
      }

      @java.lang.Override
      public Builder mergeFrom(
          com.google.protobuf.CodedInputStream input,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws java.io.IOException {
        com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parsedMessage = null;
        try {
          parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
        } catch (com.google.protobuf.InvalidProtocolBufferException e) {
          parsedMessage = (com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog) e.getUnfinishedMessage();
          throw e.unwrapIOException();
        } finally {
          if (parsedMessage != null) {
            mergeFrom(parsedMessage);
          }
        }
        return this;
      }

      private java.lang.Object ip_ = "";
      /**
       * <code>string ip = 1;</code>
       */
      public java.lang.String getIp() {
        java.lang.Object ref = ip_;
        if (!(ref instanceof java.lang.String)) {
          com.google.protobuf.ByteString bs =
              (com.google.protobuf.ByteString) ref;
          java.lang.String s = bs.toStringUtf8();
          ip_ = s;
          return s;
        } else {
          return (java.lang.String) ref;
        }
      }
      /**
       * <code>string ip = 1;</code>
       */
      public com.google.protobuf.ByteString
          getIpBytes() {
        java.lang.Object ref = ip_;
        if (ref instanceof String) {
          com.google.protobuf.ByteString b = 
              com.google.protobuf.ByteString.copyFromUtf8(
                  (java.lang.String) ref);
          ip_ = b;
          return b;
        } else {
          return (com.google.protobuf.ByteString) ref;
        }
      }
      /**
       * <code>string ip = 1;</code>
       */
      public Builder setIp(
          java.lang.String value) {
        if (value == null) {
    throw new NullPointerException();
  }
  
        ip_ = value;
        onChanged();
        return this;
      }
      /**
       * <code>string ip = 1;</code>
       */
      public Builder clearIp() {
        
        ip_ = getDefaultInstance().getIp();
        onChanged();
        return this;
      }
      /**
       * <code>string ip = 1;</code>
       */
      public Builder setIpBytes(
          com.google.protobuf.ByteString value) {
        if (value == null) {
    throw new NullPointerException();
  }
  checkByteStringIsUtf8(value);
        
        ip_ = value;
        onChanged();
        return this;
      }

      private java.lang.Object time_ = "";
      /**
       * <code>string time = 2;</code>
       */
      public java.lang.String getTime() {
        java.lang.Object ref = time_;
        if (!(ref instanceof java.lang.String)) {
          com.google.protobuf.ByteString bs =
              (com.google.protobuf.ByteString) ref;
          java.lang.String s = bs.toStringUtf8();
          time_ = s;
          return s;
        } else {
          return (java.lang.String) ref;
        }
      }
      /**
       * <code>string time = 2;</code>
       */
      public com.google.protobuf.ByteString
          getTimeBytes() {
        java.lang.Object ref = time_;
        if (ref instanceof String) {
          com.google.protobuf.ByteString b = 
              com.google.protobuf.ByteString.copyFromUtf8(
                  (java.lang.String) ref);
          time_ = b;
          return b;
        } else {
          return (com.google.protobuf.ByteString) ref;
        }
      }
      /**
       * <code>string time = 2;</code>
       */
      public Builder setTime(
          java.lang.String value) {
        if (value == null) {
    throw new NullPointerException();
  }
  
        time_ = value;
        onChanged();
        return this;
      }
      /**
       * <code>string time = 2;</code>
       */
      public Builder clearTime() {
        
        time_ = getDefaultInstance().getTime();
        onChanged();
        return this;
      }
      /**
       * <code>string time = 2;</code>
       */
      public Builder setTimeBytes(
          com.google.protobuf.ByteString value) {
        if (value == null) {
    throw new NullPointerException();
  }
  checkByteStringIsUtf8(value);
        
        time_ = value;
        onChanged();
        return this;
      }

      private java.lang.Object type_ = "";
      /**
       * <code>string type = 3;</code>
       */
      public java.lang.String getType() {
        java.lang.Object ref = type_;
        if (!(ref instanceof java.lang.String)) {
          com.google.protobuf.ByteString bs =
              (com.google.protobuf.ByteString) ref;
          java.lang.String s = bs.toStringUtf8();
          type_ = s;
          return s;
        } else {
          return (java.lang.String) ref;
        }
      }
      /**
       * <code>string type = 3;</code>
       */
      public com.google.protobuf.ByteString
          getTypeBytes() {
        java.lang.Object ref = type_;
        if (ref instanceof String) {
          com.google.protobuf.ByteString b = 
              com.google.protobuf.ByteString.copyFromUtf8(
                  (java.lang.String) ref);
          type_ = b;
          return b;
        } else {
          return (com.google.protobuf.ByteString) ref;
        }
      }
      /**
       * <code>string type = 3;</code>
       */
      public Builder setType(
          java.lang.String value) {
        if (value == null) {
    throw new NullPointerException();
  }
  
        type_ = value;
        onChanged();
        return this;
      }
      /**
       * <code>string type = 3;</code>
       */
      public Builder clearType() {
        
        type_ = getDefaultInstance().getType();
        onChanged();
        return this;
      }
      /**
       * <code>string type = 3;</code>
       */
      public Builder setTypeBytes(
          com.google.protobuf.ByteString value) {
        if (value == null) {
    throw new NullPointerException();
  }
  checkByteStringIsUtf8(value);
        
        type_ = value;
        onChanged();
        return this;
      }

      private java.lang.Object api_ = "";
      /**
       * <code>string api = 4;</code>
       */
      public java.lang.String getApi() {
        java.lang.Object ref = api_;
        if (!(ref instanceof java.lang.String)) {
          com.google.protobuf.ByteString bs =
              (com.google.protobuf.ByteString) ref;
          java.lang.String s = bs.toStringUtf8();
          api_ = s;
          return s;
        } else {
          return (java.lang.String) ref;
        }
      }
      /**
       * <code>string api = 4;</code>
       */
      public com.google.protobuf.ByteString
          getApiBytes() {
        java.lang.Object ref = api_;
        if (ref instanceof String) {
          com.google.protobuf.ByteString b = 
              com.google.protobuf.ByteString.copyFromUtf8(
                  (java.lang.String) ref);
          api_ = b;
          return b;
        } else {
          return (com.google.protobuf.ByteString) ref;
        }
      }
      /**
       * <code>string api = 4;</code>
       */
      public Builder setApi(
          java.lang.String value) {
        if (value == null) {
    throw new NullPointerException();
  }
  
        api_ = value;
        onChanged();
        return this;
      }
      /**
       * <code>string api = 4;</code>
       */
      public Builder clearApi() {
        
        api_ = getDefaultInstance().getApi();
        onChanged();
        return this;
      }
      /**
       * <code>string api = 4;</code>
       */
      public Builder setApiBytes(
          com.google.protobuf.ByteString value) {
        if (value == null) {
    throw new NullPointerException();
  }
  checkByteStringIsUtf8(value);
        
        api_ = value;
        onChanged();
        return this;
      }

      private java.lang.Object num_ = "";
      /**
       * <code>string num = 5;</code>
       */
      public java.lang.String getNum() {
        java.lang.Object ref = num_;
        if (!(ref instanceof java.lang.String)) {
          com.google.protobuf.ByteString bs =
              (com.google.protobuf.ByteString) ref;
          java.lang.String s = bs.toStringUtf8();
          num_ = s;
          return s;
        } else {
          return (java.lang.String) ref;
        }
      }
      /**
       * Returns the {@code num} field as a {@code ByteString}.
       *
       * <p>When the field is currently held as a {@code String} it is encoded
       * as UTF-8 and the encoded value is stored back into {@code num_}.
       *
       * <code>string num = 5;</code>
       */
      public com.google.protobuf.ByteString
          getNumBytes() {
        final java.lang.Object current = num_;
        if (!(current instanceof String)) {
          // Already stored in its byte form.
          return (com.google.protobuf.ByteString) current;
        }
        final com.google.protobuf.ByteString encoded =
            com.google.protobuf.ByteString.copyFromUtf8((java.lang.String) current);
        num_ = encoded;
        return encoded;
      }
      /**
       * Sets the {@code num} field and calls {@code onChanged()}.
       *
       * <code>string num = 5;</code>
       *
       * @param value the new value; must not be null
       * @return this builder
       * @throws NullPointerException if {@code value} is null
       */
      public Builder setNum(
          java.lang.String value) {
        // requireNonNull throws the same message-less NullPointerException.
        num_ = java.util.Objects.requireNonNull(value);
        onChanged();
        return this;
      }
      /**
       * Resets the {@code num} field to the value held by the default
       * instance and calls {@code onChanged()}.
       *
       * <code>string num = 5;</code>
       *
       * @return this builder
       */
      public Builder clearNum() {
        final java.lang.String defaultNum = getDefaultInstance().getNum();
        num_ = defaultNum;
        onChanged();
        return this;
      }
      /**
       * Sets the {@code num} field from its byte form and calls
       * {@code onChanged()}. The value is passed through
       * {@code checkByteStringIsUtf8} before it is stored.
       *
       * <code>string num = 5;</code>
       *
       * @param value the new value; must not be null
       * @return this builder
       * @throws NullPointerException if {@code value} is null
       */
      public Builder setNumBytes(
          com.google.protobuf.ByteString value) {
        // Null check first, then the UTF-8 check, exactly as before.
        checkByteStringIsUtf8(java.util.Objects.requireNonNull(value));
        num_ = value;
        onChanged();
        return this;
      }
      /**
       * Delegates to the superclass implementation of
       * {@code setUnknownFields} and returns its result.
       *
       * @param unknownFields the unknown field set to pass on
       * @return the builder returned by the superclass
       */
      @java.lang.Override
      public final Builder setUnknownFields(
          final com.google.protobuf.UnknownFieldSet unknownFields) {
        final Builder result = super.setUnknownFields(unknownFields);
        return result;
      }

      /**
       * Delegates to the superclass implementation of
       * {@code mergeUnknownFields} and returns its result.
       *
       * @param unknownFields the unknown field set to pass on
       * @return the builder returned by the superclass
       */
      @java.lang.Override
      public final Builder mergeUnknownFields(
          final com.google.protobuf.UnknownFieldSet unknownFields) {
        final Builder result = super.mergeUnknownFields(unknownFields);
        return result;
      }


      // @@protoc_insertion_point(builder_scope:AccessLog)
    }

    // @@protoc_insertion_point(class_scope:AccessLog)
    private static final com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog DEFAULT_INSTANCE;
    static {
      DEFAULT_INSTANCE = new com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog();
    }

    /**
     * Returns the shared {@code DEFAULT_INSTANCE} of this message type.
     *
     * @return the default {@code AccessLog} instance
     */
    public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog getDefaultInstance() {
      return DEFAULT_INSTANCE;
    }

    // Parser for AccessLog messages. parsePartialFrom builds a new message by
    // passing the coded input stream and extension registry to the AccessLog
    // constructor.
    private static final com.google.protobuf.Parser<AccessLog>
        PARSER = new com.google.protobuf.AbstractParser<AccessLog>() {
      @java.lang.Override
      public AccessLog parsePartialFrom(
          com.google.protobuf.CodedInputStream input,
          com.google.protobuf.ExtensionRegistryLite extensionRegistry)
          throws com.google.protobuf.InvalidProtocolBufferException {
        return new AccessLog(input, extensionRegistry);
      }
    };

    /**
     * Returns the shared {@code PARSER} for {@code AccessLog} messages.
     *
     * @return the parser instance
     */
    public static com.google.protobuf.Parser<AccessLog> parser() {
      return PARSER;
    }

    /**
     * Returns the same shared {@code PARSER} that the static
     * {@code parser()} method returns.
     *
     * @return the parser instance
     */
    @java.lang.Override
    public com.google.protobuf.Parser<AccessLog> getParserForType() {
      return PARSER;
    }

    /**
     * Returns the same shared {@code DEFAULT_INSTANCE} that the static
     * {@code getDefaultInstance()} method returns.
     *
     * @return the default {@code AccessLog} instance
     */
    @java.lang.Override
    public com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog getDefaultInstanceForType() {
      return DEFAULT_INSTANCE;
    }

  }

  // Descriptor of the AccessLog message type; assigned in the static
  // initializer below from the first message type of the file descriptor.
  private static final com.google.protobuf.Descriptors.Descriptor
    internal_static_AccessLog_descriptor;
  // Field accessor table for AccessLog; assigned in the static initializer
  // below from the message descriptor and the list of field names.
  private static final 
    com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
      internal_static_AccessLog_fieldAccessorTable;

  /**
   * Returns the file descriptor built by this class's static initializer.
   *
   * @return the file descriptor held in {@code descriptor}
   */
  public static com.google.protobuf.Descriptors.FileDescriptor
      getDescriptor() {
    return descriptor;
  }
  // File descriptor for this outer class; assigned once in the static
  // initializer below.
  private static  com.google.protobuf.Descriptors.FileDescriptor
      descriptor;
  // Builds the file descriptor, then derives the AccessLog message
  // descriptor and its field accessor table from it. The order matters:
  // getDescriptor() is only usable after `descriptor` has been assigned.
  static {
    // Encoded descriptor data. The literal embeds the file name
    // "AccessLog.proto", the five field names (ip, time, type, api, num),
    // the Java package and outer class name, and the "proto3" marker.
    // Presumably a serialized FileDescriptorProto; do not edit by hand.
    java.lang.String[] descriptorData = {
      "\n\017AccessLog.proto\"M\n\tAccessLog\022\n\n\002ip\030\001 \001" +
      "(\t\022\014\n\004time\030\002 \001(\t\022\014\n\004type\030\003 \001(\t\022\013\n\003api\030\004 " +
      "\001(\t\022\013\n\003num\030\005 \001(\tB6\n$com.lcc.flink.connec" +
      "tors.kafka.protoB\016AccessLogProtob\006proto3"
    };
    // The second argument is an empty array of dependency file descriptors.
    descriptor = com.google.protobuf.Descriptors.FileDescriptor
      .internalBuildGeneratedFileFrom(descriptorData,
        new com.google.protobuf.Descriptors.FileDescriptor[] {
        });
    // AccessLog is the first (index 0) message type in the file.
    internal_static_AccessLog_descriptor =
      getDescriptor().getMessageTypes().get(0);
    // The accessor table is given the capitalized names of the five fields.
    internal_static_AccessLog_fieldAccessorTable = new
      com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
        internal_static_AccessLog_descriptor,
        new java.lang.String[] { "Ip", "Time", "Type", "Api", "Num", });
  }

  // @@protoc_insertion_point(outer_class_scope)
}

真正的AccessLog实体对象

java 复制代码
/**
 * Plain access-log entity used by the Flink jobs in this article, as opposed
 * to the generated protobuf message of the same simple name.
 *
 * <p>Accessors are not written out here; the class carries the {@code @Data}
 * annotation. Note that {@code num} is an {@code Integer} here while the
 * protobuf message declares {@code num} as a string.
 */
@Data
public class AccessLog implements Serializable {
    // Client IP address.
    private String ip;

    // Access time, kept as a string.
    private String time;

    // Request type (the sample data uses values such as POST and GET).
    private String type;

    // API name that was called.
    private String api;

    // Numeric value attached to the record; the sink example sets it to 1.
    // Presumably a per-record count — confirm against downstream usage.
    private Integer num;
}

然后编写我们的自定义序列化类CustomSerialSchema

java 复制代码
/**
 * Flink serialization schema that converts between the {@link AccessLog}
 * entity and the protobuf wire format of {@code AccessLogProto.AccessLog}.
 *
 * <p>Fields are mapped explicitly instead of through reflective bean copying:
 * {@code num} is a string in the protobuf message but an {@code Integer} in
 * the entity, so it needs an explicit conversion, and the protobuf builder
 * setters reject {@code null}, so unset entity fields must be skipped.
 */
public class CustomSerialSchema implements DeserializationSchema<AccessLog>, SerializationSchema<AccessLog> {

    private static final long serialVersionUID = 1L;

    /**
     * Charset associated with this schema. Marked transient, so it is written
     * and restored by name in {@code writeObject}/{@code readObject}.
     */
    private transient Charset charset;

    /** Creates a schema that uses UTF-8. */
    public CustomSerialSchema() {
        this(StandardCharsets.UTF_8);
    }

    /**
     * Creates a schema with the given charset.
     *
     * @param charset the charset to associate with this schema; must not be null
     */
    public CustomSerialSchema(Charset charset) {
        this.charset = checkNotNull(charset);
    }

    /**
     * Returns the charset this schema was created with.
     *
     * @return the charset
     */
    public Charset getCharset() {
        return charset;
    }

    /**
     * Deserializes a protobuf-encoded message into an {@link AccessLog}.
     *
     * @param message the protobuf bytes of an {@code AccessLogProto.AccessLog}
     * @return the decoded entity, or {@code null} when the bytes cannot be
     *         parsed or {@code num} is not a valid integer
     */
    @Override
    public AccessLog deserialize(byte[] message) {
        try {
            AccessLogProto.AccessLog proto = AccessLogProto.AccessLog.parseFrom(message);
            AccessLog accessLog = new AccessLog();
            accessLog.setIp(proto.getIp());
            accessLog.setTime(proto.getTime());
            accessLog.setType(proto.getType());
            accessLog.setApi(proto.getApi());
            // The proto field is a string; an empty string means "not set".
            String num = proto.getNum();
            accessLog.setNum(num.isEmpty() ? null : Integer.valueOf(num));
            return accessLog;
        } catch (Exception e) {
            // Best effort: report the failure and signal it with null instead
            // of failing the whole job on one bad record.
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Always returns {@code false}: no element marks the end of the stream.
     *
     * @param nextElement the element just deserialized
     * @return {@code false}
     */
    @Override
    public boolean isEndOfStream(AccessLog nextElement) {
        return false;
    }

    /**
     * Serializes an {@link AccessLog} into protobuf bytes.
     *
     * <p>Entity fields that are {@code null} are left at the protobuf default
     * because the builder setters throw on {@code null}.
     *
     * @param element the entity to encode
     * @return the protobuf bytes of the corresponding {@code AccessLogProto.AccessLog}
     */
    @Override
    public byte[] serialize(AccessLog element) {
        AccessLogProto.AccessLog.Builder builder = AccessLogProto.AccessLog.newBuilder();
        if (element.getIp() != null) {
            builder.setIp(element.getIp());
        }
        if (element.getTime() != null) {
            builder.setTime(element.getTime());
        }
        if (element.getType() != null) {
            builder.setType(element.getType());
        }
        if (element.getApi() != null) {
            builder.setApi(element.getApi());
        }
        if (element.getNum() != null) {
            // Integer in the entity, string in the proto message.
            builder.setNum(String.valueOf(element.getNum()));
        }
        return builder.build().toByteArray();
    }

    /**
     * Describes the type produced by {@link #deserialize(byte[])}.
     *
     * @return type information for {@link AccessLog}
     */
    @Override
    public TypeInformation<AccessLog> getProducedType() {
        return TypeInformation.of(AccessLog.class);
    }

    /** Writes the default fields, then the charset by name. */
    private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException {
        out.defaultWriteObject();
        out.writeUTF(charset.name());
    }

    /** Reads the default fields, then restores the transient charset from its name. */
    private void readObject(java.io.ObjectInputStream in)
            throws java.io.IOException, ClassNotFoundException {
        in.defaultReadObject();
        this.charset = Charset.forName(in.readUTF());
    }
}

接下来,无论是从 Kafka 读取数据还是向 Kafka 写入数据,我们都可以指定自己的序列化方式了

2、使用自定义序列化器写入到kafka

java 复制代码
/**
 * Flink job that reads tab-separated access-log lines from a socket, turns
 * each line into an {@link AccessLog}, and writes it to Kafka using
 * {@link CustomSerialSchema}.
 */
public class KafkaSinkApplication {

    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception{

        // 1. Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // 2. Read the socket source, one record per line
        DataStreamSource<String> socketStr = env.socketTextStream("localhost", 9911, "\n");
        // 3. Convert each line into an AccessLog
        SingleOutputStreamOperator<AccessLog> outputStream = socketStr.map(new MapFunction<String, AccessLog>() {
            @Override
            public AccessLog map(String value) throws Exception {
                System.out.println(value);
                // Expected layout: ip \t time \t type \t api
                String[] arrValue = value.split("\t");
                AccessLog log = new AccessLog();
                log.setNum(1);
                // Missing trailing columns leave the matching field unset;
                // columns beyond the fourth are ignored.
                if (arrValue.length > 0) {
                    log.setIp(arrValue[0]);
                }
                if (arrValue.length > 1) {
                    log.setTime(arrValue[1]);
                }
                if (arrValue.length > 2) {
                    log.setType(arrValue[2]);
                }
                if (arrValue.length > 3) {
                    log.setApi(arrValue[3]);
                }
                return log;
            }
        });

        // 4. Configure the Kafka producer
        FlinkKafkaProducer<AccessLog> kafkaProducer = new FlinkKafkaProducer<>(
                "10.10.20.132:9092", // Kafka broker list
                "flink-serial",  // target topic
                new CustomSerialSchema() // custom serialization schema
        );

        // 5. Attach the Kafka sink and also print the raw input
        outputStream.addSink(kafkaProducer);
        socketStr.print().setParallelism(1);

        // 6. Run the job
        env.execute("job");
    }
}
3、使用自定义序列化器从kafka读取

java 复制代码
/**
 * Flink job that reads {@link AccessLog} records from Kafka, decoding them
 * with {@link CustomSerialSchema}, and prints them.
 */
public class KafkaSourceApplication {

    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // 2. Kafka connection settings
        // NOTE(review): KafkaSinkApplication writes to a different broker
        // address; confirm both examples point at the same cluster.
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "192.168.149.128:9092");
        properties.setProperty("group.id", "flink_group");

        // 3. Create the Kafka consumer
        FlinkKafkaConsumer<AccessLog> kafkaConsumer = new FlinkKafkaConsumer<>(
                "flink-serial",                  // source topic
                new CustomSerialSchema(),   // custom deserialization schema
                properties);

        // 4. Read from the Kafka source
        DataStreamSource<AccessLog> kafkaStream = env.addSource(kafkaConsumer);

        kafkaStream.print().setParallelism(1);

        // 5. Run the job
        env.execute("custom-job");
    }
}
相关推荐
幸好我会魔法2 小时前
人格分裂(交互问答)-小白想懂Elasticsearch
大数据·spring boot·后端·elasticsearch·搜索引擎·全文检索
顧棟3 小时前
【ES实战】治理项之索引模板相关治理
大数据·elasticsearch·yarn
karatttt4 小时前
MapReduce,Yarn,Spark理解与执行流程
大数据·spark·mapreduce
STONE_KKK5 小时前
半小时速通flume-flume概述
大数据·flume
shinelord明7 小时前
【大数据】数据治理浅析
大数据·人工智能·软件工程
说私域8 小时前
私域流量池构建与转化策略:以开源链动2+1模式AI智能名片S2B2C商城小程序为例
大数据·人工智能·小程序·开源
LPiling10 小时前
激光晶体材料行业深度分析
大数据·人工智能
牛马程序员‍13 小时前
云岚到家项目100问 v1.0
大数据·apache
撸码到无法自拔14 小时前
MATLAB中处理大数据的技巧与方法
大数据·开发语言·matlab
三月七(爱看动漫的程序员)14 小时前
Genetic Prompt Search via Exploiting Language Model Probabilities
大数据·人工智能·算法·语言模型·自然语言处理·prompt