Flink Connectors
Flink connectors cover two sides: data source input and sink output.
**Reading:** Flink ships with some basic connectors out of the box; built-in sources include files, directories, sockets, and reading from collections and iterators.
**Writing:** Built-in sinks can write data to files, standard output (stdout), standard error (stderr), and sockets.
Below are the officially provided third-party connectors (source = read, sink = write):
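For example, a job that uses only these built-in connectors can read from a collection and print to stdout. A minimal sketch (the class name is illustrative):
java
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.util.Arrays;
public class BuiltInConnectorsDemo {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Built-in source: read from a collection
        env.fromCollection(Arrays.asList("a", "b", "c"))
                // Built-in sink: print to standard output
                .print();
        env.execute("built-in-connectors-demo");
    }
}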
Apache Kafka (source/sink)
Apache Cassandra (sink)
Amazon Kinesis Streams (source/sink)
Elasticsearch (sink)
Hadoop FileSystem (sink)
RabbitMQ (source/sink)
Apache NiFi (source/sink)
Twitter Streaming API (source)
Google PubSub (source/sink)
JDBC (sink)
JDBC Read/Write
Create the data table
sql
CREATE TABLE `t_access_log` (
`id` BIGINT ( 20 ) NOT NULL AUTO_INCREMENT COMMENT '主键ID',
`ip` VARCHAR ( 32 ) NOT NULL COMMENT 'IP地址',
`time` VARCHAR ( 255 ) NULL DEFAULT NULL COMMENT '访问时间',
`type` VARCHAR ( 32 ) NOT NULL COMMENT '请求类型',
`api` VARCHAR ( 32 ) NOT NULL COMMENT 'API地址',
PRIMARY KEY ( `id` )
) ENGINE = InnoDB AUTO_INCREMENT = 1;
The AccessLog entity class
java
@Data
public class AccessLog {
private String ip;
private String time;
private String type;
private String api;
}
1. Read data from an ArrayList and insert it into MySQL
java
public class JdbcConnectorsApplication {
public static void main(String[] args) throws Exception {
//1.创建运行环境
StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
//2.准备集合数据
List<String> arrayList = new ArrayList<>();
arrayList.add("10.10.20.101\t1601297294548\tPOST\taddOrder");
arrayList.add("10.10.20.102\t1601297296549\tGET\tgetOrder");
//3.读取集合数据,写入数据库
streamExecutionEnvironment.fromCollection(arrayList).addSink(JdbcSink.sink(
"insert into t_access_log(ip,time,type,api) values(?,?,?,?)",
((preparedStatement, data) -> {
System.out.println("data:"+data);
// 解析数据
String[] arrValue = String.valueOf(data).split("\t");
for (int i = 0; i < arrValue.length; i++) {
// 新增数据
preparedStatement.setString(i+1,arrValue[i]);
}
}),
new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
.withUrl("jdbc:mysql://127.0.0.1:3306/flink_demo?useSSL=false")
.withDriverName("com.mysql.jdbc.Driver")
.withUsername("root")
.withPassword("123456")
.build()
));
//执行任务
streamExecutionEnvironment.execute("jdbc-job");
}
}
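JdbcSink.sink also has an overload that takes a JdbcExecutionOptions argument to control batching and retries, which matters once the input is more than a couple of records. A sketch of that configuration (the numbers are illustrative, not recommendations):
java
JdbcExecutionOptions executionOptions = JdbcExecutionOptions.builder()
        .withBatchSize(100)        // flush after 100 buffered records
        .withBatchIntervalMs(200)  // or after 200 ms, whichever comes first
        .withMaxRetries(3)         // retry a failed batch up to 3 times
        .build();
// passed as the third argument:
// JdbcSink.sink(sql, statementBuilder, executionOptions, connectionOptions)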
2. Custom sink writing to MySQL
java
public class CustomSinkApplication {
public static void main(String[] args) throws Exception {
// 1. 创建 Flink 的流处理环境
StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
// 2. 从 Socket 数据源读取数据
// 参数分别为:主机名、端口号、行分隔符
DataStreamSource<String> dataStreamSource = streamExecutionEnvironment.socketTextStream("localhost", 9999, "\n");
// 3. 转换处理流数据
// 使用 map 函数将读取到的字符串数据转换为 AccessLog 对象
SingleOutputStreamOperator<AccessLog> outputStream = dataStreamSource.map(new MapFunction<String, AccessLog>() {
@Override
public AccessLog map(String line) {
// 将字符串按逗号分隔成数组
String[] arrValue = line.split(",");
// 创建一个新的 AccessLog 对象
AccessLog accessLog = new AccessLog();
// 根据数组的索引将数据赋值给 AccessLog 对象的相应属性
for (int i = 0; i < arrValue.length; i++) {
switch (i) {
case 0:
accessLog.setIp(arrValue[i]);
break;
case 1:
accessLog.setTime(arrValue[i]);
break;
case 2:
accessLog.setType(arrValue[i]);
break;
case 3:
accessLog.setApi(arrValue[i]);
break;
}
}
// 返回转换后的 AccessLog 对象
return accessLog;
}
});
// 4. 配置自定义写入数据源
// 将处理后的 AccessLog 对象写入到自定义的 MySQLSinkFunction 中
outputStream.addSink(new MySQLSinkFunction());
// 5. 执行任务
// 启动 Flink 作业,开始执行流处理任务
streamExecutionEnvironment.execute("CustomSinkApplication");
}
}
Here, writing outputStream to MySQL requires a custom MySQL sink function:
outputStream.addSink(new MySQLSinkFunction());
java
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
public class MySQLSinkFunction extends RichSinkFunction<AccessLog> {
private Connection connection;
private PreparedStatement preparedStatement;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
// 初始化数据库连接
String url = "jdbc:mysql://127.0.0.1:3306/flink_demo?useSSL=false";
String user = "root";
String password = "123456";
// 使用 JDBC 驱动建立数据库连接
connection = DriverManager.getConnection(url, user, password);
// 准备插入语句
String sql = "INSERT INTO t_access_log (ip, time, type, api) VALUES (?, ?, ?, ?)";
// 创建预编译的插入语句
preparedStatement = connection.prepareStatement(sql);
}
@Override
public void invoke(AccessLog data, Context context) throws Exception {
// 设置插入语句的参数
preparedStatement.setString(1, data.getIp());
preparedStatement.setString(2, data.getTime());
preparedStatement.setString(3, data.getType());
preparedStatement.setString(4, data.getApi());
// 执行插入操作
preparedStatement.executeUpdate();
}
@Override
public void close() throws Exception {
super.close();
// 关闭预编译的插入语句
if (preparedStatement != null) {
preparedStatement.close();
}
// 关闭数据库连接
if (connection != null) {
connection.close();
}
}
}
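The invoke method above issues one executeUpdate per record. If throughput becomes a concern, the same RichSinkFunction could buffer rows with JDBC batching instead; a rough sketch reusing the preparedStatement field (the batch size of 100 is arbitrary, and any remaining rows should also be flushed in close()):
java
private int batchCount = 0;

@Override
public void invoke(AccessLog data, Context context) throws Exception {
    preparedStatement.setString(1, data.getIp());
    preparedStatement.setString(2, data.getTime());
    preparedStatement.setString(3, data.getType());
    preparedStatement.setString(4, data.getApi());
    // Queue the row instead of writing it immediately
    preparedStatement.addBatch();
    if (++batchCount >= 100) {
        // Flush the accumulated batch to MySQL
        preparedStatement.executeBatch();
        batchCount = 0;
    }
}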
3. Custom source reading from MySQL (the official JDBC connector only provides a sink for MySQL)
java
/**
 * Custom source reading from MySQL
 */
public class CustomSourceApplication {
public static void main(String[] args) throws Exception{
// 1. 创建运行环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// 2. 配置自定义MySQL读取数据源
DataStreamSource<AccessLog> dataStream = env.addSource(new MySQLSourceFunction());
// 3. 设置并行度
dataStream.print().setParallelism(1);
// 4. 执行任务
env.execute("custom jdbc source.");
}
}
The source (read) function:
java
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.source.RichSourceFunction;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
public class MySQLSourceFunction extends RichSourceFunction<AccessLog> {
private Connection connection;
private PreparedStatement preparedStatement;
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
// 初始化数据库连接
String url = "jdbc:mysql://127.0.0.1:3306/flink_demo?useSSL=false";
String user = "root";
String password = "123456";
// 使用 JDBC 驱动建立数据库连接
connection = DriverManager.getConnection(url, user, password);
// 准备查询语句
String sql = "SELECT ip, time, type, api FROM t_access_log";
// 创建预编译的查询语句
preparedStatement = connection.prepareStatement(sql);
}
@Override
public void run(SourceContext<AccessLog> ctx) throws Exception {
// 执行查询语句并获取结果集
ResultSet resultSet = preparedStatement.executeQuery();
// 遍历结果集,将每条记录转换为 AccessLog 对象并发送到下游
while (resultSet.next()) {
AccessLog accessLog = new AccessLog();
accessLog.setIp(resultSet.getString("ip"));
accessLog.setApi(resultSet.getString("api"));
accessLog.setType(resultSet.getString("type"));
accessLog.setTime(resultSet.getString("time"));
// 使用 SourceContext 的 collect 方法将数据发送到下游
ctx.collect(accessLog);
}
}
@Override
public void close() throws Exception {
super.close();
// 关闭预编译的查询语句
if (preparedStatement != null) {
preparedStatement.close();
}
// 关闭数据库连接
if (connection != null) {
connection.close();
}
}
@Override
public void cancel() {
// 当 Flink 作业被取消时,调用此方法
// 可以在这里添加额外的清理逻辑
}
}
Netty-based data simulator
Create a new Maven module
XML
<dependencies>
<!-- Netty 核心组件依赖 -->
<dependency>
<groupId>io.netty</groupId>
<artifactId>netty-all</artifactId>
<version>4.1.16.Final</version>
</dependency>
<!-- spring boot 依赖 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<version>${spring.boot.version}</version>
</dependency>
<!-- Spring data jpa 组件依赖-->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
<version>${spring.boot.version}</version>
</dependency>
<!-- mysql-connector-java -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql.jdbc.version}</version>
</dependency>
<!-- Redis 缓存依赖 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
<version>2.1.1.RELEASE</version>
</dependency>
</dependencies>
SocketSourceApplication
java
package com.lcc.flink.sources.socket;
import com.lcc.flink.sources.socket.netty.NettyServerHandler;
import io.netty.bootstrap.ServerBootstrap;
import io.netty.channel.*;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioServerSocketChannel;
import io.netty.handler.codec.string.StringDecoder;
import io.netty.handler.codec.string.StringEncoder;
import io.netty.handler.logging.LogLevel;
import io.netty.handler.logging.LoggingHandler;
import java.util.Random;
public class SocketSourceApplication {
/**
* 服务端的端口
*/
private int port;
/**
* 初始化构造方法
* @param port
*/
public SocketSourceApplication(int port) {
this.port = port;
}
/**
* IP 访问列表
*/
private static String[] accessIps = new String[]{"10.10.20.101", "10.10.20.102", "10.10.20.103"};
/**
* 请求访问类型
*/
private static String[] accessTypes = new String[] {"GET", "POST", "PUT"};
/**
* 请求接口信息
*/
private static String[] accessApis = new String[] {"addOrder", "getAccount", "getOrder"};
/**
* Netty通讯服务启动方法
* @throws Exception
*/
public void runServer() throws Exception {
// 1. 创建Netty服务
// 2. 定义事件Boss监听组
EventLoopGroup bossGroup = new NioEventLoopGroup();
// 3. 定义用来处理已经被接收的连接
EventLoopGroup workerGroup = new NioEventLoopGroup();
try {
// 4. 定义NIO的服务启动类
ServerBootstrap sbs = new ServerBootstrap();
// 5. 配置NIO服务启动的相关参数
sbs.group(bossGroup, workerGroup)
.channel(NioServerSocketChannel.class)
// tcp最大缓存链接个数,它是tcp的参数, tcp_max_syn_backlog(半连接上限数量, CENTOS6.5默认是128)
.option(ChannelOption.SO_BACKLOG, 128)
//保持连接的正常状态
.childOption(ChannelOption.SO_KEEPALIVE, true)
// 根据日志级别打印输出
.handler(new LoggingHandler(LogLevel.INFO))
.childHandler(new ChannelInitializer<SocketChannel>() {
@Override
protected void initChannel(SocketChannel socketChannel) throws Exception {
//管道注册handler
ChannelPipeline pipeline = socketChannel.pipeline();
//编码通道处理
pipeline.addLast("decode", new StringDecoder());
//转码通道处理
pipeline.addLast("encode", new StringEncoder());
// 处理接收到的请求
pipeline.addLast(new NettyServerHandler());
}
});
System.err.println("-------server 启动------");
// 6. 开启新线程, 模拟数据,广播发送
new Thread(new Runnable() {
@Override
public void run() {
try {
while(true) {
String accessLog = getAccessLog();
System.out.println("broadcast (" + NettyServerHandler.channelList.size() + ") ==> " + accessLog);
if(NettyServerHandler.channelList.size() > 0 ){
for(Channel channel : NettyServerHandler.channelList) {
channel.writeAndFlush(accessLog);
}
}
Thread.sleep(1000);
}
}catch(Exception e) {
e.printStackTrace();
}
}
}).start();
// 7. 启动netty服务
ChannelFuture cf = sbs.bind(port).sync();
cf.channel().closeFuture().sync();
}catch (Exception e) {
e.printStackTrace();
}finally {
// Gracefully shut down the event loop groups and release resources
bossGroup.shutdownGracefully();
workerGroup.shutdownGracefully();
}
}
/**
* 获取访问日志
* @return
*/
private String getAccessLog() {
StringBuilder strBuilder = new StringBuilder();
strBuilder.append(accessIps[new Random().nextInt(accessIps.length )]).append("\t")
.append(System.currentTimeMillis()).append("\t")
.append(accessTypes[new Random().nextInt(accessTypes.length)]).append("\t")
.append(accessApis[new Random().nextInt(accessApis.length)]).append("\t\n");
return strBuilder.toString();
}
/**
* netty服务端的启动
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception{
new SocketSourceApplication(9911).runServer();
}
}
NettyServerHandler
java
package com.lcc.flink.sources.socket.netty;
import io.netty.channel.Channel;
import io.netty.channel.ChannelHandler;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInboundHandlerAdapter;
import io.netty.util.concurrent.EventExecutorGroup;
import java.util.ArrayList;
import java.util.List;
public class NettyServerHandler extends ChannelInboundHandlerAdapter {
// 客户端通道记录集合
public static List<Channel> channelList = new ArrayList<>();
@Override
public void channelActive(ChannelHandlerContext ctx) throws Exception {
System.out.println("Server---连接已建立: " + ctx);
super.channelActive(ctx);
// 将成功建立的连接通道, 加入到集合当中
channelList.add(ctx.channel());
}
@Override
public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception {
System.out.println("Server---收到的消息: " + msg);
}
@Override
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
System.out.println("server--读取数据出现异常");
cause.printStackTrace();
ctx.close();
}
@Override
public void channelUnregistered(ChannelHandlerContext ctx) throws Exception {
super.channelUnregistered(ctx);
// 移除无效的连接通道
channelList.remove(ctx.channel());
}
@Override
public void channelInactive(ChannelHandlerContext ctx) throws Exception {
super.channelInactive(ctx);
// 移除无效的连接通道
channelList.remove(ctx.channel());
}
}
HDFS Read/Write
Hadoop installation guide
Detailed guide: installing single-node Hadoop on Linux (CSDN blog)
Writing data out to a target directory
Add the Maven dependencies
XML
<dependencies>
<!--连接器文件系统的操作依赖-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-filesystem_2.11</artifactId>
<version>1.11.2</version>
</dependency>
<!--Hadoop 客户端的操作依赖-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.8.1</version>
</dependency>
</dependencies>
java
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.fs.bucketing.BucketingSink;
import org.apache.flink.streaming.connectors.fs.bucketing.DateTimeBucketer;
import org.apache.flink.streaming.connectors.fs.StringWriter;
public class HdfsSinkApplication {
public static void main(String[] args) throws Exception {
// 1. 创建 Flink 的流处理环境
StreamExecutionEnvironment executionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
// 2. 从 Socket 数据源读取数据
// 参数分别为:主机名、端口号、行分隔符
DataStreamSource<String> socketTextStream = executionEnvironment.socketTextStream("localhost", 9911, "\n");
// 3. 配置 HDFS Sink
// 指定输出目录
BucketingSink<String> sink = new BucketingSink<>("C:\\Users\\hp-pc\\Desktop\\hdfs");
// 3.1 设置桶策略
// 使用日期时间作为桶的命名策略,格式为 "yyyy-MM-dd-HHmm"
sink.setBucketer(new DateTimeBucketer<>("yyyy-MM-dd-HHmm"));
// 3.2 设置写入器
// 使用默认的字符串写入器
sink.setWriter(new StringWriter())
// 3.3 设置每个批次的大小(字节)
.setBatchSize(5 * 1024)
// 3.4 设置每个批次的最大滚动时间(毫秒)
.setBatchRolloverInterval(5 * 1000)
// 3.5 设置检查不活跃桶的时间间隔(毫秒)
.setInactiveBucketCheckInterval(30 * 1000)
// 3.6 设置不活跃桶的最大阈值时间(毫秒)
.setInactiveBucketThreshold(60 * 1000);
// 4. 将数据流添加到 HDFS Sink,并设置并行度为 1
socketTextStream.addSink(sink).setParallelism(1);
// 5. 执行任务
// 启动 Flink 作业,开始执行流处理任务
executionEnvironment.execute("job");
}
}
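Note that BucketingSink is deprecated in Flink 1.11; StreamingFileSink is its replacement for writing to local or HDFS paths. A minimal row-format sketch that could stand in for steps 3 and 4 above (the hdfs:// path is illustrative, and completed files are only finalized when checkpointing is enabled):
java
import org.apache.flink.api.common.serialization.SimpleStringEncoder;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;

StreamingFileSink<String> fileSink = StreamingFileSink
        .forRowFormat(new Path("hdfs://192.168.149.128:9090/flink/output"),
                new SimpleStringEncoder<String>("UTF-8"))
        .build();
socketTextStream.addSink(fileSink).setParallelism(1);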
Reading data from HDFS
java
public class HdfsSourceApplication {
public static void main(String[] args) throws Exception{
StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<String> dataStreamSource = streamExecutionEnvironment.readTextFile("hdfs://192.168.149.128:9090/hadoop-env.sh");
dataStreamSource.print().setParallelism(1);
// 4. 执行任务
streamExecutionEnvironment.execute("job");
}
}
Output: the contents of hadoop-env.sh are printed to the console.
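readTextFile produces a bounded stream, so the job finishes once the file has been read. To keep watching a path for new files, env.readFile with FileProcessingMode.PROCESS_CONTINUOUSLY can be used instead; a sketch (the directory path and the 10-second scan interval are illustrative):
java
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;

String path = "hdfs://192.168.149.128:9090/flink/input";
DataStreamSource<String> continuousStream = streamExecutionEnvironment.readFile(
        new TextInputFormat(new Path(path)),      // read the files line by line
        path,
        FileProcessingMode.PROCESS_CONTINUOUSLY,  // re-scan the path periodically
        10000L);                                  // scan interval in milliseconds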
Elasticsearch Read/Write
Single-node Elasticsearch installation guide
Installing single-node Elasticsearch 6.8.1 on Linux (CSDN blog)
Writing data from Flink into Elasticsearch
Add the dependency
XML
<dependencies>
<!-- ElasticSearch 连接器依赖-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-elasticsearch6_2.11</artifactId>
<version>1.7.2</version>
</dependency>
</dependencies>
java
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.http.HttpHost;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.xcontent.XContentType;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSink;
import org.apache.flink.streaming.connectors.elasticsearch.ElasticsearchSinkFunction;
import org.apache.flink.streaming.connectors.elasticsearch.RequestIndexer;
import org.apache.flink.streaming.connectors.elasticsearch.util.RetryRejectedExecutionFailureHandler;
import org.apache.flink.api.common.functions.RuntimeContext;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class Main {
public static void main(String[] args) throws Exception {
// 1. 创建 Flink 的流处理环境
StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
// 2. 从 Socket 数据源读取数据
// 参数分别为:主机名、端口号、行分隔符
DataStreamSource<String> dataStreamSource = streamExecutionEnvironment.socketTextStream("localhost", 9911, "\n");
// 3. 配置 ES 服务信息
List<HttpHost> httpHosts = new ArrayList<>();
httpHosts.add(new HttpHost("192.168.149.128", 9200, "http"));
// 4. 数据解析处理
ElasticsearchSink.Builder<String> esSinkBuilder = new ElasticsearchSink.Builder<>(
httpHosts,
new ElasticsearchSinkFunction<String>() {
@Override
public void process(String line, RuntimeContext runtimeContext, RequestIndexer requestIndexer) {
// 创建一个 JSON 对象来存储解析后的数据
Map<String, String> json = new HashMap<>();
// 分割输入的行数据
String[] split = line.split("\t");
// 将分割后的数据放入 JSON 对象中
for (int i = 0; i < split.length; i++) {
if (i == 0) {
json.put("ip", split[i]);
} else if (i == 1) {
json.put("time", split[i]);
} else if (i == 2) {
json.put("type", split[i]);
} else if (i == 3) {
json.put("api", split[i]);
}
}
// 创建一个索引请求
IndexRequest indexRequest = Requests.indexRequest()
.index("flink-es") // 指定索引名称
.type("access-log") // 指定类型(在 ES 7.x 中,类型已被弃用,但这里保留)
.source(json, XContentType.JSON); // 设置数据源和内容类型
// 将索引请求添加到请求索引器中
requestIndexer.add(indexRequest);
}
}
);
// 5. ES 的写入配置
esSinkBuilder.setBulkFlushMaxActions(1); // 设置批量刷新的最大动作数
esSinkBuilder.setRestClientFactory(
restClientBuilder -> {
restClientBuilder.setMaxRetryTimeoutMillis(5000); // 设置最大重试超时时间
}
);
// 6. 添加 ES 的写入器
dataStreamSource.addSink(esSinkBuilder.build());
// 7. 打印数据流(调试用)
dataStreamSource.print().setParallelism(1);
// 8. 执行任务
streamExecutionEnvironment.execute("ES-job"); // 启动 Flink 作业,开始执行流处理任务
}
}
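The ElasticsearchSink.Builder also exposes bulk-flush and failure-handling settings. For example, the RetryRejectedExecutionFailureHandler imported above could be wired in before build(); a sketch (the sizes and interval are illustrative):
java
esSinkBuilder.setBulkFlushMaxSizeMb(5);   // flush once the bulk request reaches 5 MB
esSinkBuilder.setBulkFlushInterval(1000); // or at least once per second
// re-queue requests that were rejected because the ES bulk queue was full
esSinkBuilder.setFailureHandler(new RetryRejectedExecutionFailureHandler());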
Kafka Read/Write
Single-node Kafka installation guide
Reading data from Kafka into Flink
Add the Maven dependency
XML
<dependencies>
<!-- flink kafka连接器依赖 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_2.11</artifactId>
<version>1.11.2</version>
</dependency>
</dependencies>
java
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import java.util.Properties;
public class KafkaSourceApplication {
public static void main(String[] args) throws Exception {
// 1. 创建 Flink 的流处理环境
StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
// 2. 配置 Kafka 连接属性
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "192.168.149.128:9092"); // Kafka 服务器地址
properties.setProperty("group.id", "flink_group"); // 消费者组 ID
// 3. 创建 Flink Kafka Consumer
FlinkKafkaConsumer<String> kafkaConsumer = new FlinkKafkaConsumer<>(
"flink-source", // Kafka 主题名称
new SimpleStringSchema(), // 序列化方案,这里使用简单的字符串序列化
properties // Kafka 连接属性
);
// 4. 设置 Kafka 消费的起始位置
kafkaConsumer.setStartFromEarliest(); // 尽可能从最早的记录开始
// kafkaConsumer.setStartFromLatest(); // 从最新的记录开始
// kafkaConsumer.setStartFromTimestamp(...); // 从指定的时间开始(毫秒)
// kafkaConsumer.setStartFromGroupOffsets(); // 默认的方法,从指定偏移量进行消费
// 5. 添加 Kafka 源到 Flink 流处理环境中
DataStreamSource<String> dataStreamSource = streamExecutionEnvironment.addSource(kafkaConsumer);
// 6. 打印数据流(调试用)
dataStreamSource.print().setParallelism(1); // 设置并行度为 1,确保数据按顺序打印
// 7. 执行任务
streamExecutionEnvironment.execute("kafka-job"); // 启动 Flink 作业,开始执行流处理任务
}
}
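With checkpointing enabled, the consumer commits its offsets back to Kafka whenever a checkpoint completes, rather than relying on the auto-commit interval. A sketch of that addition to the job above (the 5-second checkpoint interval is illustrative):
java
// Checkpoint every 5 seconds; offsets are committed on checkpoint completion
streamExecutionEnvironment.enableCheckpointing(5000);
kafkaConsumer.setCommitOffsetsOnCheckpoints(true); // this is the default, shown here for clarity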
Writing data from Flink to Kafka
java
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.util.serialization.KeyedSerializationSchemaWrapper;
import java.util.Properties;
public class KafkaSinkApplication {
public static void main(String[] args) throws Exception {
// 1. 创建 Flink 的流处理环境
StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
// 2. 配置 Kafka 连接属性
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "192.168.149.128:9092"); // Kafka 服务器地址
// 3. 从 Socket 数据源读取数据
DataStreamSource<String> dataStreamSource = streamExecutionEnvironment.socketTextStream("localhost", 9911, "\n");
// 4. 创建 Flink Kafka Producer
FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>(
"flink-sink-topic", // Kafka 主题名称
new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()), // 序列化方案
properties, // Kafka 连接属性
FlinkKafkaProducer.Semantic.AT_LEAST_ONCE // at-least-once delivery; messages may be written more than once on failure/retry
);
// 5. 将数据流添加到 Kafka 生产者
dataStreamSource.addSink(kafkaProducer);
// 6. 打印数据流(调试用)
dataStreamSource.print().setParallelism(1); // 设置并行度为 1,确保数据按顺序打印
// 7. 执行任务
streamExecutionEnvironment.execute("kafka-producer-job"); // 启动 Flink 作业,开始执行流处理任务
}
}
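For end-to-end exactly-once writes, FlinkKafkaProducer also supports Semantic.EXACTLY_ONCE, which uses Kafka transactions and therefore requires checkpointing plus a transaction timeout no larger than the broker's transaction.max.timeout.ms. A sketch based on the same properties object (the timeout and checkpoint interval are illustrative):
java
// Kafka transactions must not outlive the broker's transaction.max.timeout.ms (15 minutes by default)
properties.setProperty("transaction.timeout.ms", "900000");
FlinkKafkaProducer<String> exactlyOnceProducer = new FlinkKafkaProducer<>(
        "flink-sink-topic",
        new KeyedSerializationSchemaWrapper<>(new SimpleStringSchema()),
        properties,
        FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
streamExecutionEnvironment.enableCheckpointing(5000); // transactions are committed on checkpoints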
Custom serialization for Flink's Kafka reads and writes
In real-world scenarios there are all kinds of complex transfer objects, often with demanding serialization performance requirements, which calls for a custom serialization scheme. Protobuf is used as the example here.
Goal: produce to and consume from the same Kafka topic, using Protobuf for serialization and deserialization, and verify that the data can be parsed correctly.
Using Protobuf
1. Generate the Java class from the protobuf definition
Create the proto file under the resources directory
AccessLog.proto
protobuf
syntax = "proto3";
option java_package = "com.lcc.flink.connectors.kafka.proto";
option java_outer_classname = "AccessLogProto";
// 消息结构定义
message AccessLog {
string ip = 1;
string time = 2;
string type = 3;
string api = 4;
string num = 5;
}
Use the protoc compiler to generate the Java class
protoc.exe
Link: https://pan.baidu.com/s/1bE2ZpwSA1A0TZOjJsR24lA
Extraction code: 96sr
Create a protoc_flink.bat file with the following content:
bash
@echo off
for %%i in (proto/*.proto) do (
D:/dev_directory/proto/protoc.exe --proto_path=./proto --java_out=../java ./proto/%%i
echo generate %%i to java file successfully!
)
Running this bat script generates the proto Java class in the specified package:
java
public final class AccessLogProto {
private AccessLogProto() {}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistryLite registry) {
}
public static void registerAllExtensions(
com.google.protobuf.ExtensionRegistry registry) {
registerAllExtensions(
(com.google.protobuf.ExtensionRegistryLite) registry);
}
public interface AccessLogOrBuilder extends
// @@protoc_insertion_point(interface_extends:AccessLog)
com.google.protobuf.MessageOrBuilder {
/**
* <code>string ip = 1;</code>
*/
java.lang.String getIp();
/**
* <code>string ip = 1;</code>
*/
com.google.protobuf.ByteString
getIpBytes();
/**
* <code>string time = 2;</code>
*/
java.lang.String getTime();
/**
* <code>string time = 2;</code>
*/
com.google.protobuf.ByteString
getTimeBytes();
/**
* <code>string type = 3;</code>
*/
java.lang.String getType();
/**
* <code>string type = 3;</code>
*/
com.google.protobuf.ByteString
getTypeBytes();
/**
* <code>string api = 4;</code>
*/
java.lang.String getApi();
/**
* <code>string api = 4;</code>
*/
com.google.protobuf.ByteString
getApiBytes();
/**
* <code>string num = 5;</code>
*/
java.lang.String getNum();
/**
* <code>string num = 5;</code>
*/
com.google.protobuf.ByteString
getNumBytes();
}
/**
* <pre>
* 消息结构定义
* </pre>
*
* Protobuf type {@code AccessLog}
*/
public static final class AccessLog extends
com.google.protobuf.GeneratedMessageV3 implements
// @@protoc_insertion_point(message_implements:AccessLog)
AccessLogOrBuilder {
private static final long serialVersionUID = 0L;
// Use AccessLog.newBuilder() to construct.
private AccessLog(com.google.protobuf.GeneratedMessageV3.Builder<?> builder) {
super(builder);
}
private AccessLog() {
ip_ = "";
time_ = "";
type_ = "";
api_ = "";
num_ = "";
}
@java.lang.Override
@SuppressWarnings({"unused"})
protected java.lang.Object newInstance(
UnusedPrivateParameter unused) {
return new AccessLog();
}
@java.lang.Override
public final com.google.protobuf.UnknownFieldSet
getUnknownFields() {
return this.unknownFields;
}
private AccessLog(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
this();
if (extensionRegistry == null) {
throw new java.lang.NullPointerException();
}
com.google.protobuf.UnknownFieldSet.Builder unknownFields =
com.google.protobuf.UnknownFieldSet.newBuilder();
try {
boolean done = false;
while (!done) {
int tag = input.readTag();
switch (tag) {
case 0:
done = true;
break;
case 10: {
java.lang.String s = input.readStringRequireUtf8();
ip_ = s;
break;
}
case 18: {
java.lang.String s = input.readStringRequireUtf8();
time_ = s;
break;
}
case 26: {
java.lang.String s = input.readStringRequireUtf8();
type_ = s;
break;
}
case 34: {
java.lang.String s = input.readStringRequireUtf8();
api_ = s;
break;
}
case 42: {
java.lang.String s = input.readStringRequireUtf8();
num_ = s;
break;
}
default: {
if (!parseUnknownField(
input, unknownFields, extensionRegistry, tag)) {
done = true;
}
break;
}
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
throw e.setUnfinishedMessage(this);
} catch (java.io.IOException e) {
throw new com.google.protobuf.InvalidProtocolBufferException(
e).setUnfinishedMessage(this);
} finally {
this.unknownFields = unknownFields.build();
makeExtensionsImmutable();
}
}
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.lcc.flink.connectors.kafka.proto.AccessLogProto.internal_static_AccessLog_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.lcc.flink.connectors.kafka.proto.AccessLogProto.internal_static_AccessLog_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.class, com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.Builder.class);
}
public static final int IP_FIELD_NUMBER = 1;
private volatile java.lang.Object ip_;
/**
* <code>string ip = 1;</code>
*/
public java.lang.String getIp() {
java.lang.Object ref = ip_;
if (ref instanceof java.lang.String) {
return (java.lang.String) ref;
} else {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
ip_ = s;
return s;
}
}
/**
* <code>string ip = 1;</code>
*/
public com.google.protobuf.ByteString
getIpBytes() {
java.lang.Object ref = ip_;
if (ref instanceof java.lang.String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
ip_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
public static final int TIME_FIELD_NUMBER = 2;
private volatile java.lang.Object time_;
/**
* <code>string time = 2;</code>
*/
public java.lang.String getTime() {
java.lang.Object ref = time_;
if (ref instanceof java.lang.String) {
return (java.lang.String) ref;
} else {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
time_ = s;
return s;
}
}
/**
* <code>string time = 2;</code>
*/
public com.google.protobuf.ByteString
getTimeBytes() {
java.lang.Object ref = time_;
if (ref instanceof java.lang.String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
time_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
public static final int TYPE_FIELD_NUMBER = 3;
private volatile java.lang.Object type_;
/**
* <code>string type = 3;</code>
*/
public java.lang.String getType() {
java.lang.Object ref = type_;
if (ref instanceof java.lang.String) {
return (java.lang.String) ref;
} else {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
type_ = s;
return s;
}
}
/**
* <code>string type = 3;</code>
*/
public com.google.protobuf.ByteString
getTypeBytes() {
java.lang.Object ref = type_;
if (ref instanceof java.lang.String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
type_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
public static final int API_FIELD_NUMBER = 4;
private volatile java.lang.Object api_;
/**
* <code>string api = 4;</code>
*/
public java.lang.String getApi() {
java.lang.Object ref = api_;
if (ref instanceof java.lang.String) {
return (java.lang.String) ref;
} else {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
api_ = s;
return s;
}
}
/**
* <code>string api = 4;</code>
*/
public com.google.protobuf.ByteString
getApiBytes() {
java.lang.Object ref = api_;
if (ref instanceof java.lang.String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
api_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
public static final int NUM_FIELD_NUMBER = 5;
private volatile java.lang.Object num_;
/**
* <code>string num = 5;</code>
*/
public java.lang.String getNum() {
java.lang.Object ref = num_;
if (ref instanceof java.lang.String) {
return (java.lang.String) ref;
} else {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
num_ = s;
return s;
}
}
/**
* <code>string num = 5;</code>
*/
public com.google.protobuf.ByteString
getNumBytes() {
java.lang.Object ref = num_;
if (ref instanceof java.lang.String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
num_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
private byte memoizedIsInitialized = -1;
@java.lang.Override
public final boolean isInitialized() {
byte isInitialized = memoizedIsInitialized;
if (isInitialized == 1) return true;
if (isInitialized == 0) return false;
memoizedIsInitialized = 1;
return true;
}
@java.lang.Override
public void writeTo(com.google.protobuf.CodedOutputStream output)
throws java.io.IOException {
if (!getIpBytes().isEmpty()) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 1, ip_);
}
if (!getTimeBytes().isEmpty()) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 2, time_);
}
if (!getTypeBytes().isEmpty()) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 3, type_);
}
if (!getApiBytes().isEmpty()) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 4, api_);
}
if (!getNumBytes().isEmpty()) {
com.google.protobuf.GeneratedMessageV3.writeString(output, 5, num_);
}
unknownFields.writeTo(output);
}
@java.lang.Override
public int getSerializedSize() {
int size = memoizedSize;
if (size != -1) return size;
size = 0;
if (!getIpBytes().isEmpty()) {
size += com.google.protobuf.GeneratedMessageV3.computeStringSize(1, ip_);
}
if (!getTimeBytes().isEmpty()) {
size += com.google.protobuf.GeneratedMessageV3.computeStringSize(2, time_);
}
if (!getTypeBytes().isEmpty()) {
size += com.google.protobuf.GeneratedMessageV3.computeStringSize(3, type_);
}
if (!getApiBytes().isEmpty()) {
size += com.google.protobuf.GeneratedMessageV3.computeStringSize(4, api_);
}
if (!getNumBytes().isEmpty()) {
size += com.google.protobuf.GeneratedMessageV3.computeStringSize(5, num_);
}
size += unknownFields.getSerializedSize();
memoizedSize = size;
return size;
}
@java.lang.Override
public boolean equals(final java.lang.Object obj) {
if (obj == this) {
return true;
}
if (!(obj instanceof com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog)) {
return super.equals(obj);
}
com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog other = (com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog) obj;
if (!getIp()
.equals(other.getIp())) return false;
if (!getTime()
.equals(other.getTime())) return false;
if (!getType()
.equals(other.getType())) return false;
if (!getApi()
.equals(other.getApi())) return false;
if (!getNum()
.equals(other.getNum())) return false;
if (!unknownFields.equals(other.unknownFields)) return false;
return true;
}
@java.lang.Override
public int hashCode() {
if (memoizedHashCode != 0) {
return memoizedHashCode;
}
int hash = 41;
hash = (19 * hash) + getDescriptor().hashCode();
hash = (37 * hash) + IP_FIELD_NUMBER;
hash = (53 * hash) + getIp().hashCode();
hash = (37 * hash) + TIME_FIELD_NUMBER;
hash = (53 * hash) + getTime().hashCode();
hash = (37 * hash) + TYPE_FIELD_NUMBER;
hash = (53 * hash) + getType().hashCode();
hash = (37 * hash) + API_FIELD_NUMBER;
hash = (53 * hash) + getApi().hashCode();
hash = (37 * hash) + NUM_FIELD_NUMBER;
hash = (53 * hash) + getNum().hashCode();
hash = (29 * hash) + unknownFields.hashCode();
memoizedHashCode = hash;
return hash;
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(
java.nio.ByteBuffer data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(
java.nio.ByteBuffer data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(
com.google.protobuf.ByteString data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(
com.google.protobuf.ByteString data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(byte[] data)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(
byte[] data,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return PARSER.parseFrom(data, extensionRegistry);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input, extensionRegistry);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseDelimitedFrom(java.io.InputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseDelimitedFrom(
java.io.InputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseDelimitedWithIOException(PARSER, input, extensionRegistry);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(
com.google.protobuf.CodedInputStream input)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input);
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parseFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
return com.google.protobuf.GeneratedMessageV3
.parseWithIOException(PARSER, input, extensionRegistry);
}
@java.lang.Override
public Builder newBuilderForType() { return newBuilder(); }
public static Builder newBuilder() {
return DEFAULT_INSTANCE.toBuilder();
}
public static Builder newBuilder(com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog prototype) {
return DEFAULT_INSTANCE.toBuilder().mergeFrom(prototype);
}
@java.lang.Override
public Builder toBuilder() {
return this == DEFAULT_INSTANCE
? new Builder() : new Builder().mergeFrom(this);
}
@java.lang.Override
protected Builder newBuilderForType(
com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
Builder builder = new Builder(parent);
return builder;
}
/**
* <pre>
* 消息结构定义
* </pre>
*
* Protobuf type {@code AccessLog}
*/
public static final class Builder extends
com.google.protobuf.GeneratedMessageV3.Builder<Builder> implements
// @@protoc_insertion_point(builder_implements:AccessLog)
com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLogOrBuilder {
public static final com.google.protobuf.Descriptors.Descriptor
getDescriptor() {
return com.lcc.flink.connectors.kafka.proto.AccessLogProto.internal_static_AccessLog_descriptor;
}
@java.lang.Override
protected com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internalGetFieldAccessorTable() {
return com.lcc.flink.connectors.kafka.proto.AccessLogProto.internal_static_AccessLog_fieldAccessorTable
.ensureFieldAccessorsInitialized(
com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.class, com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.Builder.class);
}
// Construct using com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.newBuilder()
private Builder() {
maybeForceBuilderInitialization();
}
private Builder(
com.google.protobuf.GeneratedMessageV3.BuilderParent parent) {
super(parent);
maybeForceBuilderInitialization();
}
private void maybeForceBuilderInitialization() {
if (com.google.protobuf.GeneratedMessageV3
.alwaysUseFieldBuilders) {
}
}
@java.lang.Override
public Builder clear() {
super.clear();
ip_ = "";
time_ = "";
type_ = "";
api_ = "";
num_ = "";
return this;
}
@java.lang.Override
public com.google.protobuf.Descriptors.Descriptor
getDescriptorForType() {
return com.lcc.flink.connectors.kafka.proto.AccessLogProto.internal_static_AccessLog_descriptor;
}
@java.lang.Override
public com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog getDefaultInstanceForType() {
return com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.getDefaultInstance();
}
@java.lang.Override
public com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog build() {
com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog result = buildPartial();
if (!result.isInitialized()) {
throw newUninitializedMessageException(result);
}
return result;
}
@java.lang.Override
public com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog buildPartial() {
com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog result = new com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog(this);
result.ip_ = ip_;
result.time_ = time_;
result.type_ = type_;
result.api_ = api_;
result.num_ = num_;
onBuilt();
return result;
}
@java.lang.Override
public Builder clone() {
return super.clone();
}
@java.lang.Override
public Builder setField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.setField(field, value);
}
@java.lang.Override
public Builder clearField(
com.google.protobuf.Descriptors.FieldDescriptor field) {
return super.clearField(field);
}
@java.lang.Override
public Builder clearOneof(
com.google.protobuf.Descriptors.OneofDescriptor oneof) {
return super.clearOneof(oneof);
}
@java.lang.Override
public Builder setRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
int index, java.lang.Object value) {
return super.setRepeatedField(field, index, value);
}
@java.lang.Override
public Builder addRepeatedField(
com.google.protobuf.Descriptors.FieldDescriptor field,
java.lang.Object value) {
return super.addRepeatedField(field, value);
}
@java.lang.Override
public Builder mergeFrom(com.google.protobuf.Message other) {
if (other instanceof com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog) {
return mergeFrom((com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog)other);
} else {
super.mergeFrom(other);
return this;
}
}
public Builder mergeFrom(com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog other) {
if (other == com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog.getDefaultInstance()) return this;
if (!other.getIp().isEmpty()) {
ip_ = other.ip_;
onChanged();
}
if (!other.getTime().isEmpty()) {
time_ = other.time_;
onChanged();
}
if (!other.getType().isEmpty()) {
type_ = other.type_;
onChanged();
}
if (!other.getApi().isEmpty()) {
api_ = other.api_;
onChanged();
}
if (!other.getNum().isEmpty()) {
num_ = other.num_;
onChanged();
}
this.mergeUnknownFields(other.unknownFields);
onChanged();
return this;
}
@java.lang.Override
public final boolean isInitialized() {
return true;
}
@java.lang.Override
public Builder mergeFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws java.io.IOException {
com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog parsedMessage = null;
try {
parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry);
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
parsedMessage = (com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog) e.getUnfinishedMessage();
throw e.unwrapIOException();
} finally {
if (parsedMessage != null) {
mergeFrom(parsedMessage);
}
}
return this;
}
private java.lang.Object ip_ = "";
/**
* <code>string ip = 1;</code>
*/
public java.lang.String getIp() {
java.lang.Object ref = ip_;
if (!(ref instanceof java.lang.String)) {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
ip_ = s;
return s;
} else {
return (java.lang.String) ref;
}
}
/**
* <code>string ip = 1;</code>
*/
public com.google.protobuf.ByteString
getIpBytes() {
java.lang.Object ref = ip_;
if (ref instanceof String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
ip_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
/**
* <code>string ip = 1;</code>
*/
public Builder setIp(
java.lang.String value) {
if (value == null) {
throw new NullPointerException();
}
ip_ = value;
onChanged();
return this;
}
/**
* <code>string ip = 1;</code>
*/
public Builder clearIp() {
ip_ = getDefaultInstance().getIp();
onChanged();
return this;
}
/**
* <code>string ip = 1;</code>
*/
public Builder setIpBytes(
com.google.protobuf.ByteString value) {
if (value == null) {
throw new NullPointerException();
}
checkByteStringIsUtf8(value);
ip_ = value;
onChanged();
return this;
}
private java.lang.Object time_ = "";
/**
* <code>string time = 2;</code>
*/
public java.lang.String getTime() {
java.lang.Object ref = time_;
if (!(ref instanceof java.lang.String)) {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
time_ = s;
return s;
} else {
return (java.lang.String) ref;
}
}
/**
* <code>string time = 2;</code>
*/
public com.google.protobuf.ByteString
getTimeBytes() {
java.lang.Object ref = time_;
if (ref instanceof String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
time_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
/**
* <code>string time = 2;</code>
*/
public Builder setTime(
java.lang.String value) {
if (value == null) {
throw new NullPointerException();
}
time_ = value;
onChanged();
return this;
}
/**
* <code>string time = 2;</code>
*/
public Builder clearTime() {
time_ = getDefaultInstance().getTime();
onChanged();
return this;
}
/**
* <code>string time = 2;</code>
*/
public Builder setTimeBytes(
com.google.protobuf.ByteString value) {
if (value == null) {
throw new NullPointerException();
}
checkByteStringIsUtf8(value);
time_ = value;
onChanged();
return this;
}
private java.lang.Object type_ = "";
/**
* <code>string type = 3;</code>
*/
public java.lang.String getType() {
java.lang.Object ref = type_;
if (!(ref instanceof java.lang.String)) {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
type_ = s;
return s;
} else {
return (java.lang.String) ref;
}
}
/**
* <code>string type = 3;</code>
*/
public com.google.protobuf.ByteString
getTypeBytes() {
java.lang.Object ref = type_;
if (ref instanceof String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
type_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
/**
* <code>string type = 3;</code>
*/
public Builder setType(
java.lang.String value) {
if (value == null) {
throw new NullPointerException();
}
type_ = value;
onChanged();
return this;
}
/**
* <code>string type = 3;</code>
*/
public Builder clearType() {
type_ = getDefaultInstance().getType();
onChanged();
return this;
}
/**
* <code>string type = 3;</code>
*/
public Builder setTypeBytes(
com.google.protobuf.ByteString value) {
if (value == null) {
throw new NullPointerException();
}
checkByteStringIsUtf8(value);
type_ = value;
onChanged();
return this;
}
private java.lang.Object api_ = "";
/**
* <code>string api = 4;</code>
*/
public java.lang.String getApi() {
java.lang.Object ref = api_;
if (!(ref instanceof java.lang.String)) {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
api_ = s;
return s;
} else {
return (java.lang.String) ref;
}
}
/**
* <code>string api = 4;</code>
*/
public com.google.protobuf.ByteString
getApiBytes() {
java.lang.Object ref = api_;
if (ref instanceof String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
api_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
/**
* <code>string api = 4;</code>
*/
public Builder setApi(
java.lang.String value) {
if (value == null) {
throw new NullPointerException();
}
api_ = value;
onChanged();
return this;
}
/**
* <code>string api = 4;</code>
*/
public Builder clearApi() {
api_ = getDefaultInstance().getApi();
onChanged();
return this;
}
/**
* <code>string api = 4;</code>
*/
public Builder setApiBytes(
com.google.protobuf.ByteString value) {
if (value == null) {
throw new NullPointerException();
}
checkByteStringIsUtf8(value);
api_ = value;
onChanged();
return this;
}
private java.lang.Object num_ = "";
/**
* <code>string num = 5;</code>
*/
public java.lang.String getNum() {
java.lang.Object ref = num_;
if (!(ref instanceof java.lang.String)) {
com.google.protobuf.ByteString bs =
(com.google.protobuf.ByteString) ref;
java.lang.String s = bs.toStringUtf8();
num_ = s;
return s;
} else {
return (java.lang.String) ref;
}
}
/**
* <code>string num = 5;</code>
*/
public com.google.protobuf.ByteString
getNumBytes() {
java.lang.Object ref = num_;
if (ref instanceof String) {
com.google.protobuf.ByteString b =
com.google.protobuf.ByteString.copyFromUtf8(
(java.lang.String) ref);
num_ = b;
return b;
} else {
return (com.google.protobuf.ByteString) ref;
}
}
/**
* <code>string num = 5;</code>
*/
public Builder setNum(
java.lang.String value) {
if (value == null) {
throw new NullPointerException();
}
num_ = value;
onChanged();
return this;
}
/**
* <code>string num = 5;</code>
*/
public Builder clearNum() {
num_ = getDefaultInstance().getNum();
onChanged();
return this;
}
/**
* <code>string num = 5;</code>
*/
public Builder setNumBytes(
com.google.protobuf.ByteString value) {
if (value == null) {
throw new NullPointerException();
}
checkByteStringIsUtf8(value);
num_ = value;
onChanged();
return this;
}
@java.lang.Override
public final Builder setUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.setUnknownFields(unknownFields);
}
@java.lang.Override
public final Builder mergeUnknownFields(
final com.google.protobuf.UnknownFieldSet unknownFields) {
return super.mergeUnknownFields(unknownFields);
}
// @@protoc_insertion_point(builder_scope:AccessLog)
}
// @@protoc_insertion_point(class_scope:AccessLog)
private static final com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog DEFAULT_INSTANCE;
static {
DEFAULT_INSTANCE = new com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog();
}
public static com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog getDefaultInstance() {
return DEFAULT_INSTANCE;
}
private static final com.google.protobuf.Parser<AccessLog>
PARSER = new com.google.protobuf.AbstractParser<AccessLog>() {
@java.lang.Override
public AccessLog parsePartialFrom(
com.google.protobuf.CodedInputStream input,
com.google.protobuf.ExtensionRegistryLite extensionRegistry)
throws com.google.protobuf.InvalidProtocolBufferException {
return new AccessLog(input, extensionRegistry);
}
};
public static com.google.protobuf.Parser<AccessLog> parser() {
return PARSER;
}
@java.lang.Override
public com.google.protobuf.Parser<AccessLog> getParserForType() {
return PARSER;
}
@java.lang.Override
public com.lcc.flink.connectors.kafka.proto.AccessLogProto.AccessLog getDefaultInstanceForType() {
return DEFAULT_INSTANCE;
}
}
private static final com.google.protobuf.Descriptors.Descriptor
internal_static_AccessLog_descriptor;
private static final
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable
internal_static_AccessLog_fieldAccessorTable;
public static com.google.protobuf.Descriptors.FileDescriptor
getDescriptor() {
return descriptor;
}
private static com.google.protobuf.Descriptors.FileDescriptor
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\017AccessLog.proto\"M\n\tAccessLog\022\n\n\002ip\030\001 \001" +
"(\t\022\014\n\004time\030\002 \001(\t\022\014\n\004type\030\003 \001(\t\022\013\n\003api\030\004 " +
"\001(\t\022\013\n\003num\030\005 \001(\tB6\n$com.lcc.flink.connec" +
"tors.kafka.protoB\016AccessLogProtob\006proto3"
};
descriptor = com.google.protobuf.Descriptors.FileDescriptor
.internalBuildGeneratedFileFrom(descriptorData,
new com.google.protobuf.Descriptors.FileDescriptor[] {
});
internal_static_AccessLog_descriptor =
getDescriptor().getMessageTypes().get(0);
internal_static_AccessLog_fieldAccessorTable = new
com.google.protobuf.GeneratedMessageV3.FieldAccessorTable(
internal_static_AccessLog_descriptor,
new java.lang.String[] { "Ip", "Time", "Type", "Api", "Num", });
}
// @@protoc_insertion_point(outer_class_scope)
}
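A quick way to sanity-check the generated class is a build/serialize/parse round trip using the API shown above; a standalone sketch (the field values are made up):
java
// Build a message with the generated builder
AccessLogProto.AccessLog original = AccessLogProto.AccessLog.newBuilder()
        .setIp("10.10.20.101")
        .setTime(String.valueOf(System.currentTimeMillis()))
        .setType("GET")
        .setApi("getOrder")
        .setNum("1")
        .build();
// Serialize to bytes and parse back
byte[] bytes = original.toByteArray();
AccessLogProto.AccessLog parsed = AccessLogProto.AccessLog.parseFrom(bytes);
System.out.println(parsed.getIp() + " " + parsed.getApi());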
The actual AccessLog entity object (note: num is an Integer here while the proto field is a string, so a type-checked property copy such as Spring's BeanUtils will skip that field)
java
@Data
public class AccessLog implements Serializable {
private String ip;
private String time;
private String type;
private String api;
private Integer num;
}
Then write our custom serialization schema, CustomSerialSchema
java
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.springframework.beans.BeanUtils; // assumption: Spring's BeanUtils, i.e. copyProperties(source, target)
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import static org.apache.flink.util.Preconditions.checkNotNull;
public class CustomSerialSchema implements DeserializationSchema<AccessLog>, SerializationSchema<AccessLog> {
private static final long serialVersionUID = 1L;
private transient Charset charset;
public CustomSerialSchema() {
this(StandardCharsets.UTF_8);
}
public CustomSerialSchema(Charset charset) {
this.charset = checkNotNull(charset);
}
public Charset getCharset() {
return charset;
}
/**
* 反序列化实现
* @param message
* @return
*/
@Override
public AccessLog deserialize(byte[] message) {
AccessLog accessLog = null;
try {
AccessLogProto.AccessLog accessLogProto = AccessLogProto.AccessLog.parseFrom(message);
accessLog = new AccessLog();
BeanUtils.copyProperties(accessLogProto, accessLog);
return accessLog;
} catch (Exception e) {
e.printStackTrace();
}
return accessLog;
}
@Override
public boolean isEndOfStream(AccessLog nextElement) {
return false;
}
/**
* 序列化处理
* @param element
* @return
*/
@Override
public byte[] serialize(AccessLog element) {
AccessLogProto.AccessLog.Builder builder = AccessLogProto.AccessLog.newBuilder();
BeanUtils.copyProperties(element, builder);
return builder.build().toByteArray();
}
/**
* 定义消息类型
* @return
*/
@Override
public TypeInformation<AccessLog> getProducedType() {
return TypeInformation.of(AccessLog.class);
}
}
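One caveat: charset is declared transient, so after Flink ships the schema to the task managers via Java serialization the field would come back as null (it is not actually used by the Protobuf code paths above, so nothing breaks here). The usual fix, used for instance by Flink's SimpleStringSchema, is to write the charset name by hand; a sketch of the two methods that could be added to CustomSerialSchema (it also needs java.io.ObjectInputStream, java.io.ObjectOutputStream and java.io.IOException imports):
java
private void writeObject(ObjectOutputStream out) throws IOException {
    out.defaultWriteObject();
    // Charset itself is not serializable, so persist it by name
    out.writeUTF(charset.name());
}

private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
    in.defaultReadObject();
    this.charset = Charset.forName(in.readUTF());
}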
From here on, we can specify our own serialization scheme when reading from or writing to Kafka.
2. Write to Kafka with the custom serializer
java
public class KafkaSinkApplication {
public static void main(String[] args) throws Exception{
// 1. 创建运行环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// 2. 读取Socket数据源
DataStreamSource<String> socketStr = env.socketTextStream("localhost", 9911, "\n");
// 3. 转换处理流数据
SingleOutputStreamOperator<AccessLog> outputStream = socketStr.map(new MapFunction<String, AccessLog>() {
@Override
public AccessLog map(String value) throws Exception {
System.out.println(value);
// 根据分隔符解析数据
String[] arrValue = value.split("\t");
// 将数据组装为对象
AccessLog log = new AccessLog();
log.setNum(1);
for(int i=0; i<arrValue.length; i++) {
if(i == 0) {
log.setIp(arrValue[i]);
}else if( i== 1) {
log.setTime(arrValue[i]);
}else if( i== 2) {
log.setType(arrValue[i]);
}else if( i== 3) {
log.setApi(arrValue[i]);
}
}
return log;
}
});
// 4. Kafka producer configuration
FlinkKafkaProducer<AccessLog> kafkaProducer = new FlinkKafkaProducer<>(
"10.10.20.132:9092", // Kafka broker connection list
"flink-serial", // target Kafka topic
new CustomSerialSchema() // custom serialization schema
);
// 5. Add the Kafka sink
outputStream.addSink(kafkaProducer);
socketStr.print().setParallelism(1);
// 6. Execute the job
env.execute("job");
}
}
3. Read data from Kafka into Flink with the custom deserializer
java
public class KafkaSourceApplication {
public static void main(String[] args) throws Exception {
// 1. 创建运行环境
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// 2. 设置kafka服务连接信息
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", "192.168.149.128:9092");
properties.setProperty("group.id", "flink_group");
// 3. 创建Kafka消费端
FlinkKafkaConsumer<AccessLog> kafkaConsumer = new FlinkKafkaConsumer<>(
"flink-serial", // target topic
new CustomSerialSchema(), // custom deserialization schema
properties);
// 4. Read the Kafka data source
DataStreamSource<AccessLog> socketStr = env.addSource(kafkaConsumer);
socketStr.print().setParallelism(1);
// 5. 执行任务
env.execute("custom-job");
}
}