【Flink-Kafka-To-Kafka】使用 Flink 实现 Kafka 数据写入 Kafka
需求描述:
1、数据从 Kafka 写入 Kafka。
2、相关配置存放于 Mysql 中,通过 Mysql 进行动态读取。
3、此案例中的 Kafka 开启了 Kerberos 安全认证;如果你的环境不需要认证,请自行修改相关配置。
4、Kafka 数据为 Json 格式,通过 FlatMap 扁平化处理后完成写入。
5、读取时使用自定义 Source,写入时使用自定义 Sink。
6、本地测试时可以编辑 resources 目录下的 flink_backup_local.yml,并通过 ConfigTools.initConf 方法加载配置。
1)导入依赖
这里的依赖比较冗余,大家可以根据各自需求做删除或保留。
xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>example.cn.test</groupId>
<artifactId>kafkaetl2kafka</artifactId>
<version>1.0.0</version>
<properties>
    <!-- Each property is declared exactly once. Maven silently keeps the last
         duplicate, so the values below are the ones that were already in effect
         (hadoop.version 3.1.2, compiler level taken from target.java.version). -->
    <hbase.version>2.3.3</hbase.version>
    <spark.version>3.0.2</spark.version>
    <scala.version>2.12.10</scala.version>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <flink.version>1.14.0</flink.version>
    <scala.binary.version>2.12</scala.binary.version>
    <target.java.version>1.8</target.java.version>
    <maven.compiler.source>${target.java.version}</maven.compiler.source>
    <maven.compiler.target>${target.java.version}</maven.compiler.target>
    <log4j.version>2.17.2</log4j.version>
    <hadoop.version>3.1.2</hadoop.version>
    <hive.version>3.1.2</hive.version>
    <drools.version>7.18.0.Final</drools.version>
    <cos_api>5.2.4</cos_api>
</properties>
<dependencies>
<!-- 基础依赖 开始-->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-java</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- 基础依赖 结束-->
<!-- TABLE 开始-->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
    <!-- Use the shared property so every Flink module is upgraded together. -->
    <version>${flink.version}</version>
    <scope>provided</scope>
</dependency>
<!-- 使用 hive sql 时请注释掉该依赖,其他时候可以放开 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-cep_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- TABLE 结束-->
<!-- sql 开始-->
<!-- sql解析 开始 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-json</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-csv</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- 检查点 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-state-processor-api_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.5</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<!-- 本地监控任务 结束 -->
<!-- DataStream 开始 -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j-impl</artifactId>
<version>${log4j.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>${log4j.version}</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>${log4j.version}</version>
<scope>runtime</scope>
</dependency>
<!-- hdfs -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <!-- Keep hadoop-client on the same release as hadoop-auth (both driven by
         hadoop.version); mixing Hadoop releases on one classpath risks
         NoSuchMethodError at runtime. -->
    <version>${hadoop.version}</version>
</dependency>
<!-- 重点,容易被忽略的jar -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-auth</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- rocksdb_2 -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.16.18</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>gaei.cn.x5l</groupId>
<artifactId>acp-decode</artifactId>
<version>1.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-sql-connector-kafka_${scala.binary.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.jyaml</groupId>
<artifactId>jyaml</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.44</version>
</dependency>
<dependency>
<groupId>com.bazaarvoice.jolt</groupId>
<artifactId>jolt-core</artifactId>
<version>0.1.1</version>
</dependency>
<dependency>
<groupId>com.bazaarvoice.jolt</groupId>
<artifactId>json-utils</artifactId>
<version>0.1.1</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.1.23</version>
</dependency>
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>2.9.0</version>
</dependency>
<dependency>
<groupId>org.drools</groupId>
<artifactId>drools-core</artifactId>
<version>${drools.version}</version>
</dependency>
<dependency>
<groupId>org.drools</groupId>
<artifactId>drools-compiler</artifactId>
<version>${drools.version}</version>
</dependency>
<dependency>
<groupId>org.kie</groupId>
<artifactId>kie-api</artifactId>
<version>${drools.version}</version>
</dependency>
<dependency>
<groupId>org.drools</groupId>
<artifactId>drools-templates</artifactId>
<version>${drools.version}</version>
</dependency>
<dependency>
<groupId>org.kie</groupId>
<artifactId>kie-internal</artifactId>
<version>${drools.version}</version>
</dependency>
<dependency>
<groupId>org.drools</groupId>
<artifactId>drools-decisiontables</artifactId>
<version>${drools.version}</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.10</version>
</dependency>
<dependency>
<groupId>com.qcloud</groupId>
<artifactId>cos_api</artifactId>
<version>${cos_api}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Java Compiler -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>${target.java.version}</source>
<target>${target.java.version}</target>
</configuration>
</plugin>
<!-- We use the maven-shade plugin to create a fat jar that contains all necessary dependencies. -->
<!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.0.0</version>
<executions>
<!-- Run shade goal on package phase -->
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<artifactSet>
    <excludes>
        <exclude>org.apache.flink:force-shading</exclude>
        <exclude>com.google.code.findbugs:jsr305</exclude>
        <exclude>org.slf4j:*</exclude>
        <exclude>org.apache.logging.log4j:*</exclude>
        <!-- Must carry the same Scala suffix as the declared dependency,
             otherwise the exclusion matches nothing. -->
        <exclude>org.apache.flink:flink-runtime-web_${scala.binary.version}</exclude>
    </excludes>
</artifactSet>
<filters>
<filter>
<!-- Do not copy the signatures in the META-INF folder.
Otherwise, this might cause SecurityExceptions when using the JAR. -->
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.owp.flink.kafka.KafkaSourceDemo</mainClass>
</transformer>
<!-- flink sql 需要 -->
<!-- The service transformer is needed to merge META-INF/services files -->
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<!-- ... -->
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<!-- This improves the out-of-the-box experience in Eclipse by resolving some warnings. -->
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<versionRange>[3.0.0,)</versionRange>
<goals>
<goal>shade</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<versionRange>[3.1,)</versionRange>
<goals>
<goal>testCompile</goal>
<goal>compile</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
2)代码实现
2.1.resources
2.1.1.appconfig.yml
yml
mysql.url: "jdbc:mysql://1.1.1.1:3306/test?useSSL=false"
mysql.username: "test"
mysql.password: "123456"
mysql.driver: "com.mysql.jdbc.Driver"
2.1.2.log4j.properties
properties
log4j.rootLogger=info, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
2.1.3.log4j2.xml
xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration monitorInterval="5">
<Properties>
<property name="LOG_PATTERN" value="%date{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n" />
<property name="LOG_LEVEL" value="ERROR" />
</Properties>
<appenders>
<console name="console" target="SYSTEM_OUT">
<PatternLayout pattern="${LOG_PATTERN}"/>
<ThresholdFilter level="${LOG_LEVEL}" onMatch="ACCEPT" onMismatch="DENY"/>
</console>
<File name="log" fileName="tmp/log/job.log" append="false">
<PatternLayout pattern="%d{HH:mm:ss.SSS} %-5level %class{36} %L %M - %msg%xEx%n"/>
</File>
</appenders>
<loggers>
<root level="${LOG_LEVEL}">
<appender-ref ref="console"/>
<appender-ref ref="log"/>
</root>
</loggers>
</configuration>
2.1.4.flink_backup_local.yml
yml
business:
datasource: 'TEST'
refreshdelay: 60000
refreshperiod: 60000
hdfs:
checkPointPath: 'hdfs://nameserver/user/flink/checkpoints'
checkpointTimeout: 600000
checkpointing: 60000
maxConcurrentCheckpoints: 1
minPauseBetweenCheckpoints: 10000
restartInterval: 120
restartStrategy: 3
kafka-consumer:
prop:
auto.offset.reset: 'earliest'
bootstrap.servers: 'kfk01:9092,kfk02:9092,kfk03:9092,kfk04:9092,kfk05:9092,kfk06:9092'
enable.auto.commit: 'false'
group.id: 'kafkaetl2kafka_test'
fetch.max.bytes: '10485760'
max.partition.fetch.bytes: '5242880'
max.poll.interval.ms: '2000000'
max.poll.records: '20000'
receive.buffer.bytes: '10485760'
send.buffer.bytes: '10485760'
session.timeout.ms: '18000000'
isKerberized: '1'
krb5Conf: '/opt/conf/krb5.conf'
security_protocol: 'SASL_PLAINTEXT'
useTicketCache: 'false'
serviceName: 'kafka'
keytab: '/opt/conf/test.keytab'
principal: 'test@TEST.TEST.COM'
topics: 'test'
kafka-producer:
kafkaProducersPoolSize: 5
defaultTopic: 'test'
prop:
acks: 'all'
bootstrap.servers: 'kfk01:9092,kfk02:9092,kfk03:9092,kfk04:9092,kfk05:9092,kfk06:9092'
compression.type: 'lz4'
retries: '40'
retry.backoff.ms: '5000'
batch.size: '262144'
buffer.memory: '536870912'
max.request.size: '2148576'
request.timeout.ms: '30000000'
send.buffer.bytes: '10485760'
receive.buffer.bytes: '10485760'
linger.ms: '10'
transaction.timeout.ms: '36000000'
isKerberized: '1'
krb5Conf: '/opt/conf/krb5.conf'
security_protocol: 'SASL_PLAINTEXT'
useTicketCache: 'false'
serviceName: 'kafka'
keytab: '/opt/conf/test.keytab'
principal: 'test@TEST.TEST.COM'
mysql:
password: '123456'
url: 'jdbc:mysql://1.1.1.1:3306/test'
username: 'test'
processParallelism: 48
ismap: 'false'
iskeyby: 'true'
isprint: 'false'
flatMapParallelism: '240'
sinkParallelism: '240'
sourceParallelism: '240'
redis:
block-when-exhausted: 'false'
database: 0
host: '1.1.1.1'
password: '123456'
port: 8250
timeout: 6000000
2.2.utils
2.2.1.DBConn
java
import java.sql.*;
/**
 * Minimal JDBC helper that opens MySQL connections and remembers the most
 * recently opened one so that {@link #close()} can release it.
 */
public class DBConn {
    private static final String driver = "com.mysql.jdbc.Driver"; // MySQL JDBC driver class

    // Most recently opened connection; this is the one close() releases.
    private static Connection conn = null;

    /**
     * Loads the MySQL driver and opens a connection.
     *
     * @param url      JDBC URL of the database
     * @param username database user
     * @param password database password
     * @return the open connection, or {@code null} when the driver class cannot
     *         be loaded or the connection attempt fails (the error is printed)
     */
    public static Connection conn(String url, String username, String password) {
        Connection opened = null;
        try {
            Class.forName(driver); // make sure the driver is registered
            opened = DriverManager.getConnection(url, username, password);
            // Remember the connection; previously a shadowing local meant the
            // static field was never set and close() could never close anything.
            conn = opened;
        } catch (ClassNotFoundException | SQLException e) {
            e.printStackTrace();
        }
        return opened;
    }

    /**
     * Closes the connection most recently returned by
     * {@link #conn(String, String, String)}, if there is one. Safe to call
     * repeatedly or when no connection was ever opened.
     */
    public static void close() {
        if (conn == null) {
            return;
        }
        try {
            conn.close();
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            conn = null; // never try to close the same connection twice
        }
    }
}
2.2.2.CommonUtils
java
@Slf4j
public class CommonUtils {
public static StreamExecutionEnvironment setCheckpoint(StreamExecutionEnvironment env) throws IOException {
// ConfigTools.initConf("local");
Map hdfsMap = (Map) ConfigTools.mapConf.get("hdfs");
env.enableCheckpointing(((Integer) hdfsMap.get("checkpointing")).longValue(), CheckpointingMode.EXACTLY_ONCE);//这里会造成offset提交的延迟
env.getCheckpointConfig().setMinPauseBetweenCheckpoints(((Integer) hdfsMap.get("minPauseBetweenCheckpoints")).longValue());
env.getCheckpointConfig().setCheckpointTimeout(((Integer) hdfsMap.get("checkpointTimeout")).longValue());
env.getCheckpointConfig().setMaxConcurrentCheckpoints((Integer) hdfsMap.get("maxConcurrentCheckpoints"));
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
(Integer) hdfsMap.get("restartStrategy"), // 尝试重启的次数,不宜过小,分布式任务很容易出问题(正常情况),建议3-5次
Time.of(((Integer) hdfsMap.get("restartInterval")).longValue(), TimeUnit.SECONDS) // 延时
));
//设置可容忍的检查点失败数,默认值为0表示不允许容忍任何检查点失败
env.getCheckpointConfig().setTolerableCheckpointFailureNumber(2);
//设置状态后端存储方式
env.setStateBackend(new RocksDBStateBackend((String) hdfsMap.get("checkPointPath"), true));
// env.setStateBackend(new FsStateBackend((String) hdfsMap.get("checkPointPath"), true));
// env.setStateBackend(new HashMapStateBackend(());
return env;
}
public static FlinkKafkaConsumer<ConsumerRecord<String, String>> getKafkaConsumer(Map<String, Object> kafkaConf) throws IOException {
String[] topics = ((String) kafkaConf.get("topics")).split(",");
log.info("监听的topic: {}", topics);
Properties properties = new Properties();
Map<String, String> kafkaProp = (Map<String, String>) kafkaConf.get("prop");
for (String key : kafkaProp.keySet()) {
properties.setProperty(key, kafkaProp.get(key).toString());
}
if (!StringUtils.isBlank((String) kafkaProp.get("isKerberized")) && "1".equals(kafkaProp.get("isKerberized"))) {
System.setProperty("java.security.krb5.conf", kafkaProp.get("krb5Conf"));
properties.put("security.protocol", kafkaProp.get("security_protocol"));
properties.put("sasl.jaas.config", "com.sun.security.auth.module.Krb5LoginModule required "
+ "useTicketCache=" + kafkaProp.get("useTicketCache") + " "
+ "serviceName=\"" + kafkaProp.get("serviceName") + "\" "
+ "useKeyTab=true "
+ "keyTab=\"" + kafkaProp.get("keytab").toString() + "\" "
+ "principal=\"" + kafkaProp.get("principal").toString() + "\";");
}
properties.put("key.serializer", "org.apache.flink.kafka.shaded.org.apache.kafka.common.serialization.ByteArrayDeserializer");
properties.put("value.serializer", "org.apache.flink.kafka.shaded.org.apache.kafka.common.serialization.ByteArrayDeserializer");
FlinkKafkaConsumer<ConsumerRecord<String, String>> consumerRecordFlinkKafkaConsumer = new FlinkKafkaConsumer<ConsumerRecord<String, String>>(Arrays.asList(topics), new KafkaDeserializationSchema<ConsumerRecord<String, String>>() {
@Override
public TypeInformation<ConsumerRecord<String, String>> getProducedType() {
return TypeInformation.of(new TypeHint<ConsumerRecord<String, String>>() {
});
}
@Override
public boolean isEndOfStream(ConsumerRecord<String, String> stringStringConsumerRecord) {
return false;
}
@Override
public ConsumerRecord<String, String> deserialize(ConsumerRecord<byte[], byte[]> record) throws Exception {
return new ConsumerRecord<String, String>(
record.topic(),
record.partition(),
record.offset(),
record.timestamp(),
record.timestampType(),
record.checksum(),
record.serializedKeySize(),
record.serializedValueSize(),
new String(record.key() == null ? "".getBytes(StandardCharsets.UTF_8) : record.key(), StandardCharsets.UTF_8),
new String(record.value() == null ? "{}".getBytes(StandardCharsets.UTF_8) : record.value(), StandardCharsets.UTF_8));
}
}, properties);
return consumerRecordFlinkKafkaConsumer;
}
public static FlinkKafkaProducer getKafkaSink(Map conf) {
Map kafkaProducer = (Map) conf.get("kafka-producer");
Integer kafkaProducersPoolSize = (Integer) kafkaProducer.get("kafkaProducersPoolSize") == null ? 5 : (Integer) kafkaProducer.get("kafkaProducersPoolSize");
/* conf/ 为配置文件相对yarn cache的相对路径,这里源kafka和目的kafka用的是同一个krb5.conf,如果源kafka和目的kafka使用不同的KDC,
需要分别设置对应各自KDC的krb5.conf
*/
// System.setProperty("java.security.krb5.conf", "conf/krb5.conf");
String defaultTopic = (String) kafkaProducer.get("defaultTopic");
Map<String, String> kafkaProp = (Map<String, String>) kafkaProducer.get("prop");
Properties properties = new Properties();
for (Map.Entry<String, String> entry : kafkaProp.entrySet()) {
String mapKey = entry.getKey();
String mapValue = entry.getValue();
properties.put(mapKey, mapValue);
}
if (!StringUtils.isBlank((String) kafkaProp.get("isKerberized")) && "1".equals(kafkaProp.get("isKerberized"))) {
System.setProperty("java.security.krb5.conf", kafkaProp.get("krb5Conf"));
properties.put("security.protocol", kafkaProp.get("security_protocol"));
properties.put("sasl.jaas.config", "com.sun.security.auth.module.Krb5LoginModule required "
+ "useTicketCache=" + kafkaProp.get("useTicketCache") + " "
+ "serviceName=\"" + kafkaProp.get("serviceName") + "\" "
+ "useKeyTab=true "
+ "keyTab=\"" + kafkaProp.get("keytab").toString() + "\" "
+ "principal=\"" + kafkaProp.get("principal").toString() + "\";");
}
FlinkKafkaProducer<ConsumerRecord<String, String>> myProducer = new FlinkKafkaProducer<ConsumerRecord<String, String>>(
defaultTopic,
new ProductDeSerializationSchema(defaultTopic),
properties,
FlinkKafkaProducer.Semantic.AT_LEAST_ONCE, kafkaProducersPoolSize);
return myProducer;
}
}
2.3.conf
2.3.1.ConfigTools
java
@Slf4j
public class ConfigTools {
public static Map<String, Object> mapConf;
/**
* 获取对应的配置文件
*
* @param option
*/
public static void initConf(String option) {
String confFile = "/flink_backup_" + option + ".yml";
try {
InputStream dumpFile = ConfigTools.class.getResourceAsStream(confFile);
mapConf = Yaml.loadType(dumpFile, HashMap.class);
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* 获取对应的配置文件
*
* @param option
*/
public static void initMySqlConf(String option, Class clazz) {
String className = clazz.getName();
String confFile = "/appconfig.yml";
Map<String, String> mysqlConf;
try {
InputStream dumpFile = ConfigTools.class.getResourceAsStream(confFile);
mysqlConf = Yaml.loadType(dumpFile, HashMap.class);
String username = mysqlConf.get("mysql.username");
String password = mysqlConf.get("mysql.password");
String url = mysqlConf.get("mysql.url");
Connection conn = DBConn.conn(url, username, password);
Map<String, Object> config = getConfig(conn, className, option);
if (config == null || config.size() == 0) {
log.error("获取配置文件失败");
return;
}
mapConf = config;
} catch (Exception e) {
e.printStackTrace();
}
}
private static Map<String, Object> getConfig(Connection conn, String className, String option) throws SQLException {
PreparedStatement preparedStatement = null;
try {
String sql = "select config_context from app_config where app_name = '%s' and config_name = '%s'";
preparedStatement = conn.prepareStatement(String.format(sql, className, option));
ResultSet rs = preparedStatement.executeQuery();
Map<String, String> map = new LinkedHashMap<>();
String config_context = "";
while (rs.next()) {
config_context = rs.getString("config_context");
}
System.out.println("配置信息config_context:"+config_context);
// if(StringUtils.isNotBlank(config_context)){
// System.out.println(JSONObject.toJSONString(JSONObject.parseObject(config_context), SerializerFeature.PrettyFormat));
// }
Map<String, Object> mysqlConfMap = JSON.parseObject(config_context, Map.class);
return mysqlConfMap;
}finally {
if (preparedStatement != null) {
preparedStatement.close();
}
if (conn != null) {
conn.close();
}
}
}
public static void main(String[] args) {
// initMySqlConf("local", TboxPeriodBackoutA3K.class);
initConf("local");
String s = JSON.toJSONString(mapConf);
System.out.println(s);
}
}
2.4.serializer
2.4.1.ProductDeSerializationSchema
java
/**
 * Producer-side serialization schema for the Flink Kafka sink: writes the
 * string key and value of each incoming record to one fixed target topic.
 *
 * NOTE: every record goes to the single topic given to the constructor; a
 * per-source-topic to sink-topic mapping is not implemented.
 */
public class ProductDeSerializationSchema implements KafkaSerializationSchema<ConsumerRecord<String, String>> {
    private final String topic;

    /**
     * @param topic the topic every serialized record is sent to
     */
    public ProductDeSerializationSchema(String topic) {
        this.topic = topic;
    }

    /**
     * Encodes key and value as UTF-8 bytes.
     *
     * @param record the record to forward; a null key or value is passed through as null
     * @param aLong  timestamp supplied by the sink; not used
     * @return a producer record addressed to the configured topic
     */
    @Override
    public ProducerRecord<byte[], byte[]> serialize(ConsumerRecord<String, String> record, @Nullable Long aLong) {
        // Explicit UTF-8 instead of the platform default charset. Null checks
        // avoid an NPE on records that carry no key or no value.
        byte[] keyBytes = record.key() == null
                ? null : record.key().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] valueBytes = record.value() == null
                ? null : record.value().getBytes(java.nio.charset.StandardCharsets.UTF_8);
        return new ProducerRecord<byte[], byte[]>(topic, keyBytes, valueBytes);
    }
}
2.5.kafka2kafka
2.5.1.Kafka2Kafka
java
/**
 * Job entry point: reads records from the configured source topics and writes
 * them to the configured target topic.
 */
public class Kafka2Kafka {
    public static Logger logger = Logger.getLogger(Kafka2Kafka.class);

    /**
     * @param args {@code args[0]} is the configuration name looked up in MySQL
     * @throws IllegalArgumentException if no configuration name is given
     * @throws IllegalStateException    if the configuration could not be loaded
     */
    @SuppressWarnings("unchecked") // the config map comes from JSON parsing
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException("Usage: Kafka2Kafka <config_name>");
        }
        // For local testing use ConfigTools.initConf("local") instead.
        ConfigTools.initMySqlConf(args[0], Kafka2Kafka.class);
        Map<String, Object> mapConf = ConfigTools.mapConf;
        if (mapConf == null) {
            // initMySqlConf only logs failures, so stop here with a clear message.
            throw new IllegalStateException("Configuration '" + args[0] + "' could not be loaded");
        }

        // Execution environment with operator chaining disabled.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment().disableOperatorChaining();
        // Checkpointing, restart strategy and state backend.
        CommonUtils.setCheckpoint(env);

        // Source: getKafkaConsumer needs the consumer section (topics + prop).
        Map<String, Object> consumerConf = (Map<String, Object>) mapConf.get("kafka-consumer");
        if (consumerConf == null) {
            throw new IllegalStateException("Missing 'kafka-consumer' section in configuration");
        }
        FlinkKafkaConsumer<ConsumerRecord<String, String>> myConsumer = CommonUtils.getKafkaConsumer(consumerConf);
        DataStream<ConsumerRecord<String, String>> stream = env.addSource(myConsumer);

        // Sink: getKafkaSink reads the producer section from the full configuration.
        FlinkKafkaProducer<ConsumerRecord<String, String>> kafkaSink = CommonUtils.getKafkaSink(mapConf);
        stream.addSink(kafkaSink);

        // Submit the job.
        env.execute();
    }
}