Flink: "Recovery is suppressed by NoRestartBackoffTimeStrategy" reported during a data transformation

Hot-word counting example:

Use Flink's window function (apply) to read data from Kafka and count hot words.

apply: a full-window aggregate function. The computation over the window's data runs only once, when the window fires; aggregation starts only after all of the window's data has arrived, which makes requirements such as sorting the data inside a window possible.
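To make "full-window" concrete, here is a minimal sketch of the sorting idea. It is not part of the original example: it borrows the keyedStream variable and the Flink imports from the full consumer listing later in this post, and additionally needs java.util.ArrayList and java.util.List.

Java:
keyedStream
        .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
        .apply(new WindowFunction<Tuple2<String, Integer>, String, String, TimeWindow>() {
            @Override
            public void apply(String key, TimeWindow window,
                              Iterable<Tuple2<String, Integer>> input,
                              Collector<String> out) {
                // apply() hands us every element of the window at once,
                // so we can buffer and sort; an incremental function such
                // as reduce() never sees the whole window and cannot do this.
                List<Tuple2<String, Integer>> buffer = new ArrayList<>();
                input.forEach(buffer::add);
                buffer.sort((a, b) -> b.f1 - a.f1); // descending by count
                out.collect(key + " top=" + buffer.get(0));
            }
        });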

Code walkthrough:

Kafka producer (sending side):

Java:
package com.bigdata.Day04;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;
import java.util.Random;

public class Demo01_windows_kafka发消息 {

    public static void main(String[] args) throws Exception {

        // Properties is a kind of Map
        Properties properties = new Properties();
        // Host and port used to connect to the Kafka cluster
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "bigdata01:9092");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringSerializer");
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringSerializer");
        // Create a message producer (typed to avoid raw-type warnings)
        KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties);
        String[] arr = {"联通换猫","遥遥领先","恒大歌舞团","恒大足球队","郑州烂尾楼"};
        Random random = new Random();
        for (int i = 0; i < 500; i++) {
            ProducerRecord<String, String> record =
                    new ProducerRecord<>("topic1", arr[random.nextInt(arr.length)]);
            // Send one randomly chosen hot word every 50 ms
            kafkaProducer.send(record);
            Thread.sleep(50);
        }

        kafkaProducer.close();
    }
}
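Before starting the Flink job, it can help to confirm that topic1 is actually receiving messages. Below is a hypothetical standalone checker (the class name TopicCheck and group id "check" are mine, not from the original project); it uses only the kafka-clients API the producer above already depends on.

Java:
package com.bigdata.Day04;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

public class TopicCheck {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "bigdata01:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "check");
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");
        // Poll for roughly 10 seconds and print whatever arrives on topic1
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("topic1"));
            for (int i = 0; i < 10; i++) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
                for (ConsumerRecord<String, String> r : records) {
                    System.out.println(r.value());
                }
            }
        }
    }
}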

Kafka consumer (receiving side):

Java:
package com.bigdata.Day04;

import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;

import java.util.Properties;

public class Demo02_kafka收消息 {

    public static void main(String[] args) throws Exception {

        // 1. env: set up the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 2. source: read from Kafka
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "bigdata01:9092");
        properties.setProperty("group.id", "g2");
        FlinkKafkaConsumer<String> kafkaSource = new FlinkKafkaConsumer<String>("topic1", new SimpleStringSchema(), properties);
        DataStreamSource<String> dataStreamSource = env.addSource(kafkaSource);

        // 3. transformation: turn each word into (word, 1)
        DataStream<Tuple2<String, Integer>> mapStream = dataStreamSource.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String word) throws Exception {
                return Tuple2.of(word, 1);
            }
        });
        // Grouping with a lambda: this is what triggers the error
        KeyedStream<Tuple2<String, Integer>, String> keyedStream = mapStream.keyBy(tuple2 -> tuple2.f0);
        keyedStream.window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                // WindowFunction type parameters: input type, output type, key type, window type
                .apply(new WindowFunction<Tuple2<String, Integer>, String, String, TimeWindow>() {
            @Override
            public void apply(
                    String key,  // the grouping key, conceptually {"俄乌战争", [1,1,1,1,1]}
                    TimeWindow window, // the window object
                    Iterable<Tuple2<String, Integer>> input, // all data for this key in the window
                    Collector<String> out  // collector for emitting results
            ) throws Exception {
                long start = window.getStart();
                long end = window.getEnd();

                // Utility class from Apache commons-lang3
                String startStr = DateFormatUtils.format(start, "yyyy-MM-dd HH:mm:ss");
                String endStr = DateFormatUtils.format(end, "yyyy-MM-dd HH:mm:ss");
                int sum = 0;

                for (Tuple2<String, Integer> tuple2 : input) {
                    sum += tuple2.f1;
                }
                out.collect(key + "," + startStr + "," + endStr + ",sum=" + sum);
            }
        }).print();

        // 5. execute: run the job
        env.execute();
    }
}

Running the consumer side fails with the error from the title: Recovery is suppressed by NoRestartBackoffTimeStrategy.

Cause: "Recovery is suppressed by NoRestartBackoffTimeStrategy" is only the outer wrapper; it says the job failed and, with no restart strategy configured, Flink will not retry it. The actual failure, further down the stack trace, comes from the keyBy grouping being written as a lambda expression: because of Java type erasure, Flink cannot reliably extract the key type from a lambda.
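A side note on the outer message: NoRestartBackoffTimeStrategy simply means no restart strategy is set, so the first failure stops the job instead of being retried. Configuring one does not fix the root cause here, but it explains the wording. A minimal sketch against the env variable from the listings (the attempt count and delay are arbitrary; note this Time is org.apache.flink.api.common.time.Time, not the windowing Time used elsewhere):

Java:
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;

// With no restart strategy, a failing job reports
// "Recovery is suppressed by NoRestartBackoffTimeStrategy".
// A fixed-delay strategy makes Flink retry instead:
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(
        3,                  // at most 3 restart attempts
        Time.seconds(10))); // wait 10 s between attempts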

Solution:

When calling keyBy, spell out all of the function's type parameters by replacing the lambda with an anonymous KeySelector. The corrected code is below:

Java:
package com.bigdata.Day04;

import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;

import java.util.Properties;

public class Demo02_kafka收消息 {

    public static void main(String[] args) throws Exception {

        // 1. env: set up the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 2. source: read from Kafka
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "bigdata01:9092");
        properties.setProperty("group.id", "g2");
        FlinkKafkaConsumer<String> kafkaSource = new FlinkKafkaConsumer<String>("topic1", new SimpleStringSchema(), properties);
        DataStreamSource<String> dataStreamSource = env.addSource(kafkaSource);

        // 3. transformation: turn each word into (word, 1)
        DataStream<Tuple2<String, Integer>> mapStream = dataStreamSource.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String word) throws Exception {
                return Tuple2.of(word, 1);
            }
        });
        // Grouping with an explicit KeySelector: every type parameter is spelled out
        KeyedStream<Tuple2<String, Integer>, String> keyedStream = mapStream.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> tuple2) throws Exception {
                return tuple2.f0;
            }
        });
        keyedStream.window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                // WindowFunction type parameters: input type, output type, key type, window type
                .apply(new WindowFunction<Tuple2<String, Integer>, String, String, TimeWindow>() {
            @Override
            public void apply(
                    String key,  // the grouping key, conceptually {"俄乌战争", [1,1,1,1,1]}
                    TimeWindow window, // the window object
                    Iterable<Tuple2<String, Integer>> input, // all data for this key in the window
                    Collector<String> out  // collector for emitting results
            ) throws Exception {
                long start = window.getStart();
                long end = window.getEnd();

                // Utility class from Apache commons-lang3
                String startStr = DateFormatUtils.format(start, "yyyy-MM-dd HH:mm:ss");
                String endStr = DateFormatUtils.format(end, "yyyy-MM-dd HH:mm:ss");
                int sum = 0;

                for (Tuple2<String, Integer> tuple2 : input) {
                    sum += tuple2.f1;
                }
                out.collect(key + "," + startStr + "," + endStr + ",sum=" + sum);
            }
        }).print();

        // 5. execute: run the job
        env.execute();
    }
}
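Alternatively, if you prefer to keep the lambda, the keyBy(KeySelector, TypeInformation) overload lets you hand Flink the key type explicitly, so type erasure no longer matters. A sketch (verify the overload against your Flink version):

Java:
import org.apache.flink.api.common.typeinfo.Types;

// Same grouping as the lambda version, but the key type (String)
// is supplied explicitly instead of being inferred from the lambda:
KeyedStream<Tuple2<String, Integer>, String> keyedStream =
        mapStream.keyBy(tuple2 -> tuple2.f0, Types.STRING);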
大数据·人工智能·科技