Flink——进行数据转换时,报:Recovery is suppressed by NoRestartBackoffTimeStrategy

热词统计案例:

用flink中的窗口函数(apply)读取kafka中数据,并对热词进行统计。

apply:全量聚合函数,指在窗口触发的时候才会对窗口内的所有数据进行一次计算(等窗口的数据到齐,才开始进行聚合计算,可实现对窗口内的数据进行排序等需求)。

代码演示:

kafka发送消息端:

java 复制代码
package com.bigdata.Day04;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.util.Properties;
import java.util.Random;

/**
 * Demo Kafka producer for the hot-word counting example.
 *
 * <p>Sends 500 messages to the topic {@code topic1} on the broker
 * {@code bigdata01:9092}. Each message value is one entry picked at random
 * from a fixed list of hot words, and the loop pauses 50 ms between sends.
 */
public class Demo01_windows_kafka发消息 {

    /**
     * Builds the producer configuration, sends the messages and closes the producer.
     *
     * @param args unused
     * @throws Exception if sending fails or the sleep between sends is interrupted
     */
    public static void main(String[] args) throws Exception {

        // Properties is a kind of Map
        Properties properties = new Properties();
        // Address (host:port) of the Kafka cluster to connect to
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"bigdata01:9092");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringSerializer");
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringSerializer");

        String[] arr = {"联通换猫","遥遥领先","恒大歌舞团","恒大足球队","郑州烂尾楼"};
        Random random = new Random();

        // try-with-resources: the producer is closed even when send() or sleep() throws,
        // instead of only on the success path. Key/value types match the String serializers above.
        try (KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties)) {
            for (int i = 0; i < 500; i++) {
                ProducerRecord<String, String> record =
                        new ProducerRecord<>("topic1", arr[random.nextInt(arr.length)]);
                // Hand the record to the producer for sending
                kafkaProducer.send(record);
                Thread.sleep(50);
            }
        }
    }
}

kafka接收消息端:

java 复制代码
package com.bigdata.Day04;

import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Properties;

/**
 * Hot-word counting job, lambda {@code keyBy} variant.
 *
 * <p>Reads strings from the Kafka topic {@code topic1}, maps each one to a
 * {@code (word, 1)} tuple, keys the stream by the word using a lambda, and for
 * every key sums the counts per {@code TumblingProcessingTimeWindows.of(Time.seconds(5))}
 * window. Each result is printed as {@code key,windowStart,windowEnd,sum=N}.
 *
 * <p>NOTE(review): the surrounding article presents this variant as the one that
 * fails at runtime and attributes the failure to the lambda passed to
 * {@code keyBy}; confirm against the "Caused by" section of the actual stack trace.
 */
public class Demo02_kafka收消息 {

    public static void main(String[] args) throws Exception {

        //1. env - prepare the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        //2. source - load data from Kafka
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers","bigdata01:9092");
        properties.setProperty("group.id", "g2");
        FlinkKafkaConsumer<String> kafkaSource = new FlinkKafkaConsumer<String>("topic1",new SimpleStringSchema(),properties);
        DataStreamSource<String> dataStreamSource = env.addSource(kafkaSource);
        // transformation - turn every incoming word into a (word, 1) tuple
        DataStream<Tuple2<String,Integer>> mapStream = dataStreamSource.map(new MapFunction<String, Tuple2<String,Integer>>() {
            @Override
            public Tuple2<String,Integer> map(String word) throws Exception {

                return Tuple2.of(word,1);
            }
        });
        // Key by the word (field f0); this is the lambda form discussed in the article
        KeyedStream<Tuple2<String, Integer>, String> keyedStream = mapStream.keyBy(tuple2 -> tuple2.f0);
        keyedStream.window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                // 1st generic: input element type, 2nd: output type, 3rd: key type, 4th: window type
                .apply(new WindowFunction<Tuple2<String, Integer>, String, String, TimeWindow>() {
            @Override
            public void apply(
                    String key,  // grouping key, e.g. {"some hot word",[1,1,1,1,1]}
                    TimeWindow window, // the window object
                    Iterable<Tuple2<String, Integer>> input, // all elements of this key inside the window
                    Collector<String> out  // used to emit results
            ) throws Exception {
                long start = window.getStart();
                long end = window.getEnd();

                // DateFormatUtils is a utility class from the commons-lang3 package
                String startStr = DateFormatUtils.format(start,"yyyy-MM-dd HH:mm:ss");
                String endStr = DateFormatUtils.format(end,"yyyy-MM-dd HH:mm:ss");
                int sum = 0;

                // Add up the per-element counts (f1) for this key in this window
                for(Tuple2<String,Integer> tuple2: input){
                    sum += tuple2.f1;
                }
                out.collect(key +"," + startStr +","+endStr +",sum="+sum);
            }
        }).print();



        //5. execute - run the job
        env.execute();
    }
}

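当执行kafka接收消息端时,会报如下错误:Recovery is suppressed by NoRestartBackoffTimeStrategy(该提示只说明作业失败且没有配置重启策略,具体原因需要查看异常堆栈中的 Caused by 部分)。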

错误原因:在对kafka中数据进行keyBy分组处理时,使用了lambda表达式(即上面代码中的 mapStream.keyBy(tuple2 -> tuple2.f0))。

解决方法:

在使用keyBy时,不使用lambda表达式,改用匿名内部类实现KeySelector,并把输入类型和key类型这两个泛型参数都写清楚,修改后的代码如下:

java 复制代码
package com.bigdata.Day04;

import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.util.Properties;

/**
 * Hot-word counting job, anonymous {@code KeySelector} variant.
 *
 * <p>Consumes strings from the Kafka topic {@code topic1}, converts each to a
 * {@code (word, 1)} tuple, keys the stream by the word through an explicitly
 * typed {@link KeySelector}, and sums the counts of every key per
 * {@code TumblingProcessingTimeWindows.of(Time.seconds(5))} window. Results are
 * printed as {@code key,windowStart,windowEnd,sum=N}.
 */
public class Demo02_kafka收消息 {

    public static void main(String[] args) throws Exception {

        // Step 1: execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // Step 2: Kafka source
        Properties kafkaProps = new Properties();
        kafkaProps.setProperty("bootstrap.servers", "bigdata01:9092");
        kafkaProps.setProperty("group.id", "g2");
        DataStreamSource<String> words = env.addSource(
                new FlinkKafkaConsumer<String>("topic1", new SimpleStringSchema(), kafkaProps));

        // Step 3: the three user functions, each declared with its full generic signature

        // word -> (word, 1)
        MapFunction<String, Tuple2<String, Integer>> toPair =
                new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String word) throws Exception {
                        return Tuple2.of(word, 1);
                    }
                };

        // (word, n) -> word; written as an anonymous class rather than a lambda on purpose
        KeySelector<Tuple2<String, Integer>, String> byWord =
                new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> pair) throws Exception {
                        return pair.f0;
                    }
                };

        // Generics in order: input element type, output type, key type, window type
        WindowFunction<Tuple2<String, Integer>, String, String, TimeWindow> sumPerWindow =
                new WindowFunction<Tuple2<String, Integer>, String, String, TimeWindow>() {
                    @Override
                    public void apply(
                            String key,
                            TimeWindow window,
                            Iterable<Tuple2<String, Integer>> input,
                            Collector<String> out
                    ) throws Exception {
                        // Format the window bounds with the commons-lang3 helper
                        final String pattern = "yyyy-MM-dd HH:mm:ss";
                        String from = DateFormatUtils.format(window.getStart(), pattern);
                        String to = DateFormatUtils.format(window.getEnd(), pattern);

                        // Total of the f1 counts for this key within the window
                        int total = 0;
                        java.util.Iterator<Tuple2<String, Integer>> it = input.iterator();
                        while (it.hasNext()) {
                            total += it.next().f1;
                        }

                        out.collect(key + "," + from + "," + to + ",sum=" + total);
                    }
                };

        // Step 4: wire the pipeline together and print every window result
        DataStream<Tuple2<String, Integer>> pairs = words.map(toPair);
        KeyedStream<Tuple2<String, Integer>, String> keyed = pairs.keyBy(byWord);
        keyed.window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                .apply(sumPerWindow)
                .print();

        // Step 5: run the job
        env.execute();
    }
}
相关推荐
青岛前景互联信息技术有限公司10 小时前
OpenClaw 重构智慧消防:AI时代的平台融合实践
大数据·人工智能
梦梦代码精11 小时前
BuildingAI 上部署自定义工作流智能体:5 个实用技巧
大数据·人工智能·算法·开源软件
极客老王说Agent11 小时前
2026智造前瞻:实在Agent生产排期智能助理核心功能与使用方法详解
大数据·人工智能·ai·chatgpt
数智化精益手记局12 小时前
什么是设备维护管理?设备维护管理包含哪些内容?
大数据·网络·人工智能·安全·信息可视化
AllData公司负责人13 小时前
通过Postgresql同步到Doris,全视角演示AllData数据中台核心功能效果,涵盖:数据入湖仓,数据同步,数据处理,数据服务,BI可视化驾驶舱
java·大数据·数据库·数据仓库·人工智能·python·postgresql
桃花键神13 小时前
Bright Data Web Scraping指南 2026: 使用 MCP + Dify 自动采集海外社交媒体数据
大数据·前端·人工智能
程序鉴定师16 小时前
西安App开发推荐与业界认可的优秀实践
大数据·小程序
workflower16 小时前
从拿订单到看方向
大数据·人工智能·设计模式·机器人·动态规划
CableTech_SQH17 小时前
F5G 全光网,赋能智慧校园数字化建设
大数据·网络·5g·运维开发·信息与通信
goyeer17 小时前
【ITIL4】- 服务价值体系
大数据·运维·信息化·自动运维·itil