Flink Window DEMO 学习

该文档演示了fink windows的操作DEMO

环境准备:

测试数据

自动向kafka推送数据

java 复制代码
import cn.hutool.core.date.DateUtil;
import com.alibaba.fastjson2.JSONObject;
import com.wfg.flink.example.dto.KafkaPvDto;
import com.wfg.flink.example.utils.RandomGeneratorUtils;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.time.LocalDateTime;
import java.util.Properties;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;

import static com.wfg.flink.example.constants.Constants.KAFKA_BROKERS;
import static com.wfg.flink.example.constants.Constants.TOPIC_NAME;

public class KafkaTestProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", KAFKA_BROKERS);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            int times = 100000;
            for (int i = 0; i < times; i++) {
                System.out.println("Send No. :" + i);
                CompletableFuture.allOf(
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer))
                ).join();
                producer.flush();
                Random random = new Random();
                int randomNumber = random.nextInt(7); // 生成一个0到6的随机数
                Thread.sleep(1000 * randomNumber);
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }
    private static void sendKafkaMsg(Producer<String, String> producer) {
        String msg = createMsg();
        System.out.println(msg);
        producer.send(new ProducerRecord<>(TOPIC_NAME, UUID.randomUUID().toString().replaceAll("-", ""), msg));
    }
    private static String createMsg() {
        KafkaPvDto dto = new KafkaPvDto();
        dto.setUuid(UUID.randomUUID().toString().replaceAll("-", ""));
        dto.setUserName(RandomGeneratorUtils.generateRandomFullName());
        dto.setVisitIp(RandomGeneratorUtils.generateRandomIp());
//        DateTime begin = DateUtil.beginOfDay(new Date());
//        String timeStr = DateUtil.format(RandomGeneratorUtils.generateRandomDateTime(LocalDateTimeUtil.of(begin).toLocalDate(), LocalDate.now()), "yyyy-MM-dd HH:mm:ss");
        String timeStr = DateUtil.format(LocalDateTime.now(), "yyyy-MM-dd HH:mm:ss");
        dto.setVisitTime(timeStr);
        dto.setVisitServiceIp(RandomGeneratorUtils.generateRandomIp());
        return JSONObject.toJSONString(dto);
    }
}

注意:

java 复制代码
/**
 *
 * @author wfg
 */
@Slf4j
public class DataSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> collector) {
        KafkaPvDto data = JSONObject.parseObject(value, KafkaPvDto.class);
        if (data != null) {
            collector.collect(new Tuple2<>(data.getUserName(), 1));
        }
    }
}

基于时间窗口

java 复制代码
**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于时间窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .timeWindow(Time.seconds(30))
                .sum(0)
                .print();*/
        env.execute("flink window example");
    }
}

基于滑动时间窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");

        //基于滑动时间窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .timeWindow(Time.seconds(60), Time.seconds(30))
                .sum(0)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于事件数量窗口
        data.flatMap(new DataSplitter())
                .keyBy(1)
                .countWindow(3)
                .sum(0)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量滑动窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于事件数量滑动窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .countWindow(4, 3)
                .sum(0)
                .print();*
        env.execute("flink window example");
    }
}

基于会话时间窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于会话时间窗口
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .window(ProcessingTimeSessionWindows.withGap(Time.seconds(5)))
                //表示如果 5s 内没出现数据则认为超出会话时长,然后计算这个窗口的和
                .sum(1)
                .print();

        env.execute("flink window example");
    }
}

滚动窗口(Tumbling Window)

滚动窗口(Tumbling Window)

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //滚动窗口(Tumbling Window) 基于处理时间的 30 秒滚动窗口
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .window(TumblingProcessingTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();;

        env.execute("flink window example");
    }
}

基于事件时间

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
		// 基于事件时间的 30 秒滚动窗口
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .assignTimestampsAndWatermarks(/* 分配时间戳和水印 */)
                .window(TumblingEventTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

滑动窗口(Sliding Window)

基于处理时间

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
		// 基于处理时间的 30 秒滑动窗口,滑动间隔为 10 秒
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .window(SlidingProcessingTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

基于事件时间

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
		// 基于事件时间的 30 秒滑动窗口,滑动间隔为 10 秒  
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .assignTimestampsAndWatermarks(/* 分配时间戳和水印 */)
                .window(SlidingEventTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

注意:

相关推荐
云上艺旅20 小时前
K8S学习之基础七十四:部署在线书店bookinfo
学习·云原生·容器·kubernetes
你觉得20520 小时前
哈尔滨工业大学DeepSeek公开课:探索大模型原理、技术与应用从GPT到DeepSeek|附视频与讲义下载方法
大数据·人工智能·python·gpt·学习·机器学习·aigc
A旧城以西21 小时前
数据结构(JAVA)单向,双向链表
java·开发语言·数据结构·学习·链表·intellij-idea·idea
无所谓จุ๊บ21 小时前
VTK知识学习(50)- 交互与Widget(一)
学习·vtk
FAREWELL000751 天前
C#核心学习(七)面向对象--封装(6)C#中的拓展方法与运算符重载: 让代码更“聪明”的魔法
学习·c#·面向对象·运算符重载·oop·拓展方法
吴梓穆1 天前
UE5学习笔记 FPS游戏制作38 继承标准UI
笔记·学习·ue5
Three~stone1 天前
MySQL学习集--DDL
数据库·sql·学习
齐尹秦1 天前
HTML 音频(Audio)学习笔记
学习
瞌睡不来1 天前
(学习总结32)Linux 基础 IO
linux·学习·io
Moonnnn.1 天前
运算放大器(四)滤波电路(滤波器)
笔记·学习·硬件工程