Flink Window DEMO 学习

该文档演示了 Flink Window 的操作 DEMO

环境准备:

测试数据

自动向kafka推送数据

java 复制代码
import cn.hutool.core.date.DateUtil;
import com.alibaba.fastjson2.JSONObject;
import com.wfg.flink.example.dto.KafkaPvDto;
import com.wfg.flink.example.utils.RandomGeneratorUtils;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.time.LocalDateTime;
import java.util.Properties;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;

import static com.wfg.flink.example.constants.Constants.KAFKA_BROKERS;
import static com.wfg.flink.example.constants.Constants.TOPIC_NAME;

/**
 * Test data generator that pushes random page-view messages to Kafka.
 *
 * <p>Each round sends {@link #MESSAGES_PER_ROUND} JSON-serialized {@code KafkaPvDto}
 * messages concurrently, flushes the producer, and then pauses for a random number
 * of whole seconds before the next round.
 */
public class KafkaTestProducer {
    /** Number of send rounds performed before the program exits. */
    private static final int ROUNDS = 100000;
    /** Number of messages sent concurrently in every round. */
    private static final int MESSAGES_PER_ROUND = 9;
    /** Exclusive upper bound, in seconds, of the random pause between rounds (0 to 6). */
    private static final int MAX_PAUSE_SECONDS = 7;

    /**
     * Runs the generator until all rounds are sent or the thread is interrupted.
     *
     * @param args unused
     * @throws RuntimeException if the thread is interrupted while pausing between rounds
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", KAFKA_BROKERS);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // One generator for the whole run instead of a fresh instance per round.
        Random random = new Random();
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < ROUNDS; i++) {
                System.out.println("Send No. :" + i);
                CompletableFuture<?>[] sends = new CompletableFuture<?>[MESSAGES_PER_ROUND];
                for (int j = 0; j < sends.length; j++) {
                    sends[j] = CompletableFuture.runAsync(() -> sendKafkaMsg(producer));
                }
                // Wait until every task of this round has handed its record to the producer.
                CompletableFuture.allOf(sends).join();
                producer.flush();
                Thread.sleep(1000L * random.nextInt(MAX_PAUSE_SECONDS));
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds one random message, prints it, and sends it asynchronously under a random key.
     *
     * @param producer the shared producer used for sending
     */
    private static void sendKafkaMsg(Producer<String, String> producer) {
        String msg = createMsg();
        System.out.println(msg);
        String key = UUID.randomUUID().toString().replace("-", "");
        // Report asynchronous send failures instead of dropping them silently.
        producer.send(new ProducerRecord<>(TOPIC_NAME, key, msg), (metadata, exception) -> {
            if (exception != null) {
                System.err.println("Failed to send message: " + exception);
            }
        });
    }

    /**
     * Creates a JSON string for a {@code KafkaPvDto} filled with random values.
     *
     * @return the JSON representation of the generated DTO
     */
    private static String createMsg() {
        KafkaPvDto dto = new KafkaPvDto();
        dto.setUuid(UUID.randomUUID().toString().replace("-", ""));
        dto.setUserName(RandomGeneratorUtils.generateRandomFullName());
        dto.setVisitIp(RandomGeneratorUtils.generateRandomIp());
        // The visit time is the current wall-clock time, formatted as yyyy-MM-dd HH:mm:ss.
        String timeStr = DateUtil.format(LocalDateTime.now(), "yyyy-MM-dd HH:mm:ss");
        dto.setVisitTime(timeStr);
        dto.setVisitServiceIp(RandomGeneratorUtils.generateRandomIp());
        return JSONObject.toJSONString(dto);
    }
}

注意:

java 复制代码
/**
 * Turns one raw JSON message into a {@code (userName, 1)} tuple so that downstream
 * operators can count events per user.
 *
 * <p>Messages that parse to {@code null} or that carry no user name are skipped and
 * logged: Flink tuples cannot hold {@code null} fields and a {@code null} value cannot
 * be used as a key.
 *
 * @author wfg
 */
@Slf4j
public class DataSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
    /**
     * Parses {@code value} as a {@code KafkaPvDto} and emits {@code (userName, 1)}.
     *
     * @param value     the raw JSON message
     * @param collector receives at most one tuple per message
     */
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> collector) {
        KafkaPvDto data = JSONObject.parseObject(value, KafkaPvDto.class);
        if (data == null || data.getUserName() == null) {
            log.warn("Skipping record without a user name: {}", value);
            return;
        }
        collector.collect(new Tuple2<>(data.getUserName(), 1));
    }
}

基于时间窗口

java 复制代码
**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于时间窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .timeWindow(Time.seconds(30))
                .sum(0)
                .print();*/
        env.execute("flink window example");
    }
}

基于滑动时间窗口

java 复制代码
/**
 * Desc: Flink Window learning demo - per-user counts over a sliding time window.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Reads JSON messages from Kafka, maps them to {@code (userName, 1)} tuples and prints
     * the summed count per user for a 60-second window that slides every 30 seconds.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");

        // Sliding time window: size 60 s, slide 30 s.
        // DataSplitter emits (userName, 1): key by the name (f0) and sum the count (field 1).
        // NOTE(review): timeWindow(...) is deprecated in recent Flink releases in favour of
        // window(SlidingProcessingTimeWindows.of(...)) - confirm against the Flink version in use.
        data.flatMap(new DataSplitter())
                .keyBy(v -> v.f0)
                .timeWindow(Time.seconds(60), Time.seconds(30))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量窗口

java 复制代码
/**
 * Desc: Flink Window learning demo - per-user counts over a count window.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Reads JSON messages from Kafka, maps them to {@code (userName, 1)} tuples and prints
     * the summed count per user for every window of 3 elements.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        // Count-based window of 3 elements per key.
        // DataSplitter emits (userName, 1): key by the name (f0) and sum the count (field 1).
        data.flatMap(new DataSplitter())
                .keyBy(v -> v.f0)
                .countWindow(3)
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量滑动窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于事件数量滑动窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .countWindow(4, 3)
                .sum(0)
                .print();*
        env.execute("flink window example");
    }
}

基于会话时间窗口

java 复制代码
/**
 * Flink window tutorial: per-user counts over processing-time session windows.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Consumes JSON messages from Kafka, converts them to {@code (userName, 1)} tuples and
     * prints the per-user sum for each session window.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> rawMessages =
                env.fromSource(kafkaSource("localhost:9092"), WatermarkStrategy.noWatermarks(), "wfgxxx");

        // Session window: a key's window closes once no element has arrived for 5 seconds,
        // and the sum of that window is then emitted.
        rawMessages
                .flatMap(new DataSplitter())
                .keyBy(tuple -> tuple.f0)
                .window(ProcessingTimeSessionWindows.withGap(Time.seconds(5)))
                .sum(1)
                .print();

        env.execute("flink window example");
    }

    /** Builds the string-valued Kafka source that reads the demo topic from the earliest offset. */
    private static KafkaSource<String> kafkaSource(String bootstrapServers) {
        return KafkaSource.<String>builder()
                .setBootstrapServers(bootstrapServers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
    }
}

滚动窗口(Tumbling Window)

基于处理时间

java 复制代码
/**
 * Flink window tutorial: per-user counts over 30-second tumbling processing-time windows.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Consumes JSON messages from Kafka, converts them to {@code (userName, 1)} tuples and
     * prints the per-user sum for each tumbling window.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> rawMessages =
                env.fromSource(kafkaSource("localhost:9092"), WatermarkStrategy.noWatermarks(), "wfgxxx");

        // Tumbling window: 30 seconds, based on processing time.
        rawMessages
                .flatMap(new DataSplitter())
                .keyBy(tuple -> tuple.f0)
                .window(TumblingProcessingTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();

        env.execute("flink window example");
    }

    /** Builds the string-valued Kafka source that reads the demo topic from the earliest offset. */
    private static KafkaSource<String> kafkaSource(String bootstrapServers) {
        return KafkaSource.<String>builder()
                .setBootstrapServers(bootstrapServers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
    }
}

基于事件时间

java 复制代码
/**
 * Desc: Flink Window learning demo - per-user counts over a tumbling event-time window.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Reads JSON messages from Kafka, maps them to {@code (userName, 1)} tuples and prints
     * the summed count per user for every 30-second event-time window.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        // Event-time windows only fire when watermarks advance, so watermarks are generated
        // at the source. No timestamp assigner is configured: this relies on KafkaSource using
        // the Kafka record timestamp as the event time. Records may arrive up to 5 seconds
        // out of order.
        DataStreamSource<String> data = env.fromSource(
                source,
                WatermarkStrategy.<String>forBoundedOutOfOrderness(java.time.Duration.ofSeconds(5)),
                "wfgxxx");
        // Tumbling window: 30 seconds, based on event time.
        // The window must be applied directly to the keyed stream.
        data.flatMap(new DataSplitter())
                .keyBy(v -> v.f0)
                .window(TumblingEventTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

滑动窗口(Sliding Window)

基于处理时间

java 复制代码
/**
 * Flink window tutorial: per-user counts over sliding processing-time windows.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Consumes JSON messages from Kafka, converts them to {@code (userName, 1)} tuples and
     * prints the per-user sum for each sliding window.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> rawMessages =
                env.fromSource(kafkaSource("localhost:9092"), WatermarkStrategy.noWatermarks(), "wfgxxx");

        // Sliding window: 30 seconds long, sliding every 10 seconds, based on processing time.
        rawMessages
                .flatMap(new DataSplitter())
                .keyBy(tuple -> tuple.f0)
                .window(SlidingProcessingTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();

        env.execute("flink window example");
    }

    /** Builds the string-valued Kafka source that reads the demo topic from the earliest offset. */
    private static KafkaSource<String> kafkaSource(String bootstrapServers) {
        return KafkaSource.<String>builder()
                .setBootstrapServers(bootstrapServers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
    }
}

基于事件时间

java 复制代码
/**
 * Desc: Flink Window learning demo - per-user counts over a sliding event-time window.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Reads JSON messages from Kafka, maps them to {@code (userName, 1)} tuples and prints
     * the summed count per user for a 30-second event-time window sliding every 10 seconds.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        // Event-time windows only fire when watermarks advance, so watermarks are generated
        // at the source. No timestamp assigner is configured: this relies on KafkaSource using
        // the Kafka record timestamp as the event time. Records may arrive up to 5 seconds
        // out of order.
        DataStreamSource<String> data = env.fromSource(
                source,
                WatermarkStrategy.<String>forBoundedOutOfOrderness(java.time.Duration.ofSeconds(5)),
                "wfgxxx");
        // Sliding window: 30 seconds long, sliding every 10 seconds, based on event time.
        // The window must be applied directly to the keyed stream.
        data.flatMap(new DataSplitter())
                .keyBy(v -> v.f0)
                .window(SlidingEventTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

注意:

相关推荐
Jackyzhe6 小时前
Flink学习笔记:整体架构
笔记·flink
西西西仓鼠6 小时前
python学习打卡:DAY 40 训练和测试的规范写法
学习
Magnetic_h7 小时前
【iOS】方法与消息底层分析
笔记·学习·macos·ios·objective-c·cocoa
今天背单词了吗9807 小时前
算法学习笔记:19.牛顿迭代法——从原理到实战,涵盖 LeetCode 与考研 408 例题
笔记·学习·算法·牛顿迭代法
小菜鸡06267 小时前
FlinkSQL通解
大数据·flink
DKPT8 小时前
Java设计模式之行为型模式(观察者模式)介绍与说明
java·笔记·学习·观察者模式·设计模式
future14129 小时前
C#进阶学习日记
数据结构·学习
渣渣盟10 小时前
Flink数据流高效写入MySQL实战
mysql·flink·scala
lxsy10 小时前
spring-ai-alibaba 1.0.0.2 学习(十六)——多模态
人工智能·学习·ai-alibaba
xian_wwq11 小时前
【学习笔记】Nginx常用安全配置
笔记·学习·nginx