Flink Window DEMO 学习

该文档演示了 Flink Window(窗口)的操作 DEMO

环境准备:

测试数据

自动向kafka推送数据

java 复制代码
import cn.hutool.core.date.DateUtil;
import com.alibaba.fastjson2.JSONObject;
import com.wfg.flink.example.dto.KafkaPvDto;
import com.wfg.flink.example.utils.RandomGeneratorUtils;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.time.LocalDateTime;
import java.util.Properties;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;

import static com.wfg.flink.example.constants.Constants.KAFKA_BROKERS;
import static com.wfg.flink.example.constants.Constants.TOPIC_NAME;

/**
 * Test-data generator that continuously pushes randomly generated page-view
 * messages (JSON-serialized {@code KafkaPvDto}) to the Kafka topic {@code TOPIC_NAME}.
 *
 * <p>Each round sends a fixed number of messages concurrently, flushes the
 * producer, and then pauses for a random number of seconds so that the
 * downstream window demos see an irregular arrival pattern.
 */
public class KafkaTestProducer {
    /** Number of send rounds performed before the program exits. */
    private static final int ROUNDS = 100000;
    /** Number of messages sent concurrently in each round. */
    private static final int MESSAGES_PER_ROUND = 9;
    /** Exclusive upper bound, in seconds, of the random pause between rounds. */
    private static final int MAX_PAUSE_SECONDS = 7;

    /**
     * Entry point: configures a String/String Kafka producer and runs the send rounds.
     *
     * @param args unused
     * @throws RuntimeException if the thread is interrupted while pausing between rounds
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", KAFKA_BROKERS);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // One generator for the whole run instead of a new instance per round.
        Random random = new Random();
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            for (int i = 0; i < ROUNDS; i++) {
                System.out.println("Send No. :" + i);
                CompletableFuture<?>[] sends = new CompletableFuture<?>[MESSAGES_PER_ROUND];
                for (int j = 0; j < sends.length; j++) {
                    sends[j] = CompletableFuture.runAsync(() -> sendKafkaMsg(producer));
                }
                // Wait until every task of this round has handed its record to the producer.
                CompletableFuture.allOf(sends).join();
                producer.flush();
                // Pause 0 to 6 seconds before the next round.
                Thread.sleep(1000L * random.nextInt(MAX_PAUSE_SECONDS));
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    /**
     * Creates one random message, prints it, and sends it under a random UUID key.
     *
     * @param producer the producer used to send the record
     */
    private static void sendKafkaMsg(Producer<String, String> producer) {
        String msg = createMsg();
        System.out.println(msg);
        String key = UUID.randomUUID().toString().replaceAll("-", "");
        // The send is asynchronous; report failures instead of dropping them silently.
        producer.send(new ProducerRecord<>(TOPIC_NAME, key, msg), (metadata, exception) -> {
            if (exception != null) {
                System.err.println("Failed to send message to " + TOPIC_NAME + ": " + exception);
            }
        });
    }

    /**
     * Builds a page-view record with a random uuid, user name, and IPs, and the
     * current local time as the visit time.
     *
     * @return the record serialized as a JSON string
     */
    private static String createMsg() {
        KafkaPvDto dto = new KafkaPvDto();
        dto.setUuid(UUID.randomUUID().toString().replaceAll("-", ""));
        dto.setUserName(RandomGeneratorUtils.generateRandomFullName());
        dto.setVisitIp(RandomGeneratorUtils.generateRandomIp());
        // Visit time is "now", formatted as yyyy-MM-dd HH:mm:ss.
        String timeStr = DateUtil.format(LocalDateTime.now(), "yyyy-MM-dd HH:mm:ss");
        dto.setVisitTime(timeStr);
        dto.setVisitServiceIp(RandomGeneratorUtils.generateRandomIp());
        return JSONObject.toJSONString(dto);
    }
}

注意: 下面的 DataSplitter 把 Kafka 中的 JSON 消息解析为 (用户名, 1) 二元组,后面的窗口示例都基于它进行统计。

java 复制代码
/**
 * Flat-map function that turns one raw JSON message into a
 * {@code (userName, 1)} tuple.
 *
 * <p>The message is parsed as a {@code KafkaPvDto}. Messages that parse to
 * {@code null} or that carry no user name are skipped: the user name is meant
 * to be the grouping key downstream, and Flink does not accept {@code null} keys.
 *
 * @author wfg
 */
@Slf4j
public class DataSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
    /**
     * Parses {@code value} and emits {@code (userName, 1)} when a user name is present.
     *
     * @param value     raw JSON message
     * @param collector receives at most one tuple per message
     */
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> collector) {
        KafkaPvDto data = JSONObject.parseObject(value, KafkaPvDto.class);
        if (data == null || data.getUserName() == null) {
            log.warn("Skipping message without a user name: {}", value);
            return;
        }
        collector.collect(new Tuple2<>(data.getUserName(), 1));
    }
}

基于时间窗口

java 复制代码
**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于时间窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .timeWindow(Time.seconds(30))
                .sum(0)
                .print();*/
        env.execute("flink window example");
    }
}

基于滑动时间窗口

java 复制代码
/**
 * Flink window demo: sliding time window.
 *
 * <p>Reads messages from the Kafka topic {@code TOPIC_NAME}, maps each one to a
 * {@code (userName, 1)} tuple with {@code DataSplitter}, and prints the per-user
 * count over a 60-second window that advances every 30 seconds.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");

        // Sliding time window: key by user name (f0) and sum the count (field 1).
        // The source emits no watermarks, so the window is declared explicitly on
        // processing time rather than through the deprecated timeWindow(...) shortcut.
        data.flatMap(new DataSplitter())
                .keyBy(v -> v.f0)
                .window(SlidingProcessingTimeWindows.of(Time.seconds(60), Time.seconds(30)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量窗口

java 复制代码
/**
 * Flink window demo: count window.
 *
 * <p>Reads messages from the Kafka topic {@code TOPIC_NAME}, maps each one to a
 * {@code (userName, 1)} tuple with {@code DataSplitter}, and prints a per-user
 * sum each time 3 elements have arrived for that user.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        // Count-based window: key by user name (f0) and sum the count (field 1).
        data.flatMap(new DataSplitter())
                .keyBy(v -> v.f0)
                .countWindow(3)
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量滑动窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于事件数量滑动窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .countWindow(4, 3)
                .sum(0)
                .print();*
        env.execute("flink window example");
    }
}

基于会话时间窗口

java 复制代码
/**
 * Flink window demo: processing-time session window.
 *
 * <p>Messages from the Kafka topic {@code TOPIC_NAME} are mapped to tuples by
 * {@code DataSplitter}, keyed by tuple field {@code f0}, and summed on field 1
 * per session.
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> messages =
                environment.fromSource(kafkaSource("localhost:9092"), WatermarkStrategy.noWatermarks(), "wfgxxx");

        // Session window: if no data shows up within 5 seconds the session is
        // considered over, and the sum for that window is computed.
        messages.flatMap(new DataSplitter())
                .keyBy(tuple -> tuple.f0)
                .window(ProcessingTimeSessionWindows.withGap(Time.seconds(5)))
                .sum(1)
                .print();

        environment.execute("flink window example");
    }

    /** Builds the string-valued Kafka source that reads the topic from the earliest offset. */
    private static KafkaSource<String> kafkaSource(String bootstrapServers) {
        return KafkaSource.<String>builder()
                .setBootstrapServers(bootstrapServers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
    }
}

滚动窗口(Tumbling Window)

基于处理时间

java 复制代码
/**
 * Flink window demo: tumbling window on processing time.
 *
 * <p>Messages from the Kafka topic {@code TOPIC_NAME} are mapped to tuples by
 * {@code DataSplitter}, keyed by tuple field {@code f0}, and summed on field 1
 * in 30-second tumbling windows.
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        KafkaSource<String> kafka = createSource("localhost:9092");
        DataStreamSource<String> messages =
                environment.fromSource(kafka, WatermarkStrategy.noWatermarks(), "wfgxxx");

        // Tumbling window: 30-second windows based on processing time.
        messages.flatMap(new DataSplitter())
                .keyBy(entry -> entry.f0)
                .window(TumblingProcessingTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();

        environment.execute("flink window example");
    }

    /** Creates the string-valued Kafka source that reads the topic from the earliest offset. */
    private static KafkaSource<String> createSource(String bootstrapServers) {
        return KafkaSource.<String>builder()
                .setBootstrapServers(bootstrapServers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
    }
}

基于事件时间

java 复制代码
/**
 * Flink window demo: tumbling window on event time.
 *
 * <p>Reads messages from the Kafka topic {@code TOPIC_NAME}, maps each one to a
 * {@code (userName, 1)} tuple with {@code DataSplitter}, and prints the per-user
 * count for every 30-second event-time window.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        // Event-time windows only fire when watermarks advance, so the watermark
        // strategy is attached at the source. No timestamp assigner is set: the
        // timestamp the Kafka source attaches to each record is used as event time
        // (the tuples emitted by DataSplitter no longer carry the visit time).
        // The 5-second out-of-orderness bound is a demo value.
        DataStreamSource<String> data = env.fromSource(
                source,
                WatermarkStrategy.<String>forBoundedOutOfOrderness(java.time.Duration.ofSeconds(5)),
                "wfgxxx");
        // 30-second tumbling window based on event time.
        data.flatMap(new DataSplitter())
                .keyBy(v -> v.f0)
                .window(TumblingEventTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

滑动窗口(Sliding Window)

基于处理时间

java 复制代码
/**
 * Flink window demo: sliding window on processing time.
 *
 * <p>Messages from the Kafka topic {@code TOPIC_NAME} are mapped to tuples by
 * {@code DataSplitter}, keyed by tuple field {@code f0}, and summed on field 1
 * over a 30-second window that slides every 10 seconds.
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        DataStreamSource<String> messages =
                environment.fromSource(newKafkaSource("localhost:9092"), WatermarkStrategy.noWatermarks(), "wfgxxx");

        // 30-second sliding window based on processing time, with a 10-second slide interval.
        messages.flatMap(new DataSplitter())
                .keyBy(pair -> pair.f0)
                .window(SlidingProcessingTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();

        environment.execute("flink window example");
    }

    /** Assembles the string-valued Kafka source that reads the topic from the earliest offset. */
    private static KafkaSource<String> newKafkaSource(String bootstrapServers) {
        return KafkaSource.<String>builder()
                .setBootstrapServers(bootstrapServers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
    }
}

基于事件时间

java 复制代码
/**
 * Flink window demo: sliding window on event time.
 *
 * <p>Reads messages from the Kafka topic {@code TOPIC_NAME}, maps each one to a
 * {@code (userName, 1)} tuple with {@code DataSplitter}, and prints the per-user
 * count over a 30-second event-time window that slides every 10 seconds.
 */
@Slf4j
public class WindowsDemo {
    /**
     * Builds and runs the job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        // Event-time windows only fire when watermarks advance, so the watermark
        // strategy is attached at the source. No timestamp assigner is set: the
        // timestamp the Kafka source attaches to each record is used as event time
        // (the tuples emitted by DataSplitter no longer carry the visit time).
        // The 5-second out-of-orderness bound is a demo value.
        DataStreamSource<String> data = env.fromSource(
                source,
                WatermarkStrategy.<String>forBoundedOutOfOrderness(java.time.Duration.ofSeconds(5)),
                "wfgxxx");
        // 30-second sliding window based on event time, with a 10-second slide interval.
        data.flatMap(new DataSplitter())
                .keyBy(v -> v.f0)
                .window(SlidingEventTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

注意:

相关推荐
yzqy_24 分钟前
AMD AI 开发者计划学习笔记:从 ROCm 到 Ryzen AI,理解 AMD 的 AI 开发生态
人工智能·笔记·学习·datawhale·amdev
H__Rick1 小时前
C51学习-DAY4
嵌入式硬件·学习·51单片机·硬件工程
red_redemption2 小时前
自由学习记录(201)
学习
一条泥憨鱼2 小时前
Java开发效率神器:Lombok从入门到精通!
java·后端·学习·开发·lombok
Niuguangshuo2 小时前
LangChain学习之旅(三):用Memory赋予模型记忆
学习·langchain
H__Rick2 小时前
C51学习-DAY8
单片机·嵌入式硬件·学习
chase。3 小时前
【学习笔记】Dexora:面向高自由度双臂灵巧操作的开源 VLA 系统
笔记·学习
風清掦3 小时前
【STM32学习笔记-15】FLASH 闪存(Claude)
笔记·stm32·单片机·嵌入式硬件·学习
新时代牛马3 小时前
内核调试方法
linux·学习
暴躁小师兄数据学院3 小时前
【AI大数据工程师特训笔记】第15讲:大数据环境安装
大数据·hadoop·flink·spark