Flink Window DEMO 学习

该文档演示了fink windows的操作DEMO

环境准备:

测试数据

自动向kafka推送数据

java 复制代码
import cn.hutool.core.date.DateUtil;
import com.alibaba.fastjson2.JSONObject;
import com.wfg.flink.example.dto.KafkaPvDto;
import com.wfg.flink.example.utils.RandomGeneratorUtils;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.time.LocalDateTime;
import java.util.Properties;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;

import static com.wfg.flink.example.constants.Constants.KAFKA_BROKERS;
import static com.wfg.flink.example.constants.Constants.TOPIC_NAME;

public class KafkaTestProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", KAFKA_BROKERS);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            int times = 100000;
            for (int i = 0; i < times; i++) {
                System.out.println("Send No. :" + i);
                CompletableFuture.allOf(
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer))
                ).join();
                producer.flush();
                Random random = new Random();
                int randomNumber = random.nextInt(7); // 生成一个0到6的随机数
                Thread.sleep(1000 * randomNumber);
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }
    private static void sendKafkaMsg(Producer<String, String> producer) {
        String msg = createMsg();
        System.out.println(msg);
        producer.send(new ProducerRecord<>(TOPIC_NAME, UUID.randomUUID().toString().replaceAll("-", ""), msg));
    }
    private static String createMsg() {
        KafkaPvDto dto = new KafkaPvDto();
        dto.setUuid(UUID.randomUUID().toString().replaceAll("-", ""));
        dto.setUserName(RandomGeneratorUtils.generateRandomFullName());
        dto.setVisitIp(RandomGeneratorUtils.generateRandomIp());
//        DateTime begin = DateUtil.beginOfDay(new Date());
//        String timeStr = DateUtil.format(RandomGeneratorUtils.generateRandomDateTime(LocalDateTimeUtil.of(begin).toLocalDate(), LocalDate.now()), "yyyy-MM-dd HH:mm:ss");
        String timeStr = DateUtil.format(LocalDateTime.now(), "yyyy-MM-dd HH:mm:ss");
        dto.setVisitTime(timeStr);
        dto.setVisitServiceIp(RandomGeneratorUtils.generateRandomIp());
        return JSONObject.toJSONString(dto);
    }
}

注意:

java 复制代码
/**
 *
 * @author wfg
 */
@Slf4j
public class DataSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> collector) {
        KafkaPvDto data = JSONObject.parseObject(value, KafkaPvDto.class);
        if (data != null) {
            collector.collect(new Tuple2<>(data.getUserName(), 1));
        }
    }
}

基于时间窗口

java 复制代码
**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于时间窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .timeWindow(Time.seconds(30))
                .sum(0)
                .print();*/
        env.execute("flink window example");
    }
}

基于滑动时间窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");

        //基于滑动时间窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .timeWindow(Time.seconds(60), Time.seconds(30))
                .sum(0)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于事件数量窗口
        data.flatMap(new DataSplitter())
                .keyBy(1)
                .countWindow(3)
                .sum(0)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量滑动窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于事件数量滑动窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .countWindow(4, 3)
                .sum(0)
                .print();*
        env.execute("flink window example");
    }
}

基于会话时间窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于会话时间窗口
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .window(ProcessingTimeSessionWindows.withGap(Time.seconds(5)))
                //表示如果 5s 内没出现数据则认为超出会话时长,然后计算这个窗口的和
                .sum(1)
                .print();

        env.execute("flink window example");
    }
}

滚动窗口(Tumbling Window)

滚动窗口(Tumbling Window)

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //滚动窗口(Tumbling Window) 基于处理时间的 30 秒滚动窗口
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .window(TumblingProcessingTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();;

        env.execute("flink window example");
    }
}

基于事件时间

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
		// 基于事件时间的 30 秒滚动窗口
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .assignTimestampsAndWatermarks(/* 分配时间戳和水印 */)
                .window(TumblingEventTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

滑动窗口(Sliding Window)

基于处理时间

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
		// 基于处理时间的 30 秒滑动窗口,滑动间隔为 10 秒
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .window(SlidingProcessingTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

基于事件时间

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
		// 基于事件时间的 30 秒滑动窗口,滑动间隔为 10 秒  
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .assignTimestampsAndWatermarks(/* 分配时间戳和水印 */)
                .window(SlidingEventTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

注意:

相关推荐
Re.不晚11 分钟前
Java入门15——抽象类
java·开发语言·学习·算法·intellij-idea
幼儿园老大*1 小时前
走进 Go 语言基础语法
开发语言·后端·学习·golang·go
1 小时前
开源竞争-数据驱动成长-11/05-大专生的思考
人工智能·笔记·学习·算法·机器学习
ctrey_1 小时前
2024-11-4 学习人工智能的Day21 openCV(3)
人工智能·opencv·学习
啦啦右一2 小时前
前端 | MYTED单篇TED词汇学习功能优化
前端·学习
霍格沃兹测试开发学社测试人社区2 小时前
软件测试学习笔记丨Flask操作数据库-数据库和表的管理
软件测试·笔记·测试开发·学习·flask
今天我又学废了2 小时前
Scala学习记录,List
学习
王俊山IT3 小时前
C++学习笔记----10、模块、头文件及各种主题(一)---- 模块(5)
开发语言·c++·笔记·学习
Mephisto.java3 小时前
【大数据学习 | kafka高级部分】kafka中的选举机制
大数据·学习·kafka
南宫生4 小时前
贪心算法习题其三【力扣】【算法学习day.20】
java·数据结构·学习·算法·leetcode·贪心算法