Flink Window DEMO 学习

该文档演示了fink windows的操作DEMO

环境准备:

测试数据

自动向kafka推送数据

java 复制代码
import cn.hutool.core.date.DateUtil;
import com.alibaba.fastjson2.JSONObject;
import com.wfg.flink.example.dto.KafkaPvDto;
import com.wfg.flink.example.utils.RandomGeneratorUtils;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.time.LocalDateTime;
import java.util.Properties;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;

import static com.wfg.flink.example.constants.Constants.KAFKA_BROKERS;
import static com.wfg.flink.example.constants.Constants.TOPIC_NAME;

public class KafkaTestProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", KAFKA_BROKERS);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        try (Producer<String, String> producer = new KafkaProducer<>(props)) {
            int times = 100000;
            for (int i = 0; i < times; i++) {
                System.out.println("Send No. :" + i);
                CompletableFuture.allOf(
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer)),
                        CompletableFuture.runAsync(() -> sendKafkaMsg(producer))
                ).join();
                producer.flush();
                Random random = new Random();
                int randomNumber = random.nextInt(7); // 生成一个0到6的随机数
                Thread.sleep(1000 * randomNumber);
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }
    private static void sendKafkaMsg(Producer<String, String> producer) {
        String msg = createMsg();
        System.out.println(msg);
        producer.send(new ProducerRecord<>(TOPIC_NAME, UUID.randomUUID().toString().replaceAll("-", ""), msg));
    }
    private static String createMsg() {
        KafkaPvDto dto = new KafkaPvDto();
        dto.setUuid(UUID.randomUUID().toString().replaceAll("-", ""));
        dto.setUserName(RandomGeneratorUtils.generateRandomFullName());
        dto.setVisitIp(RandomGeneratorUtils.generateRandomIp());
//        DateTime begin = DateUtil.beginOfDay(new Date());
//        String timeStr = DateUtil.format(RandomGeneratorUtils.generateRandomDateTime(LocalDateTimeUtil.of(begin).toLocalDate(), LocalDate.now()), "yyyy-MM-dd HH:mm:ss");
        String timeStr = DateUtil.format(LocalDateTime.now(), "yyyy-MM-dd HH:mm:ss");
        dto.setVisitTime(timeStr);
        dto.setVisitServiceIp(RandomGeneratorUtils.generateRandomIp());
        return JSONObject.toJSONString(dto);
    }
}

注意:

java 复制代码
/**
 *
 * @author wfg
 */
@Slf4j
public class DataSplitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> collector) {
        KafkaPvDto data = JSONObject.parseObject(value, KafkaPvDto.class);
        if (data != null) {
            collector.collect(new Tuple2<>(data.getUserName(), 1));
        }
    }
}

基于时间窗口

java 复制代码
**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于时间窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .timeWindow(Time.seconds(30))
                .sum(0)
                .print();*/
        env.execute("flink window example");
    }
}

基于滑动时间窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");

        //基于滑动时间窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .timeWindow(Time.seconds(60), Time.seconds(30))
                .sum(0)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于事件数量窗口
        data.flatMap(new DataSplitter())
                .keyBy(1)
                .countWindow(3)
                .sum(0)
                .print();
        env.execute("flink window example");
    }
}

基于事件数量滑动窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于事件数量滑动窗口
       data.flatMap(new DataSplitter())
                .keyBy(1)
                .countWindow(4, 3)
                .sum(0)
                .print();*
        env.execute("flink window example");
    }
}

基于会话时间窗口

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //基于会话时间窗口
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .window(ProcessingTimeSessionWindows.withGap(Time.seconds(5)))
                //表示如果 5s 内没出现数据则认为超出会话时长,然后计算这个窗口的和
                .sum(1)
                .print();

        env.execute("flink window example");
    }
}

滚动窗口(Tumbling Window)

滚动窗口(Tumbling Window)

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
        //滚动窗口(Tumbling Window) 基于处理时间的 30 秒滚动窗口
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .window(TumblingProcessingTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();;

        env.execute("flink window example");
    }
}

基于事件时间

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
		// 基于事件时间的 30 秒滚动窗口
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .assignTimestampsAndWatermarks(/* 分配时间戳和水印 */)
                .window(TumblingEventTimeWindows.of(Time.seconds(30)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

滑动窗口(Sliding Window)

基于处理时间

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
		// 基于处理时间的 30 秒滑动窗口,滑动间隔为 10 秒
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .window(SlidingProcessingTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

基于事件时间

java 复制代码
/**
 * Desc: Flink Window 学习
 */
@Slf4j
public class WindowsDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        String brokers = "localhost:9092";
        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers(brokers)
                .setTopics(TOPIC_NAME)
                .setGroupId("my-group")
                .setStartingOffsets(OffsetsInitializer.earliest())
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();
        DataStreamSource<String> data = env.fromSource(source, WatermarkStrategy.noWatermarks(), "wfgxxx");
		// 基于事件时间的 30 秒滑动窗口,滑动间隔为 10 秒  
        data.flatMap(new DataSplitter())
                .keyBy(v->v.f0)
                .assignTimestampsAndWatermarks(/* 分配时间戳和水印 */)
                .window(SlidingEventTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1)
                .print();
        env.execute("flink window example");
    }
}

注意:

相关推荐
请你喝好果汁6412 小时前
Jupyter Notebook 配置学习笔记
笔记·学习·jupyter
冬日枝丫2 小时前
【spring】spring学习系列之六:spring的启动流程(下)
java·学习·spring
ocean10102 小时前
项目管理学习-CSPM-4考试总结
学习·程序人生
虾球xz2 小时前
游戏引擎学习第286天:开始解耦实体行为
c++·人工智能·学习·游戏引擎
應呈2 小时前
FreeRTOS的学习记录(任务创建,任务挂起)
java·linux·学习
序属秋秋秋2 小时前
我的创作纪念日——《惊变256天》
学习·程序人生·学习方法
maozexijr2 小时前
Flink 的任务槽和槽共享
大数据·flink
关于不上作者榜就原神启动那件事3 小时前
git版本控制学习
git·学习
LuckyLay3 小时前
Vue百日学习计划Day9-15天详细计划-Gemini版
前端·vue.js·学习
持之以恒的天秤4 小时前
线程同步学习
linux·学习