39、Flink 的窗口函数 WindowFunction 示例

bash 复制代码
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.state.KeyedStateStore;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.assigners.TumblingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.time.Duration;

public class _06_WindowFunction {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<String> input = env.socketTextStream("localhost", 8888);

        // ReduceFunction
        input.keyBy(e -> e)
                .window(TumblingProcessingTimeWindows.of(Duration.ofSeconds(5)))
                .reduce(new ReduceFunction<String>() {
                    public String reduce(String v1, String v2) {
                        return v1 + "-" + v2;
                    }
                })
                .print();

        // AggregateFunction
        input.keyBy(e -> e)
                .window(TumblingProcessingTimeWindows.of(Duration.ofSeconds(5)))
                .aggregate(new MyAggregateFunction());

        // ProcessWindowFunction
        input
                .keyBy(e -> e)
                .window(TumblingEventTimeWindows.of(Duration.ofSeconds(5)))
                .process(new MyProcessWindowFunction());

        // 增量聚合的 ProcessWindowFunction
        // 使用 ReduceFunction 增量聚合
        input
                .keyBy(e -> e)
                .window(TumblingProcessingTimeWindows.of(Duration.ofSeconds(5)))
                .reduce(new MyReduceProcessFunction(), new MyProcessWindowFunction2());

        // 使用 AggregateFunction 增量聚合
        input
                .keyBy(e -> e)
                .window(TumblingProcessingTimeWindows.of(Duration.ofSeconds(5)))
                .aggregate(new AverageAggregate(), new MyProcessWindowFunction3());

        // 在 ProcessWindowFunction 中使用 per-window state
        // ProcessWindowFunction
        input
                .keyBy(e -> e)
                .window(TumblingEventTimeWindows.of(Duration.ofSeconds(5)))
                .process(new ProcessWindowFunction<String, String, String, TimeWindow>() {
                    @Override
                    public void process(String s, ProcessWindowFunction<String, String, String, TimeWindow>.Context context, Iterable<String> iterable, Collector<String> collector) throws Exception {
                        // 访问全局的 keyed state
                        KeyedStateStore globalState = context.globalState();

                        // 访问作用域仅限于当前窗口的 keyed state
                        KeyedStateStore windowState = context.windowState();
                    }
                });

        env.execute();
    }
}

class MyAggregateFunction implements AggregateFunction<String, String, String> {

    @Override
    public String createAccumulator() {
        return "createAccumulator->";
    }

    @Override
    public String add(String s1, String s2) {
        return s1 + "-" + s2;
    }

    @Override
    public String getResult(String s) {
        return "res=>" + s;
    }

    @Override
    public String merge(String s1, String acc1) {
        return "merge=>" + s1 + ",=>" + acc1;
    }
}

class MyProcessWindowFunction extends ProcessWindowFunction<String, String, String, TimeWindow> {

    @Override
    public void process(String s, ProcessWindowFunction<String, String, String, TimeWindow>.Context context, Iterable<String> iterable, Collector<String> collector) throws Exception {
        for (String res : iterable) {
            collector.collect(res);
        }
    }
}

class MyReduceProcessFunction implements ReduceFunction<String> {

    public String reduce(String r1, String r2) {
        return r1 + "-" + r2;
    }
}

class MyProcessWindowFunction2 extends ProcessWindowFunction<String, Tuple2<Long, String>, String, TimeWindow> {

    public void process(String key,
                        Context context,
                        Iterable<String> minReadings,
                        Collector<Tuple2<Long, String>> out) {
        String min = minReadings.iterator().next();
        out.collect(new Tuple2<>(context.window().getStart(), min));
    }
}

class AverageAggregate implements AggregateFunction<String, String, String> {

    @Override
    public String createAccumulator() {
        return "createAccumulator=>";
    }

    @Override
    public String add(String s1, String s2) {
        return s1 + "-" + s2;
    }

    @Override
    public String getResult(String s) {
        return s;
    }

    @Override
    public String merge(String s, String acc1) {
        return "merge->" + s + "-" + acc1;
    }
}

class MyProcessWindowFunction3 extends ProcessWindowFunction<String, Tuple2<String, Double>, String, TimeWindow> {

    public void process(String key,
                        Context context,
                        Iterable<String> averages,
                        Collector<Tuple2<String, Double>> out) {
        String average = averages.iterator().next();
        out.collect(new Tuple2<>(key, 1.0));
    }
}
相关推荐
黄雪超1 小时前
核心知识—— RDD常用算子之数据转换
大数据·spark
AWS官方合作商1 小时前
AWS云服务:大数据公司实现技术突破与商业价值的核心引擎
大数据·云计算·aws
码界筑梦坊5 小时前
基于Spark的抖音数据分析热度预测系统
大数据·信息可视化·数据分析·spark·毕业设计·个性化推荐
生信学习小达人6 小时前
arcgis10.8 Toolbox中没有找到conversion tools模块
大数据
Oo_Amy_oO7 小时前
Airflow+Spark/Flink vs. Kettle
大数据·flink·spark
后端小肥肠8 小时前
港大团队开源LightRAG:知识图谱+双层检索,复杂问答准确率飙升30%
大数据·人工智能·openai
计算机毕设定制辅导-无忧学长20 小时前
TDengine 权限管理与安全配置实战(二)
大数据·安全·tdengine
2401_8979300620 小时前
Kibana 连接 Elasticsearch(8.11.3)教程
大数据·elasticsearch·jenkins
计算机毕设定制辅导-无忧学长20 小时前
TDengine 快速上手:安装部署与基础 SQL 实践(一)
大数据·sql·tdengine
塔能物联运维20 小时前
塔能科技:精准节能,擎动工厂可持续发展巨轮
大数据·运维