Flink:Table API 之自定义聚合函数(AggregateFunction)

复制代码
package cn.edu.tju.demo3;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.*;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.table.descriptors.*;
import org.apache.flink.table.functions.AggregateFunction;
import org.apache.flink.table.functions.ScalarFunction;
import org.apache.flink.table.functions.TableFunction;
import org.apache.flink.types.Row;

public class Test50 {
    /** Host name or IP of the socket source; the value here is a masked placeholder. */
    private static final String HOST_NAME = "xx.xx.xx.xx";
    /** TCP port of the socket source. */
    private static final int PORT = 9999;
    /** Delimiter passed to the socket source to separate incoming records. */
    private static final String DELIMITER = "\n";


    /**
     * Entry point of the demo job.
     *
     * <p>Reads comma-separated text records from a socket, converts each one into a
     * {@link DataInfo}, and aggregates {@code val} per {@code info} with the custom
     * aggregate function twice: once through the Table API and once through SQL.
     * Both results are converted to retract streams and printed.
     *
     * @param args command-line arguments (not used)
     * @throws Exception propagated from the Flink calls, e.g. {@code env.execute}
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Every incoming record is split on "," into three fields: ts, info, val.
        DataStream<String> rawLines = env.socketTextStream(HOST_NAME, PORT, DELIMITER);
        SingleOutputStreamOperator<DataInfo> records = rawLines.map(
                new MapFunction<String, DataInfo>() {
                    @Override
                    public DataInfo map(String line) throws Exception {
                        String[] fields = line.split(",");
                        long ts = Long.parseLong(fields[0]);
                        String info = fields[1];
                        double val = Double.parseDouble(fields[2]);
                        return new DataInfo(ts, info, val);
                    }
                });

        Table dataTable = tableEnv.fromDataStream(records, "ts,info,val");
        tableEnv.registerFunction("myAggregateFunction", new MyAggregateFunction());

        // Variant 1: Table API, grouping by info and applying the registered function.
        Table projected = dataTable.select("ts,info,val");
        Table resultTable = projected
                .groupBy("info")
                .aggregate("myAggregateFunction(val) as avgVal")
                .select("info, avgVal");

        // Variant 2: the same aggregation expressed in SQL over a temporary view.
        tableEnv.createTemporaryView("dataInfo", dataTable);
        String query = "select info,myAggregateFunction(val) from dataInfo group by info";
        Table resultTableSql = tableEnv.sqlQuery(query);

        // The aggregate result is updated as rows arrive, so both tables are
        // emitted as retract streams; the SQL output is tagged with "sql".
        tableEnv.toRetractStream(resultTable, Row.class).print();
        tableEnv.toRetractStream(resultTableSql, Row.class).print("sql");

        env.execute("my job");
    }

    /**
     * Mutable bean holding one input record: a timestamp, an info key and a numeric value.
     *
     * <p>Exposes a public no-argument constructor and a getter/setter pair for every field.
     */
    public static class DataInfo {
        private long ts;
        private String info;
        private double val;

        /** Creates an instance with all fields at their Java default values. */
        public DataInfo() {
        }

        /**
         * Creates a fully populated record.
         *
         * @param ts   timestamp of the record
         * @param info key of the record
         * @param val  numeric value of the record
         */
        public DataInfo(long ts, String info, double val) {
            this.ts = ts;
            this.info = info;
            this.val = val;
        }

        /** @return the timestamp */
        public long getTs() {
            return this.ts;
        }

        /** @param ts the timestamp to store */
        public void setTs(long ts) {
            this.ts = ts;
        }

        /** @return the info key */
        public String getInfo() {
            return this.info;
        }

        /** @param info the info key to store */
        public void setInfo(String info) {
            this.info = info;
        }

        /** @return the numeric value */
        public double getVal() {
            return this.val;
        }

        /** @param val the numeric value to store */
        public void setVal(double val) {
            this.val = val;
        }

        /**
         * Renders the record as {@code DataInfo{ts=..., info='...', val='...'}}.
         *
         * @return the textual form of this record
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("DataInfo{");
            text.append("ts=").append(ts);
            text.append(", info='").append(info).append('\'');
            text.append(", val='").append(val).append('\'');
            text.append('}');
            return text.toString();
        }
    }

    /**
     * Custom aggregate function that computes the arithmetic mean of its input values.
     *
     * <p>Implements {@code getValue}, {@code createAccumulator} and {@code accumulate}.
     * The accumulator is a {@code Tuple2}: {@code f0} holds the running sum and
     * {@code f1} the number of values accumulated so far.
     */
    public static class MyAggregateFunction extends AggregateFunction<Double, Tuple2<Double, Integer>> {

        /**
         * Produces the aggregate result from the accumulator.
         *
         * @param accumulator running sum ({@code f0}) and count ({@code f1})
         * @return the mean {@code f0 / f1}, or {@code null} when no value has been
         *         accumulated yet
         */
        @Override
        public Double getValue(Tuple2<Double, Integer> accumulator) {
            // Without this guard an empty accumulator would evaluate 0.0 / 0 and yield NaN.
            if (accumulator.f1 == 0) {
                return null;
            }
            return accumulator.f0 / accumulator.f1;
        }

        /**
         * Creates an empty accumulator.
         *
         * @return a tuple with sum {@code 0.0} and count {@code 0}
         */
        @Override
        public Tuple2<Double, Integer> createAccumulator() {
            // Diamond operator instead of the raw type avoids an unchecked conversion.
            return new Tuple2<>(0.0, 0);
        }

        /**
         * Folds one input value into the accumulator. This method is not an override;
         * per the Flink user-defined-function contract it is looked up by name.
         *
         * @param accumulator accumulator to update in place
         * @param d           value to add to the running sum
         */
        public void accumulate(Tuple2<Double, Integer> accumulator, double d) {
            accumulator.f1 += 1;
            accumulator.f0 += d;
        }
    }
}

运行程序前,先在 HOST_NAME 对应的主机上启动监听(端口需与代码中的 PORT 一致):

nc -lk 9999

输入:

复制代码
1689999831,ffff,34.2
1689999832,ffff,35.3

结果

相关推荐
Hello.Reader2 分钟前
Flink 文件系统通用配置默认文件系统与连接数限制实战
vue.js·flink·npm
AIFarmer4 分钟前
在EV3上运行Python语言——无线编程
python·ev3
YongCheng_Liang5 分钟前
零基础学大数据:大数据基础与前置技术夯实
大数据·big data
不会代码的小测试6 分钟前
UI自动化-POM封装
开发语言·python·selenium·自动化
2401_841495648 分钟前
【LeetCode刷题】二叉树的层序遍历
数据结构·python·算法·leetcode·二叉树··队列
AC赳赳老秦10 分钟前
2026国产算力新周期:DeepSeek实战适配英伟达H200,引领大模型训练效率跃升
大数据·前端·人工智能·算法·tidb·memcache·deepseek
ZH154558913118 分钟前
Flutter for OpenHarmony Python学习助手实战:GUI桌面应用开发的实现
python·学习·flutter
鹏说大数据19 分钟前
Spark 和 Hive 的关系与区别
大数据·hive·spark
B站计算机毕业设计超人23 分钟前
计算机毕业设计Hadoop+Spark+Hive招聘推荐系统 招聘大数据分析 大数据毕业设计(源码+文档+PPT+ 讲解)
大数据·hive·hadoop·python·spark·毕业设计·课程设计
B站计算机毕业设计超人24 分钟前
计算机毕业设计hadoop+spark+hive交通拥堵预测 交通流量预测 智慧城市交通大数据 交通客流量分析(源码+LW文档+PPT+讲解视频)
大数据·hive·hadoop·python·spark·毕业设计·课程设计