Flink CEP 实践

java 复制代码
package com.techwolf.hubble;

import com.alibaba.fastjson.JSONObject;
import com.techwolf.hubble.constant.Config;
import com.techwolf.hubble.model.TestEvent;
import org.apache.flink.api.common.eventtime.TimestampAssigner;
import org.apache.flink.api.common.eventtime.TimestampAssignerSupplier;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.PatternFlatSelectFunction;
import org.apache.flink.cep.PatternFlatTimeoutFunction;
import org.apache.flink.cep.PatternStream;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.PrintSinkFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import java.util.List;
import java.util.Map;


/**
 * Flink CEP demo job.
 *
 * <p>Builds a small in-memory stream of {@link TestEvent}s, keys it by
 * {@link TestEvent#getId()}, and applies the pattern
 * "action {@code A} followed by action {@code B} within 10 seconds".
 * Partial matches that time out are routed to a side output, serialized to
 * JSON and printed. The main (fully matched) output is produced but not
 * consumed by this job.
 */
public class App {

    /** Name of the first pattern stage; also the key under which its events are reported. */
    private static final String BEGIN_STAGE = "begin";

    /** Name of the second pattern stage. */
    private static final String END_STAGE = "end";

    /**
     * Assembles and executes the demo pipeline.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception{
        // Set up the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Explicitly request event time. NOTE(review): this call is deprecated in
        // newer Flink releases where event time is already the default; it is kept
        // so that the job behaves the same on older versions.
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Timestamp extractor / watermark generator for the input stream.
        // Timestamps of the test data below are strictly ascending.
        WatermarkStrategy<TestEvent> watermarkStrategy = WatermarkStrategy
                .<TestEvent>forMonotonousTimestamps()
                .withTimestampAssigner(new EventTimeAssignerSupplier());

        // Read the clock once so that the offsets between test events are exact
        // instead of drifting with every constructor call.
        final long now = System.currentTimeMillis();

        // NOTE(review): constructor arguments are presumably
        // (id, action, eventTime, label) - confirm against TestEvent.
        DataStream<TestEvent> inputStream = env.fromElements(
                new TestEvent("1", "A", secondsAgo(now, 100), "1"),
                new TestEvent("1", "A", secondsAgo(now, 85), "2"),
                new TestEvent("1", "A", secondsAgo(now, 80), "3"),
                new TestEvent("1", "A", secondsAgo(now, 75), "4"),
                new TestEvent("1", "A", secondsAgo(now, 60), "5"),
                new TestEvent("1", "A", secondsAgo(now, 55), "6"),
                new TestEvent("1", "A", secondsAgo(now, 40), "7"),
                new TestEvent("1", "A", secondsAgo(now, 35), "8"),
                new TestEvent("1", "A", secondsAgo(now, 20), "9"),
                new TestEvent("1", "A", secondsAgo(now, 10), "10"),
                new TestEvent("1", "B", secondsAgo(now, 5), "11")
        ).assignTimestampsAndWatermarks(watermarkStrategy);

        PatternStream<TestEvent> patternStream =
                CEP.pattern(inputStream.keyBy(TestEvent::getId), buildPattern());

        // Anonymous subclass so that the element type is retained for Flink.
        OutputTag<TestEvent> timeOutTag = new OutputTag<TestEvent>("timeOutTag"){};

        // Process match results; timed-out partial matches go to the side output.
        SingleOutputStreamOperator<TestEvent> matchedStream = patternStream
                .flatSelect(timeOutTag, new EventTimeOut(), new FlatSelect())
                .uid("match_twenty_minutes_pattern");

        // Only the timed-out events are rendered (as JSON) and printed.
        DataStream<String> result = matchedStream
                .getSideOutput(timeOutTag)
                .map(new MapFunction<TestEvent, String>() {
                    @Override
                    public String map(TestEvent testEvent) throws Exception {
                        return JSONObject.toJSONString(testEvent);
                    }
                });
        result.print();

        env.execute(Config.JOB_NAME);
    }

    /** Returns the epoch-millisecond timestamp lying {@code seconds} before {@code now}. */
    private static long secondsAgo(long now, long seconds) {
        return now - seconds * 1000L;
    }

    /** Builds the pattern: action "A", followed by action "B", within 10 seconds. */
    private static Pattern<TestEvent, TestEvent> buildPattern() {
        return Pattern.<TestEvent>begin(BEGIN_STAGE)
                .where(new SimpleCondition<TestEvent>() {
                    @Override
                    public boolean filter(TestEvent testEvent) throws Exception {
                        // Constant-first equals: a null action simply does not match
                        // instead of failing the job with a NullPointerException.
                        return "A".equals(testEvent.getAction());
                    }
                })
                .followedBy(END_STAGE)
                .where(new SimpleCondition<TestEvent>() {
                    @Override
                    public boolean filter(TestEvent testEvent) throws Exception {
                        return "B".equals(testEvent.getAction());
                    }
                })
                .within(Time.seconds(10));
    }

    /** Emits every event recorded for the {@code "begin"} stage, if any. */
    private static void emitBeginEvents(Map<String, List<TestEvent>> map, Collector<TestEvent> collector) {
        List<TestEvent> beginEvents = map.get(BEGIN_STAGE);
        if (beginEvents == null) {
            return;
        }
        for (TestEvent event : beginEvents) {
            collector.collect(event);
        }
    }

    /**
     * Handles timed-out partial matches by forwarding the events captured for
     * the {@code "begin"} stage.
     */
    public static class EventTimeOut implements PatternFlatTimeoutFunction<TestEvent,TestEvent> {
        private static final long serialVersionUID = -2471077777598713906L;

        /**
         * @param map       events of the partial match, keyed by pattern stage name
         * @param l         timeout timestamp supplied by Flink (unused)
         * @param collector receives the {@code "begin"} stage events
         */
        @Override
        public void timeout(Map<String, List<TestEvent>> map, long l, Collector<TestEvent> collector) throws Exception {
            emitBeginEvents(map, collector);
        }
    }

    /**
     * Handles complete matches by forwarding the events captured for the
     * {@code "begin"} stage.
     */
    public static class FlatSelect implements PatternFlatSelectFunction<TestEvent,TestEvent> {
        private static final long serialVersionUID = 1753544074226581611L;

        /**
         * @param map       events of the match, keyed by pattern stage name
         * @param collector receives the {@code "begin"} stage events
         */
        @Override
        public void flatSelect(Map<String, List<TestEvent>> map, Collector<TestEvent> collector) throws Exception {
            emitBeginEvents(map, collector);
        }
    }

    /** Supplies {@link EventTimeAssigner} instances to the watermark strategy. */
    public static class EventTimeAssignerSupplier implements TimestampAssignerSupplier<TestEvent> {
        private static final long serialVersionUID = -9040340771307752904L;

        @Override
        public TimestampAssigner<TestEvent> createTimestampAssigner(Context context) {
            return new EventTimeAssigner();
        }
    }

    /** Uses {@link TestEvent#getEventTime()} as the record timestamp. */
    public static class EventTimeAssigner implements TimestampAssigner<TestEvent> {
        /**
         * @param event the record whose timestamp is extracted
         * @param l     timestamp previously attached to the record (unused)
         * @return the value of {@code event.getEventTime()}
         */
        @Override
        public long extractTimestamp(TestEvent event, long l) {
            return event.getEventTime();
        }
    }
}
相关推荐
健康有益科技12 小时前
AI驱动健康升级:新零售企业从“卖产品”到“卖健康”的转型路径
大数据·人工智能·健康医疗·零售
涛思数据(TDengine)12 小时前
新客户 | TDengine 时序数据库赋能开源鸿蒙物联展区实时监控与展示
大数据·时序数据库·tdengine
励志成为糕手12 小时前
ZooKeeper架构深度解析:分布式协调服务的核心设计与实现
大数据·分布式·zookeeper·架构·一致性
YangYang9YangYan12 小时前
2025年跨领域管理能力提升认证路径分析
大数据·人工智能
华略创新13 小时前
鼓励员工提出建议,激发参与感——制造企业软件应用升级的密钥
大数据·制造·软件开发·管理系统·erp·企业管理
武子康13 小时前
大数据-89 Spark应用必备:进程通信、序列化机制与RDD执行原理
大数据·后端·spark
白毛大侠13 小时前
如何安全地删除与重建 Elasticsearch 的 .watches 索引
大数据·elasticsearch·jenkins
zskj_qcxjqr14 小时前
七彩喜微高压氧舱:科技与体验的双重革新,重新定义家用氧疗新标杆
大数据·人工智能·科技·机器人
Elastic 中国社区官方博客14 小时前
Elasticsearch 的 JVM 基础知识:指标、内存和监控
java·大数据·elasticsearch·搜索引擎·全文检索
gptplusplus14 小时前
超越自动化:为什么说供应链的终局是“AI + 人类专家”的混合智能?
大数据·人工智能