flink学习(3)——方法的使用—对流的处理(map,flatMap,filter)

map

数据

复制代码
86.149.9.216 10001 17/05/2015:10:05:30 GET /presentations/logstash-monitorama-2013/images/github-contributions.png
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:06:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:07:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:08:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:09:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:10:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:16:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:26:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css

package com.bigdata.day02;

import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * @基本功能:
 * @program:flinkProject
 * @author: jinnian
 * @create:2024-11-21 16:26:15
 **/
public class _01_map {

    public static void main(String[] args) throws Exception {

        //1. env-准备环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        //2. source-加载数据
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/a.log");
        //3. transformation-数据处理转换
        dataStreamSource.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                String[] line = s.split("\\s+");
                LogBean logBean = new LogBean();
                logBean.setIp(line[0]);
                logBean.setUserId(Integer.parseInt(line[1]));
                logBean.setMethod(line[3]);
                
                // 解析时间的方式
                
                // 17/05/2015:10:05:30
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd/MM/yyyy:hh:mm:ss");
                Date date = simpleDateFormat.parse(line[2]);
                
                // 另一种方法
                Date date1 = DateUtils.parseDate(line[2],"dd/MM/yyyy:hh:mm:ss");
                
                logBean.setTimestamp(date1.getTime());
                logBean.setPath(line[4]);

                return JSONObject.toJSONString(logBean);
            }
        }).print();
        //4. sink-数据输出


        //5. execute-执行
        env.execute();
    }
}

flatMap案例

复制代码
张三,苹果手机,联想电脑,华为平板
李四,华为手机,苹果电脑,小米平板

package com.bigdata.day02;

public class _02_flatmap {

    public static void main(String[] args) throws Exception {

        //1. env-准备环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        //2. source-加载数据
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/flatmap.log");
        //3. transformation-数据处理转换
        dataStreamSource.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String s, Collector<String> collector) throws Exception {
                String[] split = s.split(",");
                for (int i = 1; i < split.length; i++) {
                    collector.collect(split[0]+"有"+split[i]);
                }
            }
        }).print();
        //4. sink-数据输出


        //5. execute-执行
        env.execute();
    }
}

filter案例

复制代码
package com.bigdata.day02;

public class _03_filter {

    public static void main(String[] args) throws Exception {

        //1. env-准备环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        //2. source-加载数据
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/a.log");
        //3. transformation-数据处理转换
        dataStreamSource.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                String[] line = s.split("\\s+");
                LogBean logBean = new LogBean();
                logBean.setIp(line[0]);
                logBean.setUserId(Integer.parseInt(line[1]));
                logBean.setMethod(line[3]);
                // 17/05/2015:10:05:30
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd/MM/yyyy:hh:mm:ss");
                Date date = simpleDateFormat.parse(line[2]);
                // 另一种方法
                Date date1 = DateUtils.parseDate(line[2],"dd/MM/yyyy:hh:mm:ss");
                logBean.setTimestamp(date1.getTime());
                logBean.setPath(line[4]);

                return JSONObject.toJSONString(logBean);
            }
        }).filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String s) throws Exception {

                return s.contains("83.149.9.216");
            }
        }).print();
        //4. sink-数据输出

        //5. execute-执行
        env.execute();
    }
}
相关推荐
MY_TEUCK5 小时前
Sealos 平台部署实战指南:结合 Cursor 与版本发布流程
java·人工智能·学习·aigc
2401_873479405 小时前
如何利用IP查询定位识别电商刷单?4个关键指标+工具配置方案
开发语言·tcp/ip·php
我爱cope6 小时前
【从0开始学设计模式-10| 装饰模式】
java·开发语言·设计模式
菜鸟学Python6 小时前
Python生态在悄悄改变:FastAPI全面反超,Django和Flask还行吗?
开发语言·python·django·flask·fastapi
朝新_6 小时前
【Spring AI 】图像与语音模型实战
java·人工智能·spring
RH2312117 小时前
2026.4.16Linux 管道
java·linux·服务器
zmsofts7 小时前
java面试必问13:MyBatis 一级缓存、二级缓存:从原理到脏数据,一篇讲透
java·面试·mybatis
浪浪小洋7 小时前
c++ qt课设定制
开发语言·c++
charlie1145141917 小时前
嵌入式C++工程实践第16篇:第四次重构 —— LED模板,从通用GPIO到专用抽象
c语言·开发语言·c++·驱动开发·嵌入式硬件·重构
handler017 小时前
Linux: 基本指令知识点(2)
linux·服务器·c语言·c++·笔记·学习