flink学习(3)——方法的使用—对流的处理(map,flatMap,filter)

map

数据

复制代码
86.149.9.216 10001 17/05/2015:10:05:30 GET /presentations/logstash-monitorama-2013/images/github-contributions.png
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:06:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:07:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:08:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:09:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:10:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:16:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:26:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css

package com.bigdata.day02;

import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * @基本功能:
 * @program:flinkProject
 * @author: jinnian
 * @create:2024-11-21 16:26:15
 **/
public class _01_map {

    public static void main(String[] args) throws Exception {

        //1. env-准备环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        //2. source-加载数据
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/a.log");
        //3. transformation-数据处理转换
        dataStreamSource.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                String[] line = s.split("\\s+");
                LogBean logBean = new LogBean();
                logBean.setIp(line[0]);
                logBean.setUserId(Integer.parseInt(line[1]));
                logBean.setMethod(line[3]);
                
                // 解析时间的方式
                
                // 17/05/2015:10:05:30
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd/MM/yyyy:hh:mm:ss");
                Date date = simpleDateFormat.parse(line[2]);
                
                // 另一种方法
                Date date1 = DateUtils.parseDate(line[2],"dd/MM/yyyy:hh:mm:ss");
                
                logBean.setTimestamp(date1.getTime());
                logBean.setPath(line[4]);

                return JSONObject.toJSONString(logBean);
            }
        }).print();
        //4. sink-数据输出


        //5. execute-执行
        env.execute();
    }
}

flatMap案例

复制代码
张三,苹果手机,联想电脑,华为平板
李四,华为手机,苹果电脑,小米平板

package com.bigdata.day02;

public class _02_flatmap {

    public static void main(String[] args) throws Exception {

        //1. env-准备环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        //2. source-加载数据
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/flatmap.log");
        //3. transformation-数据处理转换
        dataStreamSource.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String s, Collector<String> collector) throws Exception {
                String[] split = s.split(",");
                for (int i = 1; i < split.length; i++) {
                    collector.collect(split[0]+"有"+split[i]);
                }
            }
        }).print();
        //4. sink-数据输出


        //5. execute-执行
        env.execute();
    }
}

filter案例

复制代码
package com.bigdata.day02;

public class _03_filter {

    public static void main(String[] args) throws Exception {

        //1. env-准备环境
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        //2. source-加载数据
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/a.log");
        //3. transformation-数据处理转换
        dataStreamSource.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                String[] line = s.split("\\s+");
                LogBean logBean = new LogBean();
                logBean.setIp(line[0]);
                logBean.setUserId(Integer.parseInt(line[1]));
                logBean.setMethod(line[3]);
                // 17/05/2015:10:05:30
                SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd/MM/yyyy:hh:mm:ss");
                Date date = simpleDateFormat.parse(line[2]);
                // 另一种方法
                Date date1 = DateUtils.parseDate(line[2],"dd/MM/yyyy:hh:mm:ss");
                logBean.setTimestamp(date1.getTime());
                logBean.setPath(line[4]);

                return JSONObject.toJSONString(logBean);
            }
        }).filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String s) throws Exception {

                return s.contains("83.149.9.216");
            }
        }).print();
        //4. sink-数据输出

        //5. execute-执行
        env.execute();
    }
}
相关推荐
龚子亦4 分钟前
Unity学习之Shader(Phong与Blinn-Phong)
学习·unity·游戏引擎·shader
人员安全定位37 分钟前
仓储物流中UWB标签实现货物实时追踪的技术路径与品铂科技方案解析
大数据·科技·物联网
nenchoumi31192 小时前
Pytorch学习笔记(十二)Learning PyTorch - NLP from Scratch
pytorch·笔记·学习
技术干货贩卖机2 小时前
MATLAB绘图配色包说明
开发语言·matlab
时光不负追梦人2 小时前
谈谈对spring IOC的理解,原理和实现
java·后端·spring
胡耀超3 小时前
7.模型选择与评估:构建科学的参数调优与性能评估体系——Python数据挖掘代码实践
开发语言·人工智能·python·机器学习·数据挖掘
沐墨专攻技术3 小时前
深入理解指针(4)(C语言版)
c语言·开发语言
程序猿ZhangSir3 小时前
Redis 和 MySQL双写一致性的更新策略有哪些?常见面试题深度解答。
java·数据库·spring boot·redis·mysql·缓存·mybatis
flying jiang3 小时前
HttpServletRequest
java·网络·websocket·http
执念斩长河3 小时前
gin学习
驱动开发·学习·gin