flink Transformation算子(更新中)

Transformation算子

map

该方法是将一个DataStream调用map方法返回一个新的DataStream。本质是将该DataStream中对应的每一条数据依次迭代出来,应用map方法传入的计算逻辑,返回一个新的DataStream。原来的DataStream中对应的每一条数据,与新生成的DataStream中数据是一一对应的,也可以说是存在着映射关系的。
java 复制代码
package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class MapDemo  {
    public static void main(String[] args) throws  Exception {
         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
         DataStreamSource<String> dataSource = env.socketTextStream("192.168.25.62", 8899);
         dataSource.map(new QueryCategoryNameFromMySQLFunction()).print();
         env.execute();

    }
}
java 复制代码
 package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.java.tuple.Tuple4;
import org.apache.flink.configuration.Configuration;


import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;

public class QueryCategoryNameFromMySQLFunction extends RichMapFunction<String, Tuple4<String, String, String, Double>> {
    private Connection connection;
    private PreparedStatement preparedStatement;
    @Override
    public void open(Configuration parameters) throws Exception {
        connection=DriverManager.getConnection("jdbc:mysql://localhost:3306/dy_flink?characterEncoding=utf-8","root","");
        preparedStatement=connection.prepareStatement("select name from t_category where id = ?");
        super.open(parameters);
    }

    @Override
    public Tuple4<String, String, String, Double> map(String s) throws Exception {
         String[] fields = s.split(",");
         String cid = fields[0];
         preparedStatement.setInt(1,Integer.parseInt(cid));
         ResultSet resultSet = preparedStatement.executeQuery();
        String name = "未知";
         while (resultSet.next()){
               name = resultSet.getString(1);
         }
         resultSet.close();
        return  Tuple4.of(fields[0], fields[1], name, Double.parseDouble(fields[3]));
    }
    
    @Override
    public void close() throws Exception {
        if(connection!=null){
            connection.close();
        }
        if(preparedStatement!=null){
            preparedStatement.close();
        }
    }
}

flatMap扁平化映射(DataStream → DataStream)

- 该方法是将一个DataStream调用flatMap方法返回一个新的DataStream,本质上是将该DataStream中的对应的每一条数据依次迭代出来,应用flatMap方法传入的计算逻辑,返回一个新的DataStream。原来的DataStream中输入的一条数据经过flatMap方法传入的计算逻辑后,会返回零到多条数据。所谓的扁平化即将原来的数据压平,返回多条数据。
java 复制代码
package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class FlatMapDemo1 {
    public static void main(String[] args) throws Exception {
         StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8889);
         SingleOutputStreamOperator<Tuple2<String, Integer>> flatMapData = source.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                String[] strings = value.split(" ");
                for (String string : strings) {
                    out.collect(Tuple2.of(string, 1));
                }
            }
        });
         flatMapData.print();
         env.execute("pxj");
    }
}
java 复制代码
package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class FlatMapDemo2 {
    public static void main(String[] args) throws Exception {
         StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8887);
         SingleOutputStreamOperator<Tuple2<String, Integer>> myflatMap = source.transform("MyflatMap", TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {

        }), new MyflatMap());
         myflatMap.print();
        env.execute("pxj");
    }
}
java 复制代码
package com.lyj.sx.flink.day03;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

public  class MyflatMap extends AbstractStreamOperator<Tuple2<String, Integer>> implements OneInputStreamOperator<String, Tuple2<String, Integer>> {
    @Override
    public void processElement(StreamRecord<String> element) throws Exception {
         String[] split = element.getValue().split(",");
        for (String s : split) {
            output.collect(element.replace(Tuple2.of(s,1)));
        }
    }

    @Override
    public void setKeyContextElement(StreamRecord<String> record) throws Exception {
        System.out.println("StreamRecord..............");
        OneInputStreamOperator.super.setKeyContextElement(record);
    
    }
}

keyBy按key分区(DataStream → KeyedStream)

java 复制代码
package com.lyj.sx.flink.day03;


import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyByDemo1 {
    public static void main(String[] args) throws Exception {
         StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8888);
         MapFunction<String, Tuple2<String, Integer>> mapFunction = new MapFunction<String, Tuple2<String, Integer>>() {
            Tuple2<String, Integer> t;

            @Override
            public Tuple2<String, Integer> map(String s) throws Exception {
                String[] strings = s.split(" ");
                for (String string : strings) {
                    t = Tuple2.of(string, 1);
                }
                return t;
            }
        };
         source.map(mapFunction).print();
         env.execute("pxj");
    }
}
java 复制代码
 package com.lyj.sx.flink.day03;


import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyByDemo2 {
    public static void main(String[] args) throws  Exception {
         StreamExecutionEnvironment env= StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8886);
         SingleOutputStreamOperator<Tuple3<String, String, Double>> tpStream = source.map(new MapFunction<String, Tuple3<String, String, Double>>() {
            @Override
            public Tuple3<String, String, Double> map(String s) throws Exception {
                String[] fields = s.split(",");
                return Tuple3.of(fields[0], fields[1], Double.parseDouble(fields[2]));
            }
        });
        KeyedStream<Tuple3<String, String, Double>, Tuple> tuple3TupleKeyedStream = tpStream.keyBy("f0", "f1");
        tuple3TupleKeyedStream.print();
        env.execute("pxj");
    }
}
java 复制代码
package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyByDemo3 {
    public static void main(String[] args)throws Exception {
         StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8886);
         SingleOutputStreamOperator<Tuple2<String, Integer>> map = source.map(new MapFunction<String, Tuple2<String, Integer>>() {
            Tuple2<String,Integer> t;
             @Override
            public Tuple2<String, Integer> map(String s) throws Exception {
                 for (String string : s.split(",")) {
                     t=Tuple2.of(string,1);
                 }
                return t;
            }
        });
         map.print();
        KeyedStream<Tuple2<String, Integer>, String> keyedStream = map.keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> value) throws Exception {
                return value.f0;
            }
        });
        keyedStream.print();
        env.execute("pxj");
    }
}
java 复制代码
package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyByDemo4 {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
        DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8886);
         SingleOutputStreamOperator<Tuple3<String, String, Double>> mapped = source.map(new MapFunction<String, Tuple3<String, String, Double>>() {
            @Override
            public Tuple3<String, String, Double> map(String s) throws Exception {
                String[] fields = s.split(",");
                return Tuple3.of(fields[0], fields[1], Double.parseDouble(fields[2]));
            }
        });
        mapped.keyBy(new KeySelector<Tuple3<String, String, Double>, String>() {
            @Override
            public String getKey(Tuple3<String, String, Double> key) throws Exception {
                return key.f0+key.f1;
            }
        }).print();
        env.execute("pxj");
    }
}
java 复制代码
 package com.lyj.sx.flink.day03;


import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KeyByDemo5 {
    public static void main(String[] args) throws Exception {
         StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8886);
        //山东省,济南市,5000
         SingleOutputStreamOperator<Tuple3<String, String, Double>> map = source.map(new MapFunction<String, Tuple3<String, String, Double>>() {
            @Override
            public Tuple3<String, String, Double> map(String s) throws Exception {
                String[] fields = s.split(",");
                return Tuple3.of(fields[0], fields[1], Double.parseDouble(fields[2]));
            }
        });
         KeyedStream<Tuple3<String, String, Double>, Tuple2<String, String>> tuple3Tuple2KeyedStream = map.keyBy(t -> Tuple2.of(t.f0, t.f1), TypeInformation.of(new TypeHint<Tuple2<String, String>>() {
        }));
        tuple3Tuple2KeyedStream.print();
        env.execute("pxj");
    }
}
java 复制代码
 package com.lyj.sx.flink.day03;

import com.lyj.sx.flink.bean.DataBean;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.beans.beancontext.BeanContext;

public class KeyByDemo6 {
    public static void main(String[] args) throws Exception {
         StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8886);
         SingleOutputStreamOperator<DataBean> beanStream = source.map(new MapFunction<String, DataBean>() {
            @Override
            public DataBean map(String s) throws Exception {
                String[] fields = s.split(",");
                return DataBean.of(fields[0], fields[1], Double.parseDouble(fields[2]));
            }
        });
        KeyedStream<DataBean, DataBean> keyedStream = beanStream.keyBy(t -> t);

        keyedStream.print();
    
        env.execute("pxj");
    }
}

filter过滤(DataStream → DataStream)

- 该方法是将一个DataStream调用filter方法返回一个新的DataStream,本质上是将该DataStream中的对应的每一条输入数据依次迭代出来,应用filter方法传入的过滤逻辑,返回一个新的DataStream。原来的DataStream中输入的一条数据经过fliter方法传入的过滤逻辑后,返回true就会保留这条数据,返回false就会过滤掉该数据。
java 复制代码
 package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class FilterDemo1 {
    public static void main(String[] args) throws Exception {
         StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8886);
         SingleOutputStreamOperator<String> filter = source.filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String s) throws Exception {
                return s.startsWith("h");
            }
        }).setParallelism(2);
         filter.print();
         env.execute("pxj");
    }
}
java 复制代码
 package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.operators.StreamFilter;

public class FilterDemo2 {
    public static void main(String[] args) throws Exception {
         StreamExecutionEnvironment env= StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8886);
         SingleOutputStreamOperator<String> transform = source.transform("myfilter", TypeInformation.of(String.class), new StreamFilter<>(w -> w.startsWith("h")));
         transform.print();
         env.execute("pxj");
    }
}
java 复制代码
package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class FilterDemo3 {
    public static void main(String[] args) throws Exception {
         StreamExecutionEnvironment env= StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8886);
         SingleOutputStreamOperator<String> transform = source.transform("MyFilterFunction", TypeInformation.of(String.class), new MyFilterFunction());
         transform.print();
         env.execute("pxj");
    }
}
java 复制代码
 package com.lyj.sx.flink.day03;

import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

public class MyFilterFunction extends AbstractStreamOperator<String> implements OneInputStreamOperator<String, String> {
    @Override
    public void processElement(StreamRecord<String> element) throws Exception {
         String elementValue = element.getValue();
         if(elementValue.startsWith("h")){
             output.collect(element);
         }
    }

    @Override
    public void setKeyContextElement(StreamRecord<String> record) throws Exception {
        OneInputStreamOperator.super.setKeyContextElement(record);
        System.out.println("setKeyContextElement.........");
    }
}

底层实现filter and map

java 复制代码
package com.lyj.sx.flink.day03;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class FilterAndMapDemo {
    public static void main(String[] args) throws Exception {
         StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(new Configuration());
         DataStreamSource<String> source = env.socketTextStream("192.168.25.62", 8885);
         SingleOutputStreamOperator<Double> myFilterAndMap = source.transform("MyFilterAndMap", TypeInformation.of(Double.class), new MyFilterAndMap());
         myFilterAndMap.print();
        env.execute("pxj");
    }
}
package com.lyj.sx.flink.day03;

import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

public class MyFilterAndMap extends AbstractStreamOperator<Double> implements OneInputStreamOperator<String, Double> {
    @Override
    public void processElement(StreamRecord<String> element) throws Exception {
         String value = element.getValue();
         try {
             int i = Integer.parseInt(value);
             if(i%2==0){
                 double r=i*10.1;
                 output.collect(element.replace(r));
             }
         }catch (Exception e){
             e.printStackTrace();
         }

    }

    @Override
    public void setKeyContextElement(StreamRecord<String> record) throws Exception {
        OneInputStreamOperator.super.setKeyContextElement(record);
    }
}

整理人:pxj_sx(潘陈)

日 期:2024-06-02 16:06:42

相关推荐
Json_181790144802 小时前
An In-depth Look into the 1688 Product Details Data API Interface
大数据·json
Qspace丨轻空间4 小时前
气膜场馆:推动体育文化旅游创新发展的关键力量—轻空间
大数据·人工智能·安全·生活·娱乐
Elastic 中国社区官方博客5 小时前
如何将数据从 AWS S3 导入到 Elastic Cloud - 第 3 部分:Elastic S3 连接器
大数据·elasticsearch·搜索引擎·云计算·全文检索·可用性测试·aws
Aloudata6 小时前
从Apache Atlas到Aloudata BIG,数据血缘解析有何改变?
大数据·apache·数据血缘·主动元数据·数据链路
水豚AI课代表6 小时前
分析报告、调研报告、工作方案等的提示词
大数据·人工智能·学习·chatgpt·aigc
拓端研究室TRL9 小时前
【梯度提升专题】XGBoost、Adaboost、CatBoost预测合集:抗乳腺癌药物优化、信贷风控、比特币应用|附数据代码...
大数据
黄焖鸡能干四碗9 小时前
信息化运维方案,实施方案,开发方案,信息中心安全运维资料(软件资料word)
大数据·人工智能·软件需求·设计规范·规格说明书
编码小袁9 小时前
探索数据科学与大数据技术专业本科生的广阔就业前景
大数据
WeeJot嵌入式10 小时前
大数据治理:确保数据的可持续性和价值
大数据
zmd-zk11 小时前
kafka+zookeeper的搭建
大数据·分布式·zookeeper·中间件·kafka