map
数据
86.149.9.216 10001 17/05/2015:10:05:30 GET /presentations/logstash-monitorama-2013/images/github-contributions.png
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
83.149.9.216 10002 17/05/2015:10:06:53 GET /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:06:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:07:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:08:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:09:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:10:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:16:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
10.0.0.1 10003 17/05/2015:10:26:53 POST /presentations/logstash-monitorama-2013/css/print/paper.css
package com.bigdata.day02;
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Flink {@code map} example: reads whitespace-separated access-log lines from
 * {@code datas/a.log}, converts each line into a {@code LogBean}, and prints the bean
 * serialized as a JSON string.
 *
 * <p>Expected line layout (5 columns): {@code ip userId timestamp method path}, e.g.
 * {@code 86.149.9.216 10001 17/05/2015:10:05:30 GET /some/path}.
 *
 * <p>Program: flinkProject
 *
 * @author jinnian
 * @since 2024-11-21 16:26:15
 */
public class _01_map {

    /**
     * Layout of the timestamp column, e.g. {@code 17/05/2015:10:05:30}.
     * {@code HH} (hour of day, 0-23) is used because the log is on a 24-hour clock;
     * the 12-hour field {@code hh} is the wrong field for this data.
     */
    private static final String TIME_PATTERN = "dd/MM/yyyy:HH:mm:ss";

    /**
     * Builds and runs the job: text file source -> map(line to JSON) -> print.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // 1. env - prepare the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 2. source - load the data
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/a.log");

        // 3. transformation - convert each raw log line into a JSON string
        dataStreamSource.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                // Columns: [0]=ip, [1]=userId, [2]=timestamp, [3]=method, [4]=path
                String[] line = s.split("\\s+");

                LogBean logBean = new LogBean();
                logBean.setIp(line[0]);
                logBean.setUserId(Integer.parseInt(line[1]));
                logBean.setMethod(line[3]);

                // Parse the timestamp once and store it as epoch milliseconds.
                // NOTE(review): no zone is given, so the JVM default time zone is
                // presumably applied - confirm this is what the log expects.
                Date parsedTime = DateUtils.parseDate(line[2], TIME_PATTERN);
                logBean.setTimestamp(parsedTime.getTime());

                logBean.setPath(line[4]);
                return JSONObject.toJSONString(logBean);
            }
        }).print(); // 4. sink - print the results

        // 5. execute - run the job
        env.execute();
    }
}
flatMap案例
张三,苹果手机,联想电脑,华为平板
李四,华为手机,苹果电脑,小米平板
package com.bigdata.day02;
/**
 * Flink {@code flatMap} example: reads comma-separated lines from
 * {@code datas/flatmap.log} and, for every field after the first, prints one record
 * made of the first field, the literal "有", and that field.
 */
public class _02_flatmap {

    /**
     * Builds and runs the job: text file source -> flatMap(one record per item) -> print.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // 1. env - prepare the execution environment
        StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
        environment.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 2. source - load the data
        DataStreamSource<String> lines = environment.readTextFile("datas/flatmap.log");

        // 3. transformation - fan each line out into one record per trailing field
        FlatMapFunction<String, String> expandItems = new FlatMapFunction<String, String>() {
            @Override
            public void flatMap(String record, Collector<String> out) throws Exception {
                String[] fields = record.split(",");
                // Field 0 is the prefix; fields 1..n-1 each yield one output record.
                int idx = 1;
                while (idx < fields.length) {
                    out.collect(fields[0] + "有" + fields[idx]);
                    idx++;
                }
            }
        };

        // 4. sink - print the results
        lines.flatMap(expandItems).print();

        // 5. execute - run the job
        environment.execute();
    }
}
filter案例
package com.bigdata.day02;
/**
 * Flink {@code filter} example: reads whitespace-separated access-log lines from
 * {@code datas/a.log}, converts each line into a JSON-serialized {@code LogBean}, keeps
 * only the JSON strings containing {@code 83.149.9.216}, and prints them.
 *
 * <p>Expected line layout (5 columns): {@code ip userId timestamp method path}.
 */
public class _03_filter {

    /**
     * Layout of the timestamp column, e.g. {@code 17/05/2015:10:05:30}.
     * {@code HH} (hour of day, 0-23) is used because the log is on a 24-hour clock;
     * the 12-hour field {@code hh} is the wrong field for this data.
     */
    private static final String TIME_PATTERN = "dd/MM/yyyy:HH:mm:ss";

    /**
     * Builds and runs the job: text file source -> map(line to JSON) -> filter -> print.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {
        // 1. env - prepare the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // 2. source - load the data
        DataStreamSource<String> dataStreamSource = env.readTextFile("datas/a.log");

        // 3. transformation - convert each line to JSON, then keep the matching records
        dataStreamSource.map(new MapFunction<String, String>() {
            @Override
            public String map(String s) throws Exception {
                // Columns: [0]=ip, [1]=userId, [2]=timestamp, [3]=method, [4]=path
                String[] line = s.split("\\s+");

                LogBean logBean = new LogBean();
                logBean.setIp(line[0]);
                logBean.setUserId(Integer.parseInt(line[1]));
                logBean.setMethod(line[3]);

                // Parse the timestamp once and store it as epoch milliseconds.
                // NOTE(review): no zone is given, so the JVM default time zone is
                // presumably applied - confirm this is what the log expects.
                Date parsedTime = DateUtils.parseDate(line[2], TIME_PATTERN);
                logBean.setTimestamp(parsedTime.getTime());

                logBean.setPath(line[4]);
                return JSONObject.toJSONString(logBean);
            }
        }).filter(new FilterFunction<String>() {
            @Override
            public boolean filter(String s) throws Exception {
                // Substring match against the whole JSON text, not only the ip field.
                return s.contains("83.149.9.216");
            }
        }).print(); // 4. sink - print the results

        // 5. execute - run the job
        env.execute();
    }
}