java
package com.flink.DataStream.env;
import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
/**
 * Demonstrates how to create and configure a Flink {@code StreamExecutionEnvironment} and runs a
 * streaming word count on top of it.
 *
 * <p>The job reads text lines from a socket on {@code localhost:8888}, splits every line on single
 * spaces, emits a {@code (word, 1)} pair for each non-empty token, keys the stream by the word and
 * prints the running sum of the counts.
 */
public class flinkEnvDemo {

    /**
     * Builds the word-count pipeline on a local environment with a web UI and executes it.
     *
     * @param args command-line arguments; not used
     * @throws Exception propagated from {@code execute} when the job cannot be run
     */
    public static void main(String[] args) throws Exception {
        // Flink configuration object handed to the environment factory below.
        Configuration configuration = new Configuration();
        // The REST / web UI port defaults to 8081; bind it to 8082 instead.
        configuration.set(RestOptions.BIND_PORT, "8082");

        // Create the execution environment. Alternatives that could be used here:
        //   createLocalEnvironment()                - a local environment
        //   createRemoteEnvironment(...)            - a remote environment
        //   getExecutionEnvironment()               - the usual choice during development; it
        //                                             decides by itself between local and remote
        //   getExecutionEnvironment(configuration)  - same, with an explicit configuration
        // createLocalEnvironmentWithWebUI provides a web UI without starting a Flink cluster.
        StreamExecutionEnvironment streamExecutionEnvironment =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        // Unified stream/batch processing: the API is the same for both, and the job can run as
        // streaming (the default) or as batch.
        // Normally this is not hard-coded; pass it when submitting the job instead:
        //   -Dexecution.runtime-mode=STREAMING/BATCH
        streamExecutionEnvironment.setRuntimeMode(RuntimeExecutionMode.STREAMING);

        // Source: text lines read from a socket.
        DataStreamSource<String> dataStreamSource =
                streamExecutionEnvironment.socketTextStream("localhost", 8888);

        // Flatten + transform + group + aggregate.
        SingleOutputStreamOperator<Tuple2<String, Integer>> singleOutputStreamOperator = dataStreamSource
                .flatMap(
                        // The flatMap function is written as a lambda; an anonymous class or a
                        // separately defined implementation would work as well.
                        // First type is the input element, the collector carries the output type.
                        (String line, Collector<Tuple2<String, Integer>> collector) -> {
                            // Turn every word into a (word, 1) tuple
                            // (compare Spark's RDD operator: _.map((_, 1))).
                            for (String word : line.split(" ")) {
                                // A blank line, a leading space or consecutive spaces make
                                // split(" ") return empty tokens; skip them so that "" is never
                                // counted as a word.
                                if (word.isEmpty()) {
                                    continue;
                                }
                                // Send the tuple downstream through the collector.
                                collector.collect(Tuple2.of(word, 1));
                            }
                        })
                // Declare the output type explicitly for the lambda-based flatMap.
                .returns(Types.TUPLE(Types.STRING, Types.INT))
                // Group by the word (tuple field f0) ...
                .keyBy((Tuple2<String, Integer> value) -> value.f0)
                // ... and sum the counts (tuple position 1).
                .sum(1);

        // Sink: print the results.
        singleOutputStreamOperator.print();

        // Run the Flink program; execute declares a checked exception, hence "throws Exception".
        streamExecutionEnvironment.execute("Flink Environment Demo");

        /*
         * By default env.execute() triggers one Flink job.
         * A main method could in theory call execute() several times, but that is pointless
         * because the first call blocks.
         * Flink also offers asynchronous execution: the number of executeAsync() calls in a main
         * method equals the number of Flink jobs that are generated.
         */
        // streamExecutionEnvironment.executeAsync(); // asynchronous execution
    }
}