1、概述
1)left join 实现
java
// Left outer join: every left record is emitted at least once.
for (Tuple3<String, String, Long> l : left) {
    boolean matched = false;
    for (Tuple3<String, String, Long> r : right) {
        // Records pair up only when their timestamps (f2) are equal.
        if (!Objects.equals(l.f2, r.f2)) {
            continue;
        }
        matched = true;
        collector.collect(new Tuple5<>(l.f0, l.f1, r.f1, l.f2, r.f2));
    }
    if (matched) {
        continue;
    }
    // No right record matched: fill the right-side fields with "" and 0L.
    collector.collect(new Tuple5<>(l.f0, l.f1, "", l.f2, 0L));
}
2)right join 实现
java
// Right outer join: every right record is emitted at least once.
for (Tuple3<String, String, Long> r : right) {
    boolean matched = false;
    for (Tuple3<String, String, Long> l : left) {
        // Records pair up only when their timestamps (f2) are equal.
        if (!Objects.equals(l.f2, r.f2)) {
            continue;
        }
        matched = true;
        collector.collect(new Tuple5<>(l.f0, l.f1, r.f1, l.f2, r.f2));
    }
    if (matched) {
        continue;
    }
    // No left record matched: fill the left-side fields with "" and 0L.
    collector.collect(new Tuple5<>(r.f0, "", r.f1, 0L, r.f2));
}
3)inner join 实现
java
// Inner join: emit one row per (left, right) pair whose timestamps (f2) are equal;
// records without a partner produce no output.
for (Tuple3<String, String, Long> l : left) {
    for (Tuple3<String, String, Long> r : right) {
        if (!Objects.equals(l.f2, r.f2)) {
            continue;
        }
        collector.collect(new Tuple5<>(l.f0, l.f1, r.f1, l.f2, r.f2));
    }
}
2、完整代码示例
java
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.functions.RichCoGroupFunction;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.util.Collector;
import java.time.Duration;
import java.util.Objects;
/**
 * Demonstrates inner, left outer and right outer joins of two socket text streams,
 * implemented with a windowed {@code coGroup} on event time.
 *
 * <p>Each input line has the form {@code f0,f1,timestampMillis}. Records of both streams are
 * grouped by their second field ({@code f1}) inside 5-second tumbling event-time windows.
 * Within one group, a left and a right record are joined when their timestamps ({@code f2})
 * are equal.
 *
 * <p>Output rows are {@code (f0, left f1, right f1, left timestamp, right timestamp)}. For the
 * outer joins, the fields of the missing side are filled with {@code ""} and {@code 0L}.
 */
public class _05_CoGroupInnerOuterJoin {

    /** Host both socket sources connect to. */
    private static final String HOST = "localhost";

    /** Port of the left input stream. */
    private static final int LEFT_PORT = 8888;

    /** Port of the right input stream. */
    private static final int RIGHT_PORT = 9999;

    /** Size of the tumbling event-time window the join is evaluated in. */
    private static final Duration WINDOW_SIZE = Duration.ofSeconds(5);

    /**
     * Builds and runs the job: two socket sources, a windowed coGroup, and a print sink.
     *
     * @param args unused
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism is limited for testing; in production, idle sources need to be configured.
        env.setParallelism(2);
        env.disableOperatorChaining();

        SingleOutputStreamOperator<Tuple3<String, String, Long>> watermarkLeft = readEvents(env, LEFT_PORT);
        SingleOutputStreamOperator<Tuple3<String, String, Long>> watermarkRight = readEvents(env, RIGHT_PORT);

        /*
         * left-join test data
         *
         * left-1
         *
         * a,1,1718089200000
         * b,2,1718089200000
         * c,3,1718089200000
         *
         * right-2
         *
         * a,1,1718089200000
         * b,2,1718089200000
         * c,3,1718089200000
         *
         * left-3
         *
         * a,4,1718089202000
         * b,5,1718089202000
         * c,6,1718089202000
         *
         * right-4
         *
         * a,1,1718089202000
         * b,2,1718089202000
         * c,3,1718089202000
         *
         * left-right-5
         *
         * a,1,1718089205001
         * b,2,1718089205001
         * c,3,1718089205001
         *
         * 1> (a,1,1,1718089200000,1718089200000)
         * 1> (b,2,2,1718089200000,1718089200000)
         * 1> (c,3,3,1718089200000,1718089200000)
         * 1> (a,4,,1718089202000,0)
         * 2> (b,5,,1718089202000,0)
         * 1> (c,6,,1718089202000,0)
         */
        // No keyBy before coGroup: where()/equalTo() already define the key (f1) the two
        // inputs are partitioned by, so an extra keyBy on another field only adds a shuffle.
        watermarkLeft
                .coGroup(watermarkRight)
                .where(e -> e.f1)
                .equalTo(e -> e.f1)
                .window(TumblingEventTimeWindows.of(WINDOW_SIZE))
                .apply(new JoinFunction())
                .print();

        env.execute();
    }

    /**
     * Reads text lines from a socket, parses them into tuples and assigns event-time
     * timestamps and watermarks (event time requires both).
     *
     * @param env  environment the source is created in
     * @param port port on {@link #HOST} to read from
     * @return stream of parsed records with timestamps and watermarks assigned
     */
    private static SingleOutputStreamOperator<Tuple3<String, String, Long>> readEvents(
            StreamExecutionEnvironment env, int port) {
        DataStreamSource<String> lines = env.socketTextStream(HOST, port);
        SingleOutputStreamOperator<Tuple3<String, String, Long>> parsed = lines.map(new EventParser());
        return parsed.assignTimestampsAndWatermarks(
                WatermarkStrategy.<Tuple3<String, String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                        .withTimestampAssigner(new EventTimestampAssigner()));
    }

    /**
     * Left outer join: emits one row per matching (left, right) pair, and one padded row for
     * every left record without a match.
     */
    private static void leftJoin(
            Iterable<Tuple3<String, String, Long>> left,
            Iterable<Tuple3<String, String, Long>> right,
            Collector<Tuple5<String, String, String, Long, Long>> collector) {
        for (Tuple3<String, String, Long> leftTuple : left) {
            boolean isJoin = false;
            for (Tuple3<String, String, Long> rightTuple : right) {
                if (Objects.equals(leftTuple.f2, rightTuple.f2)) {
                    isJoin = true;
                    collector.collect(new Tuple5<>(leftTuple.f0, leftTuple.f1, rightTuple.f1, leftTuple.f2, rightTuple.f2));
                }
            }
            if (!isJoin) {
                // No right record matched: fill the right-side fields with "" and 0L.
                collector.collect(new Tuple5<>(leftTuple.f0, leftTuple.f1, "", leftTuple.f2, 0L));
            }
        }
    }

    /**
     * Right outer join: emits one row per matching (left, right) pair, and one padded row for
     * every right record without a match.
     */
    private static void rightJoin(
            Iterable<Tuple3<String, String, Long>> left,
            Iterable<Tuple3<String, String, Long>> right,
            Collector<Tuple5<String, String, String, Long, Long>> collector) {
        for (Tuple3<String, String, Long> rightTuple : right) {
            boolean isJoin = false;
            for (Tuple3<String, String, Long> leftTuple : left) {
                if (Objects.equals(leftTuple.f2, rightTuple.f2)) {
                    isJoin = true;
                    collector.collect(new Tuple5<>(leftTuple.f0, leftTuple.f1, rightTuple.f1, leftTuple.f2, rightTuple.f2));
                }
            }
            if (!isJoin) {
                // No left record matched: fill the left-side fields with "" and 0L.
                collector.collect(new Tuple5<>(rightTuple.f0, "", rightTuple.f1, 0L, rightTuple.f2));
            }
        }
    }

    /** Inner join: emits one row per matching (left, right) pair and nothing else. */
    private static void innerJoin(
            Iterable<Tuple3<String, String, Long>> left,
            Iterable<Tuple3<String, String, Long>> right,
            Collector<Tuple5<String, String, String, Long, Long>> collector) {
        for (Tuple3<String, String, Long> leftTuple : left) {
            for (Tuple3<String, String, Long> rightTuple : right) {
                if (Objects.equals(leftTuple.f2, rightTuple.f2)) {
                    collector.collect(new Tuple5<>(leftTuple.f0, leftTuple.f1, rightTuple.f1, leftTuple.f2, rightTuple.f2));
                }
            }
        }
    }

    /**
     * Parses a comma-separated line into {@code (fields[0], fields[1], fields[2] as long)}.
     * A line with fewer than three fields or a non-numeric third field makes {@code map} throw.
     */
    private static final class EventParser implements MapFunction<String, Tuple3<String, String, Long>> {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple3<String, String, Long> map(String input) throws Exception {
            String[] fields = input.split(",");
            return new Tuple3<>(fields[0], fields[1], Long.parseLong(fields[2]));
        }
    }

    /** Uses the third tuple field as the record's event-time timestamp. */
    private static final class EventTimestampAssigner
            implements SerializableTimestampAssigner<Tuple3<String, String, Long>> {
        private static final long serialVersionUID = 1L;

        @Override
        public long extractTimestamp(Tuple3<String, String, Long> input, long l) {
            return input.f2;
        }
    }

    /**
     * Joins the left and right records of one key group within one window.
     * Performs a left join; call {@code rightJoin} or {@code innerJoin} instead to switch
     * the join type.
     */
    private static final class JoinFunction extends RichCoGroupFunction<
            Tuple3<String, String, Long>,
            Tuple3<String, String, Long>,
            Tuple5<String, String, String, Long, Long>> {
        private static final long serialVersionUID = 1L;

        @Override
        public void coGroup(
                Iterable<Tuple3<String, String, Long>> left,
                Iterable<Tuple3<String, String, Long>> right,
                Collector<Tuple5<String, String, String, Long, Long>> collector) throws Exception {
            leftJoin(left, right, collector);
        }
    }
}
3、测试用例
bash
left-join 测试数据(left-N / right-N 表示第 N 步向左流(端口 8888)/ 右流(端口 9999)输入;left-right-5 表示第 5 步同时向两条流输入,用于推进水位线、触发窗口计算)
left-1
a,1,1718089200000
b,2,1718089200000
c,3,1718089200000
right-2
a,1,1718089200000
b,2,1718089200000
c,3,1718089200000
left-3
a,4,1718089202000
b,5,1718089202000
c,6,1718089202000
right-4
a,1,1718089202000
b,2,1718089202000
c,3,1718089202000
left-right-5
a,1,1718089205001
b,2,1718089205001
c,3,1718089205001
输出结果:
1> (a,1,1,1718089200000,1718089200000)
1> (b,2,2,1718089200000,1718089200000)
1> (c,3,3,1718089200000,1718089200000)
1> (a,4,,1718089202000,0)
2> (b,5,,1718089202000,0)
1> (c,6,,1718089202000,0)