在158.158.4.49上开两个窗口,分别运行nc -lk 9998,nc -lk 9999
输入数据的格式形如 aa bb:每行两个字段(key 和 value),中间用一个空格分隔,具体解析方式见下面的代码
唯一要注意的是 master 要写成 local[*](最少也要 local[3]):两个接收器各占用一个线程,处理数据还需要至少一个线程
package cn.taobao;
import org.apache.spark.api.java.StorageLevels;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import scala.Tuple2;
import java.util.regex.Pattern;
/**
 * Spark Streaming demo that joins two socket text streams by key.
 *
 * <p>Each input line is expected to look like {@code "key value"}: two fields
 * separated by a single space. Lines from port 9998 and port 9999 are turned
 * into key/value pairs and combined with inner, full-outer, left-outer and
 * right-outer joins; the result of every join is printed for each batch.
 *
 * <p>To try it, run {@code nc -lk 9998} and {@code nc -lk 9999} on the source
 * host and type lines such as {@code aa 1} and {@code aa 2}.
 */
public class TwoSocketJoin_Test
{
    /** Field separator of an input line; compiled once instead of once per record. */
    private static final Pattern FIELD_SEPARATOR = Pattern.compile(" ");

    /**
     * Builds the streaming job, starts it and blocks until it terminates.
     *
     * @param args unused
     * @throws Exception if the streaming computation fails
     */
    public static void main(String[] args) throws Exception {
        // Entry point of the streaming application.
        JavaStreamingContext ssc = new JavaStreamingContext(
                // Use as many threads as the CPU has cores. At least 3 are needed:
                // each of the two receivers occupies one thread and the processing
                // of the received data needs one more.
                "local[*]",
                "TwoSocketJoin_Test",
                Durations.seconds(4),
                System.getenv("SPARK_HOME"),
                // Pass the class literal itself. Calling getClass() on it would yield
                // java.lang.Class and resolve the wrong jar.
                JavaStreamingContext.jarOfClass(TwoSocketJoin_Test.class));
        ssc.sparkContext().setLogLevel("ERROR");

        // Receivers: each one reads text lines sent over a socket from host:port.
        JavaReceiverInputDStream<String> socketTextStream_1 = ssc.socketTextStream(
                "158.158.4.49", 9998, StorageLevels.MEMORY_AND_DISK_SER);
        JavaReceiverInputDStream<String> socketTextStream_2 = ssc.socketTextStream(
                "158.158.4.49", 9999, StorageLevels.MEMORY_AND_DISK_SER);

        // Classic style: anonymous inner class. Malformed lines are dropped first so
        // that a single bad input line cannot fail the whole job.
        JavaPairDStream<String, String> javaPairDStream_1 = socketTextStream_1
                .filter(TwoSocketJoin_Test::hasKeyAndValue)
                .mapToPair(new PairFunction<String, String, String>() {
                    @Override
                    public Tuple2<String, String> call(String line) {
                        return toPair(line);
                    }
                });

        // Java 8 style: lambda.
        JavaPairDStream<String, String> javaPairDStream_2 = socketTextStream_2
                .filter(line -> hasKeyAndValue(line))
                .mapToPair(line -> toPair(line));

        // Join the two streams by key and print the results.
        JavaPairDStream<String, Tuple2<String, String>> join = javaPairDStream_1.join(javaPairDStream_2);
        join.print();
        javaPairDStream_1.fullOuterJoin(javaPairDStream_2).print();
        javaPairDStream_1.leftOuterJoin(javaPairDStream_2).print();
        javaPairDStream_1.rightOuterJoin(javaPairDStream_2).print();

        // Nothing is received or computed until the context is started explicitly.
        ssc.start();
        try {
            // Block until the computation is stopped or fails.
            ssc.awaitTermination();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        } finally {
            ssc.close();
        }
    }

    /**
     * Tells whether a line splits into at least a key and a value.
     *
     * @param line raw input line, may be {@code null}
     * @return {@code true} if splitting on a single space yields two or more fields
     */
    private static boolean hasKeyAndValue(String line) {
        return line != null && FIELD_SEPARATOR.split(line).length >= 2;
    }

    /**
     * Converts a line into a (key, value) pair made of its first two fields.
     *
     * @param line input line that already passed {@link #hasKeyAndValue(String)}
     * @return pair of the first and the second space-separated field
     */
    private static Tuple2<String, String> toPair(String line) {
        String[] fields = FIELD_SEPARATOR.split(line);
        return new Tuple2<>(fields[0], fields[1]);
    }
}
运行结果如下
如分别在两个shell窗口中输入 aa 1 和 aa 2
-------------------------------------------
Time: 1619575952000 ms
-------------------------------------------
-------------------------------------------
Time: 1619575956000 ms
-------------------------------------------
(aa,(1,2))
-------------------------------------------
Time: 1619575956000 ms
-------------------------------------------
(aa,(Optional[1],Optional[2]))