1 Send messages with the nc tool
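The receiver in the code below connects to port 9998 on host 158.158.4.49, so that host needs a process listening on the port and forwarding whatever is typed. With netcat this is typically done as follows (assuming a netcat build that supports the -l and -k options):

nc -lk 9998

Each line typed into this terminal and confirmed with Enter is delivered to the streaming job as one message.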
2 The core code is as follows
package cn.taobao;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.StorageLevels;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import java.util.ArrayList;
import java.util.Arrays;

public class Save_2 {
    public static void main(String[] args) throws Exception {
        // The StreamingContext is the programming entry point; batches are created every 5 seconds
        JavaStreamingContext ssc = new JavaStreamingContext(
                "local[*]",
                "Save_2",
                Durations.seconds(5),
                System.getenv("SPARK_HOME"),
                JavaStreamingContext.jarOfClass(Save_2.class));
        ssc.sparkContext().setLogLevel("ERROR");

        // Data receiver
        // Create a receiver (JavaReceiverInputDStream) that reads the data sent over a socket
        // from the given host and port and hands it to the streaming job for processing
        JavaReceiverInputDStream<String> lines = ssc.socketTextStream(
                "158.158.4.49", 9998, StorageLevels.MEMORY_AND_DISK_SER);

        /*
          Given the input line "aa bb cc aa",
          flatMap returns the elements
              aa
              bb
              cc
              aa
        */
        JavaDStream<String> stringJavaDStream = lines.flatMap(xx -> {
            String[] str_split = xx.split(" ");
            return Arrays.asList(str_split).iterator();
        });

        // Use Spark SQL to do the word count
        stringJavaDStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {
            @Override
            public void call(JavaRDD<String> stringJavaRDD) throws Exception {
                // This differs very little from ordinary Spark SQL code.

                // Create (or reuse) the SparkSession
                SparkSession sparkSession = SparkSession.builder()
                        .config(stringJavaRDD.context().getConf())
                        .getOrCreate();

                // Prepare the data for createDataFrame: wrap each word in a Row
                JavaRDD<Row> javaRddRow = stringJavaRDD.map(xx -> RowFactory.create(xx));

                // Build the schema: a single nullable string column named "name"
                ArrayList<StructField> structFields = new ArrayList<>();
                structFields.add(DataTypes.createStructField("name", DataTypes.StringType, true));
                StructType structType = DataTypes.createStructType(structFields);

                Dataset<Row> dataFrame = sparkSession.createDataFrame(javaRddRow, structType);
                dataFrame.createOrReplaceTempView("words");
                Dataset<Row> resultOut = sparkSession.sql(
                        "select name, count(*) as nameCount from words group by name");
                resultOut.show();
            }
        });

        // Explicitly start receiving data
        ssc.start();
        try {
            // Wait for the computation to terminate
            ssc.awaitTermination();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            ssc.close();
        }
    }
}
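Because the context is created with the local[*] master, the class can be started directly from an IDE. To run the packaged application through spark-submit instead, an invocation along the following lines should work (the jar name spark-streaming-demo.jar is only a placeholder for whatever your build produces):

spark-submit --class cn.taobao.Save_2 spark-streaming-demo.jar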
3 Demonstration
Type the following into the nc terminal and press Enter:
aa aa aa bb
Then type the following and press Enter:
bb bb
The results are as follows. Note that each 5-second batch is counted independently, so the second table reflects only the second line of input:
+----+---------+
|name|nameCount|
+----+---------+
| aa| 3|
| bb| 1|
+----+---------+
+----+---------+
|name|nameCount|
+----+---------+
| bb| 2|
+----+---------+