启动nc:
nc -lk 9999
启动spark-shell:
val lines = spark.readStream.
| format("socket").
| option("host","localhost").
| option("port",9999).
| load();
lines: org.apache.spark.sql.DataFrame = [value: string]
val words = lines.as[String].flatMap(_.split(" "))
val wordCounts = words.groupBy("value").count()
val query = wordCounts.writeStream.
| outputMode("complete").
| format("console").
| start();
query: org.apache.spark.sql.streaming.StreamingQuery = org.apache.spark.sql.execution.streaming.StreamingQueryWrapper@22cf6c9c
query.awaitTermination()
-------------------------------------------
Batch: 0
-------------------------------------------
+-----+-----+
|value|count|
+-----+-----+
|hello|    1|
| word|    1|
+-----+-----+
-------------------------------------------
Batch: 1
-------------------------------------------
+-----+-----+
|value|count|
+-----+-----+
|hello|    2|
| word|    1|
|world|    1|
+-----+-----+
-------------------------------------------
Batch: 2
-------------------------------------------
+-----+-----+
|value|count|
+-----+-----+
|hello|    2|
|  bye|    1|
| word|    1|
|world|    2|
|   ny|    1|
+-----+-----+
-------------------------------------------
Batch: 3
-------------------------------------------
+------+-----+
| value|count|
+------+-----+
| hello|    2|
|   bye|    1|
|  word|    1|
|sounds|    1|
| world|    2|
|  good|    1|
|    ny|    1|
+------+-----+
-------------------------------------------
Batch: 4
-------------------------------------------
+------+-----+
| value|count|
+------+-----+
| hello|    2|
|   bye|    3|
|  word|    1|
|sounds|    1|
| world|    2|
|  good|    1|
|    ny|    1|
+------+-----+