官网实例
http://spark.apache.org/docs/2.3.0/structured-streaming-programming-guide.html#quick-example
示例代码（Complete 输出模式）
import org.apache.spark.sql.functions._
import org.apache.spark.sql.SparkSession
/**
 * Streaming word count over a text socket, printed to the console in
 * "complete" output mode.
 *
 * Lines are read from the socket at cdh1:9999, split on single spaces, and
 * counted per distinct word. The query then blocks until it terminates.
 */
object StructuredNetworkWordCount {

  /**
   * Entry point.
   *
   * @param args command-line arguments; not read by this program
   */
  def main(args: Array[String]): Unit = {
    // Session with a hard-coded local[2] master.
    val session = SparkSession.builder
      .appName("StructuredNetworkWordCount")
      .master("local[2]")
      .getOrCreate()
    // Brings the encoders needed by `.as[String]` and `flatMap` into scope.
    import session.implicits._

    // Streaming DataFrame of the input lines arriving on cdh1:9999.
    val socketReader = session.readStream
      .format("socket")
      .option("host", "cdh1")
      .option("port", 9999)
    val inputLines = socketReader.load()

    // One row per space-separated token of every incoming line.
    val tokens = inputLines.as[String].flatMap(line => line.split(" "))

    // Running count per distinct token; "value" is the column grouped on.
    val countsPerWord = tokens.groupBy("value").count()

    // Write the counts to the console sink using "complete" output mode.
    val consoleSink = countsPerWord.writeStream
      .outputMode("complete")
      .format("console")
    val runningQuery = consoleSink.start()

    // Block the main thread until the streaming query terminates.
    runningQuery.awaitTermination()
  }
}
启动测试
nc -lk 9999
+-----+-----+
|value|count|
+-----+-----+
|spark| 3|
+-----+-----+
+------+-----+
| value|count|
+------+-----+
| spark| 6|
|hadoop| 1|
+------+-----+
示例代码（Append 输出模式）
import org.apache.spark.sql.functions._
import org.apache.spark.sql.SparkSession
/**
 * Echoes every line received on a text socket to the console in "append"
 * output mode.
 *
 * Unlike the Complete-mode example (`StructuredNetworkWordCount`), this job
 * performs no splitting and no aggregation: each line read from cdh1:9999 is
 * forwarded unchanged to the console sink.
 */
object StructuredNetworkWordCountAppend {

  /**
   * Entry point.
   *
   * @param args command-line arguments; not read by this program
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      // Named after this object so the job can be told apart from the
      // Complete-mode example in the Spark UI and logs.
      .appName("StructuredNetworkWordCountAppend")
      .master("local[2]")
      .getOrCreate()
    // Brings the encoder needed by `.as[String]` into scope.
    import spark.implicits._

    // Streaming DataFrame of the input lines arriving on cdh1:9999.
    val lines = spark.readStream
      .format("socket")
      .option("host", "cdh1")
      .option("port", 9999)
      .load()

    // Typed view of the raw lines; nothing is split or counted here.
    val rawLines = lines.as[String]

    // Write the lines to the console sink using "append" output mode.
    val query = rawLines.writeStream
      .outputMode("append")
      .format("console")
      .start()

    // Block the main thread until the streaming query terminates.
    query.awaitTermination()
  }
}
启动测试
[root@cdh1 ~]# nc -lk 9999
spark
spark
spark
-------------------------------------------
Batch: 0
-------------------------------------------
+-----+
|value|
+-----+
|spark|
+-----+
-------------------------------------------
Batch: 1
-------------------------------------------
+-----+
|value|
+-----+
|spark|
|spark|
+-----+