环境准备
- 版本
scala版本:2.11.8
spark版本:2.1.0
- pom.xml文件 添加依赖jar包
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.11</artifactId>
<version>${spark.version}</version>
</dependency>
Spark Streaming wc练习
通过 socket 接收 nc -lk 9999 服务的输入,并统计词频(WC)
- 代码
package com.imooc.spark.sparkStreaming
import org.apache.spark._
import org.apache.spark.streaming._ // required: provides StreamingContext and Seconds
object NetworkWordCount {
def main(args: Array[String]): Unit = {
// Create a local StreamingContext with two worker threads and a batch interval of 5 seconds.
// The master requires 2 cores to prevent a starvation scenario.
val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount")
val ssc = new StreamingContext(conf, Seconds(5))
// Create a DStream that will connect to hostname:port, like localhost:9999
val lines = ssc.socketTextStream("192.168.52.130", 9999)
//val lines = ssc.textFileStream("E:///dept.txt")
// Split each line into words
val words = lines.flatMap(_.split(",")) // words in the input are delimited by commas
// Count each word in each batch
val pairs = words.map(word => (word, 1))
val wordCounts = pairs.reduceByKey(_ + _)