1. Create a Maven project
Add the Spark Streaming dependency to pom.xml:
<properties>
    <!-- assumed version; use the Spark 1.x release that matches your cluster -->
    <spark.version>1.6.3</spark.version>
</properties>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.10</artifactId>
    <version>${spark.version}</version>
</dependency>
2. Code
package day05.d

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamingWordCount {
  def main(args: Array[String]): Unit = {
    // Build the StreamingContext with a 5-second batch interval.
    // local[2] matters: one thread runs the socket receiver,
    // the other processes the data.
    val conf = new SparkConf().setAppName("StreamingWordCount").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(5))
    // Receive data over a TCP socket
    val ds = ssc.socketTextStream("192.168.123.151", 8888)
    // A DStream is a continuous sequence of RDDs, one per batch
    val result = ds.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    // Print each batch's counts
    result.print()
    ssc.start()
    // Block until the computation terminates
    ssc.awaitTermination()
  }
}
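The comment above says a DStream is a continuous sequence of RDDs, one per batch interval. A minimal sketch of what that means in practice: foreachRDD hands each 5-second batch to you as an ordinary RDD, on which any RDD operation is available. This fragment would go before ssc.start() in the program above:

// Sketch: each batch of `result` arrives as a plain RDD,
// so regular RDD actions apply.
result.foreachRDD { rdd =>
  if (!rdd.isEmpty()) {
    // Pull this batch's counts to the driver and print them
    rdd.collect().foreach { case (word, count) =>
      println(s"$word -> $count")
    }
  }
}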
On Linux, start a test data source: nc -lk 8888
If nc is not installed, install it first:
yum install nc
Then type words into the nc session; every 5 seconds the program counts the words received in that batch and prints the result.
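Note that reduceByKey counts each 5-second batch independently; counts do not accumulate across batches. If a running total is wanted instead, updateStateByKey can carry state from batch to batch. A sketch (the checkpoint path "ck" is a placeholder):

// Running totals across batches require a checkpoint directory.
ssc.checkpoint("ck") // placeholder path
val totals = ds.flatMap(_.split(" ")).map((_, 1)).updateStateByKey(
  // Merge this batch's counts into the accumulated state
  (batch: Seq[Int], state: Option[Int]) => Some(batch.sum + state.getOrElse(0))
)
totals.print()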
3. Run the program locally (run StreamingWordCount's main method from the IDE)
4. Reduce the log output
Create a new object:
package day05.d

import org.apache.log4j.{Level, Logger}
import org.apache.spark.Logging

object LoggerLevels extends Logging {
  def setStreamingLogLevels(): Unit = {
    // Only adjust the level if the user has not configured log4j themselves
    val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
    if (!log4jInitialized) {
      logInfo("Setting log level to [WARN] for streaming example."
        + " To override add a custom log4j.properties to the classpath.")
      Logger.getRootLogger.setLevel(Level.WARN)
    }
  }
}
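A caveat: org.apache.spark.Logging was made private in Spark 2.0, so the object above only compiles against Spark 1.x (matching the spark-streaming_2.10 dependency). On Spark 2.x, an equivalent without that trait is a few lines of plain log4j (a sketch):

package day05.d

import org.apache.log4j.{Level, Logger}

// Spark 2.x variant: no dependency on the (now private) Logging trait
object LoggerLevels {
  def setStreamingLogLevels(): Unit = {
    // Only override if the user has not supplied a log4j config
    if (!Logger.getRootLogger.getAllAppenders.hasMoreElements) {
      Logger.getRootLogger.setLevel(Level.WARN)
    }
  }
}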
Then add the following call at the top of main in the previous program:
LoggerLevels.setStreamingLogLevels()
That is, the final program:
package day05.d

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Seconds, StreamingContext}

object StreamingWordCount {
  def main(args: Array[String]): Unit = {
    // Reduce log output (see step 4)
    LoggerLevels.setStreamingLogLevels()
    // Build the StreamingContext with a 5-second batch interval.
    // local[2]: one thread for the receiver, one for processing.
    val conf = new SparkConf().setAppName("StreamingWordCount").setMaster("local[2]")
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(5))
    // Receive data over a TCP socket
    val ds = ssc.socketTextStream("192.168.123.151", 8888)
    // A DStream is a continuous sequence of RDDs, one per batch
    val result = ds.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
    // Print each batch's counts
    result.print()
    ssc.start()
    // Block until the computation terminates
    ssc.awaitTermination()
  }
}
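For reference, typing e.g. hello world hello into the nc session produces console output of this shape every 5 seconds (illustrative values; the timestamp and counts depend on your input):

-------------------------------------------
Time: 1497000000000 ms
-------------------------------------------
(hello,2)
(world,1)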
With output like the above appearing every 5 seconds, the test is successful.