package com.xzdream.spark
import org.apache.spark.{SparkConf, SparkContext}
/**
 * Log analysis app: parses tab-separated access logs and computes
 * per-domain traffic totals and per-province visit counts.
 */
object SparkContextApp {

  def main(args: Array[String]): Unit = {
    // Run Spark locally with two worker threads; enough for this demo.
    val sparkConf = new SparkConf().setAppName("LogApp").setMaster("local[2]")
    val sc = new SparkContext(sparkConf)

    // Each log line is expected to carry four tab-separated fields:
    // domain, traffic, <unused>, province.
    val lines = sc.textFile("file:///Users/hadoop/scala/spark_demo1/src/main/logs/2020-5-11.log")
    // Sanity check: peek at the first three lines.
    // lines.take(3).foreach(println)
    /*
    // Per-domain traffic totals, parsing each line (kept commented out;
    // see the Try-based sketch below for a more idiomatic parse).
    lines.map { x =>
      val splits = x.split("\t")
      if (splits.length == 4) {
        val domain = splits(0)
        var traffic = 0L
        try {
          traffic = splits(1).toLong
        } catch {
          // Malformed traffic field: leave traffic at 0.
          case e: Exception =>
        }
        (domain, traffic)
      } else {
        ("-", 0L)
      }
    }.reduceByKey(_ + _).collect().foreach(println)
    */
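    /*
    // A sketch of a more idiomatic safe parse (not in the original code):
    // scala.util.Try replaces the mutable var and try/catch above, assuming
    // the same four-field, tab-separated layout.
    import scala.util.Try
    lines.map { x =>
      val splits = x.split("\t")
      if (splits.length == 4) (splits(0), Try(splits(1).toLong).getOrElse(0L))
      else ("-", 0L)
    }.reduceByKey(_ + _).collect().foreach(println)
    */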
    // Top 10 provinces by visit count: count one per line, sum per province,
    // then sort descending and keep the first ten.
    lines.map { x =>
      val splits = x.split("\t")
      if (splits.length == 4) (splits(3), 1)
      else ("-", 1) // malformed line: bucket under "-" (must be a String, not a Char)
    }.reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)
      .take(10)
      .foreach(println)
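    /*
    // A variant sketch (an assumption, not in the original): RDD.top with a
    // custom Ordering returns the 10 largest counts directly, avoiding the
    // full distributed sort that sortBy performs before take(10).
    lines.map { x =>
      val splits = x.split("\t")
      if (splits.length == 4) (splits(3), 1) else ("-", 1)
    }.reduceByKey(_ + _)
      .top(10)(Ordering.by(_._2))
      .foreach(println)
    */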
    sc.stop()
  }
}