import java.io.File

import scala.collection.mutable.Map
import scala.io.Source
// Word count over every regular file that sits directly inside `textFilePath`.
val textFilePath = "D:/doc/spark/input"

// Handle on the input directory.
val dirFile = new File(textFilePath)

// File.listFiles() returns null when the path does not exist or is not a
// directory, so fall back to an empty array instead of failing in the loop.
// Sub-directories are skipped because opening one with Source.fromFile fails.
val files = Option(dirFile.listFiles()).getOrElse(Array.empty[File]).filter(_.isFile)

// Accumulates word -> number of occurrences across all files.
val resultMap1 = Map.empty[String, Int]

/**
 * Adds the occurrences of every word in `file` to `counts`.
 *
 * Each line is split on a single space, so consecutive spaces produce
 * empty-string "words"; those are counted here and filtered out later.
 * The underlying source is always closed, even if reading fails.
 *
 * @param file   file to read, using the platform default encoding
 * @param counts mutable map updated in place
 */
def addWordCounts(file: File, counts: Map[String, Int]): Unit = {
  val data = Source.fromFile(file)
  try {
    data.getLines().flatMap(_.split(" ")).foreach { word =>
      counts(word) = counts.getOrElse(word, 0) + 1
    }
  } finally {
    data.close()
  }
}

// Count the words of every file in the directory.
files.foreach(file => addWordCounts(file, resultMap1))

// Print the raw result; the explicit tuple keeps the "(word,count)" output format.
resultMap1.foreach { case (word, count) => println((word, count)) }
println("--------------")

// Drop the empty-string entries produced by consecutive spaces.
val resultMap2 = resultMap1.filter { case (word, _) => word.nonEmpty }
resultMap2.foreach { case (word, count) => println((word, count)) }
println("--------------")

// Sort by occurrence count, ascending.
val resultMap3 = resultMap2.toList.sortBy(_._2)
resultMap3.foreach { case (word, count) => println((word, count)) }
// Recursive variant: walk the folder and process the data of every sub-folder and file beneath it.
/* Word count over a whole directory tree. */
import java.io.File
import scala.collection.mutable.Map
import scala.io.Source

val textFilePath = "D:/doc/spark/input"

// Accumulates word -> number of occurrences across every file visited.
val resultMap1 = Map.empty[String, Int]

/**
 * Recursively visits `rootPath` and adds the word counts of every file found
 * to the shared `resultMap1`.
 *
 * Directories are descended into; regular files are read line by line and
 * split on a single space (so consecutive spaces yield empty-string entries,
 * which the caller filters out afterwards). A path that does not exist or
 * cannot be listed contributes nothing instead of throwing.
 *
 * @param rootPath directory to walk, or a single file to count
 * @return the shared `resultMap1`, updated in place
 */
def toGetAllFile(rootPath: File): Map[String, Int] = {
  if (rootPath.isDirectory) {
    // File.listFiles() may still return null on an I/O error, so guard it.
    Option(rootPath.listFiles()).getOrElse(Array.empty[File]).foreach(toGetAllFile)
  } else if (rootPath.isFile) {
    val source = Source.fromFile(rootPath)
    try {
      source.getLines().flatMap(_.split(" ")).foreach { word =>
        resultMap1(word) = resultMap1.getOrElse(word, 0) + 1
      }
    } finally {
      // Always release the file handle, even if reading fails.
      source.close()
    }
  }
  resultMap1
}

toGetAllFile(new File(textFilePath))

// Drop the empty-string entries produced by consecutive spaces.
val stringToInt = resultMap1.filter { case (word, _) => word.nonEmpty }

// Sort by occurrence count, ascending, and print.
// The explicit tuple keeps the original "(label,word,count)" output format.
val tuples1 = stringToInt.toList.sortBy(_._2)
tuples1.foreach { case (word, count) => println(("tuples1 is: ", word, count)) }
println("--------------------------------")

// Sort by occurrence count, descending, and print.
val tuples2 = stringToInt.toList.sortBy { case (_, count) => -count }
tuples2.foreach { case (word, count) => println(("tuples2 is: ", word, count)) }