一.报错
Error while running command to get file permissions : java.io.IOException: (null) entry in command string: null ls -F D:\软件\project\Spark-learn\datas\1.txt at org.apache.hadoop
二.代码
package wc
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
object wc {
  /** Word-count driver: reads text files, splits each line into words,
    * counts occurrences per word, and prints the (word, count) pairs.
    */
  def main(args: Array[String]): Unit = {
    // Connect this application to the Spark framework (local mode).
    val sparkConf = new SparkConf().setMaster("local").setAppName("WordCount")
    val sc = new SparkContext(sparkConf)

    // 1. Read the data files line by line.
    //    Use a trailing wildcard so only the files are opened; passing the bare
    //    directory makes Hadoop shell out to list it, which on Windows without
    //    winutils fails with "(null) entry in command string: null ls -F".
    val lines: RDD[String] = sc.textFile("D:\\软件\\project\\Spark-learn\\datas\\*")

    // 2. Split each line on spaces; flatMap flattens the per-line arrays
    //    into a single RDD of words.
    val words: RDD[String] = lines.flatMap(_.split(" "))

    // 3. Count occurrences per word. reduceByKey pre-aggregates counts on
    //    each partition before the shuffle, unlike groupBy + list.size which
    //    shuffles every individual occurrence. The result is identical.
    val wordToCount: RDD[(String, Int)] =
      words.map(word => (word, 1)).reduceByKey(_ + _)

    // 4. Collect the result to the driver and print it to the console.
    val array: Array[(String, Int)] = wordToCount.collect()
    array.foreach(println)

    // Release the connection to the Spark framework.
    sc.stop()
  }
}
三.解决方法
数据文件读取不到:在 Windows 环境下缺少 Hadoop 的 winutils 支持时,直接把目录作为路径传给 textFile,Hadoop 在列举目录、获取文件权限时会触发上述 IOException。
解决办法是在路径末尾加上通配符 \*,让 Spark 直接按文件匹配读取而不去列举目录。代码修改如下即可:
//1.读取文件,一行一行获取数据
val lines: RDD[String] = sc.textFile("D:\\软件\\project\\Spark-learn\\datas\\*")