// Read a text file through the new Hadoop API and tag every line with the
// name of the file it came from (the file name is appended to the line end).
val value: RDD[String] = sc.newAPIHadoopFile[LongWritable, Text, TextInputFormat](path)
  // Downcast is required: mapPartitionsWithInputSplit lives on NewHadoopRDD,
  // not on the generic RDD interface returned by newAPIHadoopFile.
  .asInstanceOf[NewHadoopRDD[LongWritable, Text]]
  .mapPartitionsWithInputSplit { (split: InputSplit, records: Iterator[(LongWritable, Text)]) =>
    // NOTE(review): assumes every split is a FileSplit — holds for TextInputFormat.
    val sourceFileName = split.asInstanceOf[FileSplit].getPath.getName
    // Append the source file name to the end of each line of the RDD.
    records.map { case (_, lineText) => lineText.toString + sourceFileName }
  }
spark 读取文件并获取文件名
于 2023-08-24 11:34:52 首次发布