1.排序:
准备三个文本文件(每行一个整数),将所有文件中的数字合并后按升序排序,并为每个数字输出其排序后的序号(从 1 开始)
数据:
1.txt
78
89
96
67
2.txt
80
82
84
86
3.txt
88
99
66
77
Spark 代码:
import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}

/**
 * Sorts the integers found in a set of text files and writes them out with their rank.
 *
 * Every non-blank line of every `*.txt` file under the input directory is parsed as an
 * `Int`. The values are sorted in ascending order and saved as `(rank,value)` pairs,
 * where `rank` starts at 1.
 */
object paixu {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused; input and output paths are fixed below)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("MySort").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val dataFile = "D://wc//paixu//*.txt"
      val data = sc.textFile(dataFile, 3)

      val result = data
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(_.toInt)
        // One output partition keeps the whole sorted sequence in a single part file.
        .sortBy(value => value, ascending = true, numPartitions = 1)
        // zipWithIndex assigns positions in RDD order. A counter captured by the closure
        // would be copied into every task and only be correct with a single partition
        // in local mode.
        .zipWithIndex()
        .map { case (value, position) => (position + 1, value) }

      result.saveAsTextFile("D://wc//result")
    } finally {
      // Always release the SparkContext, even if the job fails.
      sc.stop()
    }
  }
}
运行结果:
(1,66)
(2,67)
(3,77)
(4,78)
(5,80)
(6,82)
(7,84)
(8,86)
(9,88)
(10,89)
(11,96)
(12,99)