/**
* Created by wilbur on 6/17/17.
*/import java.io._
import java.util
import java.util.concurrent.{Callable, ExecutorService, Executors, FutureTask}
import scala.collection.mutable.ArrayBuffer
import scala.io.Source
/** Entry point of the external sort.
  *
  * The input file is split into equally sized line ranges, one per reader thread. Each thread
  * turns its range into sorted run files via `sortThread.readsort`. The run files are then merged
  * in parallel by `mergethread.merge` and finally combined by `sortThread.mergeSort`.
  */
object sort {

  /** Runs the split / sort / merge pipeline and prints the elapsed times.
    *
    * @param args command line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val timebegin = System.currentTimeMillis()
    val inputPath = "/home/user/inputdata"
    val readthreadnum = 4 // number of threads reading the input file

    // Count the lines of the input file; the handle is closed once the count is known.
    val lineSource = Source.fromFile(inputPath)
    val filesize: Long =
      try lineSource.getLines().length.toLong
      finally lineSource.close()
    val fileblock = filesize / readthreadnum // lines handled by each reader thread
    val remainder = filesize % readthreadnum // leftover lines, given to the last thread

    val receiver = new ArrayBuffer[ArrayBuffer[ArrayBuffer[String]]]()
    val threadPool: ExecutorService = Executors.newFixedThreadPool(readthreadnum)
    try {
      val futureList = new ArrayBuffer[FutureTask[ArrayBuffer[ArrayBuffer[String]]]]()
      for (i <- 1 to readthreadnum) {
        val linesToSkip = fileblock * (i - 1)
        // The last thread also takes the lines left over by the integer division.
        val linesToRead = if (i == readthreadnum) fileblock + remainder else fileblock
        val future = new FutureTask[ArrayBuffer[ArrayBuffer[String]]](
          new Callable[ArrayBuffer[ArrayBuffer[String]]] {
            override def call(): ArrayBuffer[ArrayBuffer[String]] = {
              val reader = new BufferedReader(new FileReader(inputPath))
              try {
                // Advance this thread's private reader to the start of its range.
                var skipped = 0L
                while (skipped < linesToSkip) {
                  reader.readLine()
                  skipped += 1
                }
                // Every task fills its own buffer, so no mutable state is shared between threads.
                val produced = new ArrayBuffer[ArrayBuffer[String]]()
                produced += new sortThread().readsort(reader, linesToRead)
                produced
              } finally {
                reader.close()
              }
            }
          })
        futureList += future
        threadPool.execute(future)
      }
      for (fu <- futureList) {
        receiver += fu.get()
      }
    } finally {
      threadPool.shutdown()
    }

    val time1 = System.currentTimeMillis()
    println(s"The read time is ${(time1 - timebegin) / 1000.0} s\n")

    // Write the names of all run files produced by the split phase to the listing file.
    val tempath = "/project/sort/temp/temp"
    val out = new PrintWriter(tempath)
    try {
      for (perThread <- receiver; names <- perThread; name <- names) {
        out.println(name)
      }
    } finally {
      out.close()
    }

    // Merge the listed run files with several threads.
    val merge = new mergethread
    merge.merge(tempath)
    new File(tempath).delete() // the listing file must not be treated as a run file below

    // Collect every file left in the temp directory and merge them into the final result.
    val resultname = new util.ArrayList[String]()
    // listFiles() returns null when the directory cannot be listed.
    val remaining = Option(new File("/project/sort/temp").listFiles()).getOrElse(Array.empty[File])
    for (f <- remaining if f.isFile) {
      resultname.add(f.getAbsolutePath)
    }
    val print = new sortThread
    print.mergeSort(resultname)

    val timeover = System.currentTimeMillis()
    println(s"The merge time is ${(timeover - time1) / 1000.0} s\n\n\n")
    println(s"The all runtime is ${(timeover - timebegin) / 1000.0} s\n\n\n")
  }
}
import java.io._
import java.util
import scala.collection.mutable.ArrayBuffer
import scala.util.control.Breaks.{break, breakable}
/**
 * Created by wilbur on 6/17/17.
 *
 * Worker of the external sort: reads records from the source file, writes them out as sorted
 * run files, and merges run files pairwise until a single sorted file remains.
 */
class sortThread {

  /** Maximum number of records buffered in memory before a sorted run is written to disk. */
  val size: Int = 10000000

  /** Buffer holding the records of the run currently being read.
    *
    * Allocated on first use, so instances that are only used for merging do not pay for it.
    */
  lazy val nums: Array[Long] = new Array[Long](size)

  /** Number of records currently stored in `nums`. */
  var index: Int = 0

  /** Reads up to `fileblock` records from `fr` and stores them as sorted run files.
    *
    * Every record read is kept. A run is flushed whenever the buffer is full and once more at the
    * end if records remain. Reading stops early when the reader is exhausted.
    *
    * @param fr        reader positioned at the first record to consume; not closed here
    * @param fileblock number of records to read
    * @return the paths of the run files written, in creation order
    * @throws NumberFormatException if a line is not a valid `Long`
    */
  def readsort(fr: BufferedReader, fileblock: Long): ArrayBuffer[String] = {
    val fileNames = new ArrayBuffer[String]()
    var consumed = 0L
    var line = if (consumed < fileblock) fr.readLine() else null
    while (line != null) {
      nums(index) = line.toLong
      index += 1
      consumed += 1
      if (index == size) { // buffer full: sort it, store it, start a new run
        fileNames += sortAndSave(nums, index)
        index = 0
      }
      line = if (consumed < fileblock) fr.readLine() else null
    }
    if (index > 0) { // flush the last, partially filled run; never write an empty run file
      fileNames += sortAndSave(nums, index)
      index = 0
    }
    fileNames
  }

  /** Sorts the first `size` entries of `nums` and writes them, one per line, to a new file.
    *
    * @param nums buffer whose prefix is sorted in place
    * @param size number of leading entries of `nums` to sort and write
    * @return the path of the file written
    */
  def sortAndSave(nums: Array[Long], size: Int): String = {
    qsort(nums, 0, size - 1)
    val fileName = newTempFilePath()
    val bw = new PrintWriter(new BufferedWriter(new FileWriter(fileName)))
    try {
      var i = 0
      while (i < size) {
        bw.println(nums(i))
        i += 1
      }
    } finally {
      bw.close()
    }
    fileName
  }

  /** Merges the given sorted files pairwise, repeatedly, until one sorted file remains.
    *
    * Each pass merges files (0,1), (2,3), ... into new files and deletes the inputs; an unpaired
    * last file is copied as is. The pass is repeated on its outputs while more than one remains.
    * The single surviving file is finally renamed to a fresh timestamp-based name.
    *
    * @param fileNames paths of the sorted files to merge; an empty list is a no-op
    * @throws NumberFormatException if a compared line is not a valid `Long`
    */
  def mergeSort(fileNames: util.ArrayList[String]): Unit = {
    val tempFileNames = new util.ArrayList[String]()
    var i = 0
    while (i < fileNames.size()) {
      val resultFileName = newTempFilePath()
      tempFileNames.add(resultFileName)
      val first = new File(fileNames.get(i))
      val second =
        if (i + 1 < fileNames.size()) Some(new File(fileNames.get(i + 1))) else None
      mergePair(first, second, resultFileName)
      // Inputs are removed only after the merged file has been written and closed.
      first.delete()
      second.foreach(_.delete())
      i += 2
    }
    val size = tempFileNames.size()
    if (size > 1) {
      mergeSort(tempFileNames)
    } else if (size == 1) {
      val file = new File(tempFileNames.get(0))
      // Best effort: if the rename fails, the result simply stays under its current name.
      file.renameTo(new File("/project/sort/temp/" + System.nanoTime()))
      ()
    }
  }

  /** Sorts `inputData(left..right)` (both bounds inclusive) in ascending order, in place.
    *
    * Delegates to `java.util.Arrays.sort`, which avoids the quadratic running time and the
    * recursion depth proportional to the range length that a first-element-pivot quicksort
    * exhibits on already sorted input.
    *
    * @param inputData array to sort
    * @param left      index of the first element of the range
    * @param right     index of the last element of the range; nothing happens if `left >= right`
    */
  def qsort(inputData: Array[Long], left: Int, right: Int): Unit =
    if (left < right) util.Arrays.sort(inputData, left, right + 1)

  /** Reserves a new, uniquely named file in the temp directory and returns its path. */
  private def newTempFilePath(): String = {
    var path = "/project/sort/temp/" + System.nanoTime()
    // createNewFile is atomic, so threads that draw the same timestamp cannot share a file.
    while (!new File(path).createNewFile()) {
      path = "/project/sort/temp/" + System.nanoTime()
    }
    path
  }

  /** Merges `first` (and `second`, when present) into `target`, closing every stream it opens. */
  private def mergePair(first: File, second: Option[File], target: String): Unit = {
    val bw = new BufferedWriter(new FileWriter(target))
    try {
      val br1 = new BufferedReader(new FileReader(first))
      try {
        second match {
          case Some(other) =>
            val br2 = new BufferedReader(new FileReader(other))
            try mergeStreams(br1, br2, bw)
            finally br2.close()
          case None =>
            copyRemaining(br1, bw)
        }
      } finally {
        br1.close()
      }
    } finally {
      bw.close()
    }
  }

  /** Two-way merge of two ascending streams of `Long` lines into `bw`. */
  private def mergeStreams(br1: BufferedReader, br2: BufferedReader, bw: BufferedWriter): Unit = {
    // Null-checked lines and primitive longs keep this per-record loop free of allocations.
    var line1 = br1.readLine()
    var line2 = br2.readLine()
    var num1 = if (line1 != null) line1.toLong else 0L
    var num2 = if (line2 != null) line2.toLong else 0L
    while (line1 != null && line2 != null) {
      if (num1 < num2) {
        bw.write(num1.toString)
        bw.write("\n")
        line1 = br1.readLine()
        if (line1 != null) num1 = line1.toLong
      } else {
        bw.write(num2.toString)
        bw.write("\n")
        line2 = br2.readLine()
        if (line2 != null) num2 = line2.toLong
      }
    }
    // One side is exhausted: emit the pending record of the other side, then its remainder.
    if (line1 != null) {
      bw.write(line1)
      bw.write("\n")
    }
    if (line2 != null) {
      bw.write(line2)
      bw.write("\n")
    }
    copyRemaining(br2, bw)
    copyRemaining(br1, bw)
  }

  /** Copies every remaining line of `br` to `bw`, terminating each with a newline. */
  private def copyRemaining(br: BufferedReader, bw: BufferedWriter): Unit = {
    var line = br.readLine()
    while (line != null) {
      bw.write(line)
      bw.write("\n")
      line = br.readLine()
    }
  }
}
import java.util.concurrent.{Callable, ExecutorService, Executors, FutureTask}
import java.util
import scala.collection.mutable.ArrayBuffer
import scala.io.Source
/**
 * Created by wilbur on 6/20/17.
 *
 * Merges sorted run files with several threads: the list of run files is split into one
 * contiguous slice per thread and each slice is merged with `sortThread.mergeSort`.
 */
class mergethread {

  /** Merges the run files listed in `filepath`, one merge task per slice of the list.
    *
    * The listing file is read once and closed. Each task receives its own list of names, and the
    * last task additionally takes the names left over by the integer division. The call returns
    * after all tasks have finished; a failure in any task is rethrown by `FutureTask.get`.
    *
    * @param filepath path of a text file containing one run-file path per line
    */
  def merge(filepath: String): Unit = {
    val mergethreadnum = 4
    val listing = Source.fromFile(filepath)
    val names: Vector[String] =
      try listing.getLines().toVector
      finally listing.close()
    val perThread = names.length / mergethreadnum

    val threadPool1: ExecutorService = Executors.newFixedThreadPool(mergethreadnum)
    try {
      val futureList1 = new ArrayBuffer[FutureTask[Int]]()
      for (i <- 1 to mergethreadnum) {
        val from = perThread * (i - 1)
        val end = if (i == mergethreadnum) names.length else from + perThread
        // Each task owns its list, so nothing mutable is shared between threads.
        val assigned = new util.ArrayList[String]()
        names.slice(from, end).foreach(name => assigned.add(name))
        val future = new FutureTask[Int](new Callable[Int] {
          override def call(): Int = {
            new sortThread().mergeSort(assigned)
            0
          }
        })
        futureList1 += future
        threadPool1.execute(future)
      }
      // Wait for every merge task to complete.
      for (fu <- futureList1) {
        fu.get()
      }
    } finally {
      threadPool1.shutdown()
    }
  }
}