Scala 多线程外部排序算法

Scala 多线程外部排序算法


源代码
/**
  * Created by wilbur on 6/17/17.
  */

import java.io._
import java.util
import java.util.concurrent.{Callable, ExecutorService, Executors, FutureTask}

import scala.collection.mutable.ArrayBuffer
import scala.io.Source
/**
  * Entry point of the multi-threaded external sort.
  *
  * Phase 1 splits the input file into equal line ranges, one per reader thread;
  * each thread sorts its range in bounded chunks and writes every chunk to a
  * temporary file (see `sortThread.readsort`).
  * Phase 2 writes the chunk file names to an index file, lets `mergethread`
  * merge them in parallel, and finally merges whatever files remain in the
  * temporary directory into a single result file.
  */
object sort {
  def main(args: Array[String]): Unit = {
    val timebegin = System.currentTimeMillis()
    val inputPath = "/home/user/inputdata"
    val readthreadnum = 4 // number of threads that read and sort the input

    // Count the input lines once and release the file handle afterwards.
    val lineCounter = Source.fromFile(inputPath)
    val filesize: Long =
      try lineCounter.getLines().length.toLong
      finally lineCounter.close()

    val fileblock = filesize / readthreadnum // lines handled by each thread
    val leftover = filesize % readthreadnum  // lines that do not divide evenly

    // Names of all sorted chunk files, gathered on the main thread only.
    val chunkFiles = new ArrayBuffer[String]()
    val threadPool: ExecutorService = Executors.newFixedThreadPool(readthreadnum)
    try {
      val futureList = new ArrayBuffer[FutureTask[ArrayBuffer[String]]]()
      for (i <- 1 to readthreadnum) {
        val linesToSkip = fileblock * (i - 1)
        // The last thread also takes the remainder so that no line is lost.
        val linesToRead = if (i == readthreadnum) fileblock + leftover else fileblock
        val future = new FutureTask[ArrayBuffer[String]](new Callable[ArrayBuffer[String]] {
          override def call(): ArrayBuffer[String] = {
            // Every task owns its reader and its result buffer; nothing is
            // shared between worker threads.
            val reader = new BufferedReader(new FileReader(inputPath))
            try {
              var skipped = 0L
              while (skipped < linesToSkip) {
                reader.readLine()
                skipped += 1
              }
              new sortThread().readsort(reader, linesToRead)
            } finally {
              reader.close()
            }
          }
        })
        futureList += future
        threadPool.execute(future)
      }
      // get() blocks until the task is done and rethrows any task failure.
      for (fu <- futureList) chunkFiles ++= fu.get()
    } finally {
      threadPool.shutdown()
    }
    val time1 = System.currentTimeMillis()
    println("The read time is  " + (time1 - timebegin) / 1000.0 + "  s"+"\n")

    // Write the chunk file names to the index file consumed by mergethread.
    val tempath = "/project/sort/temp/temp"
    val out = new PrintWriter(tempath)
    try {
      for (name <- chunkFiles) out.println(name)
    } finally {
      out.close()
    }

    // Parallel merge of the chunk files listed in the index file.
    val merge = new mergethread
    merge.merge(tempath)
    // Remove the index file so it is not mistaken for a data file below.
    new File(tempath).delete()

    // Collect every file left in the temporary directory and merge them into one.
    val resultname = new util.ArrayList[String]()
    // listFiles() returns null when the directory cannot be listed.
    val remaining = Option(new File("/project/sort/temp").listFiles()).getOrElse(Array.empty[File])
    for (f <- remaining) resultname.add(f.getAbsolutePath)
    val print = new sortThread
    print.mergeSort(resultname)

    val timeover = System.currentTimeMillis()
    println("The merge time is  " + (timeover - time1) / 1000.0 + "  s" + "\n\n\n")
    println("The all runtime is  " + (timeover - timebegin) / 1000.0 + "  s" + "\n\n\n")
  }
}

import java.io._
import java.util
import scala.collection.mutable.ArrayBuffer
import scala.util.control.Breaks.{break, breakable}

/**
  * Created by wilbur on 6/17/17.
  */
/**
  * Building blocks of the external sort: reading a line range and writing it
  * out as sorted chunk files, and pairwise merging of sorted files.
  *
  * All temporary files are created under `/project/sort/temp/`. Every line of
  * every file is expected to hold one integer that fits in a `Long`.
  */
class sortThread {
  /** Maximum number of records sorted in memory and written to one chunk file. */
  val size: Int = 10000000

  /**
    * Buffer holding the records of the chunk currently being read.
    * Allocated on first use so that instances used only for merging do not
    * reserve `size` longs of memory.
    */
  lazy val nums: Array[Long] = new Array[Long](size)

  /** Number of records currently stored in `nums`. */
  var index = 0

  /** Builds a fresh path in the temporary directory; the UUID keeps names
    * unique even when several threads ask at the same nanosecond. */
  private def newTempFileName(): String =
    s"/project/sort/temp/${System.nanoTime()}-${util.UUID.randomUUID()}"

  /**
    * Reads up to `fileblock` lines from `fr`, sorts them in chunks of at most
    * `size` records and writes each chunk to its own temporary file.
    *
    * Reading stops early if the reader reaches end of input.
    *
    * @param fr        reader positioned at the first line to consume
    * @param fileblock number of lines to consume
    * @return the names of the chunk files written, in creation order
    */
  def readsort(fr: BufferedReader, fileblock: Long): ArrayBuffer[String] = {
    val fileNames = new ArrayBuffer[String]()
    val buffer = nums
    var remaining = fileblock
    var endOfInput = false
    while (remaining > 0 && !endOfInput) {
      // readLine() returns null at end of input.
      Option(fr.readLine()) match {
        case Some(line) =>
          buffer(index) = line.toLong
          index += 1
          remaining -= 1
          if (index == size) { // buffer full: sort it and spill to disk
            fileNames += sortAndSave(buffer, index)
            index = 0
          }
        case None =>
          endOfInput = true
      }
    }
    // Flush the final, partially filled chunk (every line read is kept).
    if (index > 0) {
      fileNames += sortAndSave(buffer, index)
      index = 0
    }
    fileNames
  }

  /**
    * Sorts the first `size` elements of `nums` in place and writes them, one
    * per line, to a new temporary file.
    *
    * @param nums array whose leading elements are to be sorted and saved
    * @param size number of leading elements to use
    * @return the name of the file written
    */
  def sortAndSave(nums: Array[Long], size: Int): String = {
    if (size > 1) util.Arrays.sort(nums, 0, size)
    val fileName = newTempFileName()
    val bw = new PrintWriter(new BufferedWriter(new FileWriter(fileName)))
    try {
      var k = 0
      while (k < size) {
        bw.println(nums(k))
        k += 1
      }
    } finally {
      bw.close()
    }
    fileName
  }

  /** Writes `pending` (if non-null) and then every remaining line of `reader` to `out`. */
  private def copyRemaining(pending: String, reader: BufferedReader, out: BufferedWriter): Unit = {
    var line = pending
    while (line != null) {
      out.write(line + "\n")
      line = reader.readLine()
    }
  }

  /**
    * Two-way merge of two ascending files into `out`.
    * On equal values the record from `second` is written first.
    */
  private def mergeTwo(first: BufferedReader, second: BufferedReader, out: BufferedWriter): Unit = {
    var line1 = first.readLine()
    var line2 = second.readLine()
    var num1 = 0L
    var num2 = 0L
    if (line1 != null) num1 = line1.toLong
    if (line2 != null) num2 = line2.toLong
    while (line1 != null && line2 != null) {
      if (num1 < num2) {
        out.write(s"$num1\n")
        line1 = first.readLine()
        if (line1 != null) num1 = line1.toLong
      } else {
        out.write(s"$num2\n")
        line2 = second.readLine()
        if (line2 != null) num2 = line2.toLong
      }
    }
    // At most one of the two inputs still has data.
    copyRemaining(line1, first, out)
    copyRemaining(line2, second, out)
  }

  /**
    * Merges the given sorted files pairwise, repeating on the results until a
    * single file is left. Input files are deleted once they have been merged;
    * the final file is renamed to a fresh name in the temporary directory.
    *
    * @param fileNames paths of the sorted files to merge; an empty list is a no-op
    */
  def mergeSort(fileNames: util.ArrayList[String]): Unit = {
    val tempFileNames = new util.ArrayList[String]()
    var i = 0
    while (i < fileNames.size()) {
      val resultFileName = newTempFileName()
      tempFileNames.add(resultFileName)
      val file1 = new File(fileNames.get(i))
      // The last file of an odd-sized list has no partner and is copied as is.
      val file2 =
        if (i + 1 < fileNames.size()) Some(new File(fileNames.get(i + 1))) else None
      val bw = new BufferedWriter(new FileWriter(resultFileName))
      try {
        val br1 = new BufferedReader(new FileReader(file1))
        try {
          file2 match {
            case Some(partner) =>
              val br2 = new BufferedReader(new FileReader(partner))
              try mergeTwo(br1, br2, bw)
              finally br2.close()
            case None =>
              copyRemaining(br1.readLine(), br1, bw)
          }
        } finally {
          br1.close()
        }
      } finally {
        bw.close()
      }
      // Inputs are removed only after the merged output was written and closed.
      file1.delete()
      file2.foreach(_.delete())
      i += 2
    }
    val produced = tempFileNames.size()
    if (produced > 1) {
      mergeSort(tempFileNames)
    } else if (produced == 1) {
      val file = new File(tempFileNames.get(0))
      file.renameTo(new File(newTempFileName()))
    }
  }

  /**
    * Sorts `inputData(left..right)` (both bounds inclusive) in ascending order,
    * in place. Does nothing when `left >= right`.
    *
    * Delegates to the JDK sort, which does not recurse once per element on
    * already-sorted input.
    *
    * @param inputData array to sort
    * @param left      index of the first element of the range
    * @param right     index of the last element of the range
    */
  def qsort(inputData: Array[Long], left: Int, right: Int): Unit = {
    if (left < right) util.Arrays.sort(inputData, left, right + 1)
  }
}

import java.util.concurrent.{Callable, ExecutorService, Executors, FutureTask}
import java.util

import scala.collection.mutable.ArrayBuffer
import scala.io.Source

/**
  * Created by wilbur on 6/20/17.
  */
/**
  * Merges sorted chunk files in parallel: the files named in an index file are
  * divided among a fixed number of threads, and each thread merges its share
  * with `sortThread.mergeSort`.
  */
class mergethread {
  /**
    * Reads the chunk file names from `filepath` (one name per line), splits
    * them into `mergethreadnum` contiguous groups and merges each group on its
    * own thread. The last group also receives the names left over when the
    * count does not divide evenly. Blocks until every group has been merged.
    *
    * @param filepath path of the index file listing the chunk files to merge
    */
  def merge(filepath: String): Unit = {
    val mergethreadnum = 4

    // Read the index file once and release the handle before any merging starts.
    val indexSource = Source.fromFile(filepath)
    val names: Vector[String] =
      try indexSource.getLines().toVector
      finally indexSource.close()
    val share = names.length / mergethreadnum

    val threadPool1: ExecutorService = Executors.newFixedThreadPool(mergethreadnum)
    try {
      val futureList1 = new ArrayBuffer[FutureTask[Int]]()
      for (i <- 1 to mergethreadnum) {
        val start = share * (i - 1)
        val end = if (i == mergethreadnum) names.length else share * i
        // Each task gets its own list; nothing is shared between workers.
        val batch = new util.ArrayList[String]()
        names.slice(start, end).foreach(name => batch.add(name))
        val future = new FutureTask[Int](new Callable[Int] {
          override def call(): Int = {
            new sortThread().mergeSort(batch)
            0
          }
        })
        futureList1 += future
        threadPool1.execute(future)
      }
      // Wait for all merges; get() rethrows a failure raised inside a task.
      for (fu <- futureList1) fu.get()
    } finally {
      threadPool1.shutdown()
    }
  }
}
  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值