外部排序

本来是想用JS写的,但是浏览器不支持IO操作,这里用python写了一个二路归并排序

说明

1.最简单的二路归并排序,缓冲区大小由归并时创建BufferA、BufferB、BufferC的Buffer(n)参数决定(Buffer(n)实际最多存放n-1个数)
2.运行sort.py产生三个文件夹ArrayIn,ArrayOut,Merge
3.正常的外部排序应该是把一个大文件拆成若干个小文件,这一步我省略了,所以ArrayIn默认就是已经拆好的、内部无序的小文件;当ArrayIn文件夹为空的时候,会调用creatData()自动生成小文件
4.Merge文件夹存放归并段:调用creatMerge()将小文件排序整理成归并段,也可以自己增加归并段;删除mergeSort()末尾的removeFile(mergePath)调用,可以保留中间生成的文件
5.ArrayOut就是输出文件

#初始化准备
#假如ArrayIn文件为空,则自动生成数据
class LinkLnode:
    """One node of a singly linked list: a payload plus a link to the next node."""

    def __init__(self):
        # A fresh node carries no payload and is not linked to anything yet.
        self.next = None
        self.data = None

class LinkQueue:
    """FIFO queue stored as a singly linked list with a sentinel head node.

    ``front`` always refers to the sentinel. ``rear`` refers to the last
    node, which is the sentinel itself while the queue is empty.
    """

    def __init__(self):
        sentinel = LinkLnode()
        self.front = sentinel
        self.rear = sentinel

    def IsEmpty(self):
        """Return True when no value is queued."""
        return self.front == self.rear

    def EnQueue(self, value):
        """Append ``value`` behind the current last node."""
        node = LinkLnode()
        node.data = value
        node.next = None
        self.rear.next = node
        self.rear = node

    def DeQueue(self):
        """Remove and return the oldest value, or False when the queue is empty."""
        if self.front == self.rear:
            return False
        oldest = self.front.next
        self.front.next = oldest.next
        # Removing the only remaining node must point rear back at the sentinel.
        if oldest == self.rear:
            self.rear = self.front
        return oldest.data
class Buffer:
    """Fixed-size ring buffer over a preallocated list.

    One slot is always left unused so that ``front == rear`` can mean
    "empty"; a buffer created with ``n`` slots therefore holds at most
    ``n - 1`` values.
    """

    def __init__(self, n):
        self.data = [None] * n
        self.front = 0
        self.rear = 0
        self.size = n

    def IsEmpty(self):
        """Return True when the buffer holds no value."""
        return self.front == self.rear

    def IsFull(self):
        """Return True when advancing ``rear`` would collide with ``front``."""
        return (self.rear + 1) % self.size == self.front

    def GetTop(self):
        """Return the oldest value without removing it, or None when empty."""
        if self.IsEmpty():
            return None
        return self.data[self.front]

    def EnBuffer(self, value):
        """Store ``value`` at the rear; return False (storing nothing) when full."""
        if self.IsFull():
            return False
        self.data[self.rear] = value
        self.rear = (self.rear + 1) % self.size
        return True

    def DeBuffer(self):
        """Remove and return the oldest value, or False when empty."""
        if self.IsEmpty():
            return False
        oldest = self.data[self.front]
        self.front = (self.front + 1) % self.size
        return oldest
      
def prepare():
    """Make sure ./ArrayIn exists and has input files.

    The directory is created when missing; when it contains no entries,
    creatData() is asked to populate it (arguments 4 and 10).
    """
    import os
    inputDir = os.path.join(os.path.abspath('.'), 'ArrayIn')
    if not os.path.exists(inputDir):
        os.mkdir(inputDir)
    # An empty listing means there is nothing to sort yet.
    if len(os.listdir(inputDir)) == 0:
        creatData(inputDir, 4, 10)
    


def creatData(path,m,n):
    """Create ``m`` text files under ``path``, each holding ``n`` random integers.

    Files are named ``0.txt`` .. ``m-1.txt``. Every line contains one integer
    drawn uniformly from the inclusive range [0, n].

    Args:
        path: existing directory that receives the files.
        m: number of files to create.
        n: number of integers per file, and also the upper bound of the values.
    """
    import os
    import random
    for index in range(m):
        # `with` closes the file even if a write fails part-way through.
        with open(os.path.join(path, str(index) + '.txt'), 'w') as f:
            # Exactly n lines: the previous `while N >= 0` loop wrote n + 1.
            f.writelines(str(random.randint(0, n)) + '\n' for _ in range(n))
       
def creatMerge():
    """Turn every input file in ./ArrayIn into a sorted run in ./Merge.

    Creates ./Merge and ./ArrayOut when they are missing. Runs are only
    generated while ./Merge is empty, so runs already placed there are left
    alone. Each input file is expected to hold one integer per line; blank
    lines are skipped. Only regular files directly inside ./ArrayIn are
    used, because every run is written flat into ./Merge under the same
    file name.

    Raises:
        ValueError: if a non-blank line is not an integer.
    """
    import os
    currentPath = os.path.abspath('.')
    mergePAth = os.path.join(currentPath, 'Merge')
    inPath = os.path.join(currentPath, 'ArrayIn')
    outPath = os.path.join(currentPath, 'ArrayOut')
    for folder in (mergePAth, outPath):
        if not os.path.exists(folder):
            os.mkdir(folder)
    # Nothing to do when runs already exist or there is no input directory.
    if os.listdir(mergePAth) or not os.path.isdir(inPath):
        return
    for name in sorted(os.listdir(inPath)):
        source = os.path.join(inPath, name)
        # Skip subdirectories: walking into them and then opening
        # inPath/name (as before) pointed at files that do not exist.
        if not os.path.isfile(source):
            continue
        with open(source, 'r') as f:
            nums = [int(line) for line in f if line.strip()]
        nums.sort()
        # Write the sorted run under the same file name.
        with open(os.path.join(mergePAth, name), 'w') as f:
            f.writelines(str(num) + '\n' for num in nums)
#先装填缓冲区,然后比较,当缓冲区为空时,装填缓冲区
def _flushBuffer(buffer, f):
    """Write every value queued in ``buffer`` to ``f``, one per line."""
    while not buffer.IsEmpty():
        f.write(str(buffer.DeBuffer()) + '\n')


def _mergeRuns(fA, fB, fC):
    """Merge two sorted runs into ``fC`` through small fixed-size buffers.

    ``fA`` and ``fB`` are open text files holding one integer per line in
    ascending order; ``fC`` is an open text file that receives the merged
    sequence.
    """
    # Buffer sizes: change the arguments here to tune them.
    BufferA = Buffer(3)
    BufferB = Buffer(3)
    BufferC = Buffer(6)
    while True:
        # Refill BEFORE testing for termination. Stopping merely because all
        # three buffers are momentarily empty would drop whatever is still
        # unread in the input files.
        if BufferA.IsEmpty():
            fillBuffer(BufferA, fA)
        if BufferB.IsEmpty():
            fillBuffer(BufferB, fB)
        TopA = BufferA.GetTop()
        TopB = BufferB.GetTop()
        if TopA is None and TopB is None:
            # Both inputs are exhausted.
            break
        # Ties go to A, as in a stable two-way merge.
        if TopB is None or (TopA is not None and TopA <= TopB):
            BufferC.EnBuffer(BufferA.DeBuffer())
        else:
            BufferC.EnBuffer(BufferB.DeBuffer())
        if BufferC.IsFull():
            _flushBuffer(BufferC, fC)
    _flushBuffer(BufferC, fC)


def mergeSort():
    """Merge all sorted runs in ./Merge into one file under ./ArrayOut.

    Runs are merged two at a time. Each intermediate result is written to
    ./Merge as ``<number>.txt`` and queued again until one run is left. That
    run is copied to ./ArrayOut under a timestamped name, after which the
    files in ./Merge are removed. Only regular files directly inside
    ./Merge are treated as runs.

    Raises:
        FileNotFoundError: if ./Merge is missing or contains no run.
    """
    import os
    import time
    outName = time.strftime("%Y-%m-%d %H-%M-%S", time.localtime()) + '.txt'
    currentPath = os.path.abspath('.')
    mergePath = os.path.join(currentPath, 'Merge')
    outPath = os.path.join(currentPath, 'ArrayOut')
    queue = LinkQueue()
    totalName = 0
    if os.path.isdir(mergePath):
        for name in sorted(os.listdir(mergePath)):
            if os.path.isfile(os.path.join(mergePath, name)):
                totalName = totalName + 1
                queue.EnQueue(name)
    if queue.IsEmpty():
        raise FileNotFoundError('no sorted runs found in ' + mergePath)
    while True:
        A = queue.DeQueue()
        if queue.IsEmpty():
            # A is the only run left, i.e. the fully merged result.
            break
        B = queue.DeQueue()
        # Never reuse the name of an existing file: runs added by hand may
        # already occupy "<number>.txt" and would be truncated otherwise.
        while os.path.exists(os.path.join(mergePath, str(totalName) + '.txt')):
            totalName = totalName + 1
        merged = str(totalName) + '.txt'
        with open(os.path.join(mergePath, A), 'r') as fA, \
                open(os.path.join(mergePath, B), 'r') as fB, \
                open(os.path.join(mergePath, merged), 'w') as fC:
            _mergeRuns(fA, fB, fC)
        queue.EnQueue(merged)
        totalName = totalName + 1
    if not os.path.exists(outPath):
        os.mkdir(outPath)
    # Copy the surviving run itself rather than guessing its name from the
    # counter, which was wrong when ./Merge held a single, arbitrarily named run.
    copyFile(os.path.join(mergePath, A), os.path.join(outPath, outName))
    # Clear the intermediate files in ./Merge.
    removeFile(mergePath)
def removeFile(Path):
    """Delete every file found under ``Path``, including those in subdirectories.

    Directories themselves are left in place.
    """
    import os
    for root, dirs, files in os.walk(Path):
        for name in files:
            # Join with `root`, the directory being visited. Joining with
            # `Path` pointed at non-existent files for anything nested.
            os.remove(os.path.join(root, name))
def copyFile(PathA,PathB):
    """Copy the text file ``PathA`` to ``PathB`` line by line.

    ``PathB`` is created or overwritten. Both files are closed on every
    exit path, including when opening ``PathB`` fails after ``PathA`` was
    already opened.
    """
    with open(PathA, 'r') as fA, open(PathB, 'w') as fB:
        for line in fA:
            fB.write(line)
def fillBuffer(Buffer,f):
    """Top up ``Buffer`` with integers read from ``f``, one per line.

    Reading stops as soon as the buffer reports full or the file has no
    more lines. Each line is stripped and converted with int().
    """
    while not Buffer.IsFull():
        line = f.readline()
        if not line:
            # readline() returns an empty string only at end of file.
            return
        Buffer.EnBuffer(int(line.strip()))

    
   
def _runDemo():
    """Run the external-sort demo: prepare input, build runs, merge them."""
    prepare()  # generate test data when ArrayIn has no files
    creatMerge()  # sort the unsorted input files into runs; inspect them under Merge
    mergeSort()
    print('完毕')


_runDemo()




 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值