python遍历文件夹并按日期排列文件

可怜d小小杰

于 2024-03-19 13:07:00 发布

阅读量512

点赞数 8

分类专栏： python cmd 文章标签： python windows

本文链接：https://blog.csdn.net/weixin_39810251/article/details/136628831

版权

python 同时被 2 个专栏收录

16 篇文章 1 订阅

订阅专栏

cmd

4 篇文章 0 订阅

订阅专栏

说到用 Python 遍历文件，首先会想到 os.walk()。这是错的——好吧，也没有很错……反正把我累到了。
废话乱说，上代码

from datetime import date, datetime
import os

def cft(filepath):
    """Collect every file below *filepath*, grouped by its containing directory.

    Args:
        filepath: Root directory to walk (bottom-up, via ``os.walk``).

    Returns:
        A ``(sub1, dictd)`` tuple.

        ``sub1`` holds one entry per file found: the directory containing
        that file, so a directory with three files appears three times.
        Entries follow ``os.walk`` order.

        ``dictd`` maps each directory that contains at least one file to
        ``{'files': [...], 'date': [...]}``. ``files`` are full paths sorted
        by name and ``date[i]`` is ``os.path.getmtime(files[i])``. Keys are
        inserted in sorted order.

    Raises:
        OSError: Propagated from ``os.path.getmtime`` if a listed file cannot
            be stat'ed.
    """
    sub1 = []
    grouped = {}

    for folder, _subdirs, names in os.walk(filepath, topdown=False):
        if not names:
            continue
        sub1.extend([folder] * len(names))
        # Group while walking. Sorting the path list and the directory list
        # separately and pairing them by index mis-assigns files whenever the
        # two sort orders disagree (e.g. sibling folders "a" and "a-b").
        paths = sorted(os.path.join(folder, name) for name in names)
        grouped[folder] = {
            'files': paths,
            'date': [os.path.getmtime(path) for path in paths],
        }

    # Deterministic key order regardless of the order os.walk produced.
    dictd = {folder: grouped[folder] for folder in sorted(grouped)}
    return sub1, dictd
    
    
    
def sort_by_datetime(subfolders, dictt):
    """Order each folder's files by their recorded timestamps (oldest first).

    Args:
        subfolders: Iterable of folder keys; duplicates are allowed and are
            collapsed before processing.
        dictt: Mapping ``folder -> {'files': [...], 'date': [...]}`` where
            ``date[i]`` is the timestamp belonging to ``files[i]``.

    Returns:
        A dict ``folder -> {'files': [...], 'sortdate': [...]}`` in which
        ``sortdate`` is ascending and ``files[i]`` is the file that carries
        ``sortdate[i]``. Files with equal timestamps keep their input order.
        An empty ``subfolders`` yields ``{}``.

    Raises:
        KeyError: If a folder in ``subfolders`` is missing from ``dictt``.
    """
    newdict = {}
    sub = sorted(set(subfolders))
    if not sub:
        return newdict

    print(sub)
    for folder in sub:
        entry = dictt[folder]
        dates = entry['date']
        # Sort positions rather than values: every file is used exactly once
        # even when timestamps repeat, and the sort is stable.
        order = sorted(range(len(dates)), key=dates.__getitem__)
        newdict[folder] = {
            'files': [entry['files'][pos] for pos in order],
            'sortdate': [dates[pos] for pos in order],
        }
    return newdict


# Demo driver for the os.walk approach: scan a hard-coded Windows folder.
# cft() returns a pair; the first item is bound to `sub`, the second to `dictd`.
sub,dictd=cft('C:\\Users\\kt.si\\Downloads\\a\\')

# Feed both results into sort_by_datetime() and keep the returned dict.
dicte=sort_by_datetime(sub,dictd)


# Pretty-print the result with a 4-space indent.
import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(dicte)

吐槽一下：os.walk 返回的文件是乱序的（乱到不行那种），所以要先做 sorted，排序之后组成一个字典，再交给 sort_by_datetime 对 ['date'] 排序，并按排序后的索引调整 ['files'] 的顺序，最后生成新的字典。

好像挺繁复的，搞了我很久，但其实根本不太需要做的这么麻烦

所以常被大佬骂：用 os.listdir()、os.popen() 它不更香吗？废话乱说，看下面。

os.listdir() 方法

def loopfolderstreeuselistdir(fpath, folders=None, dird=None, dd=None):
    """Return every sub-directory reachable from a root, using ``os.listdir``.

    Args:
        fpath: Root directory to scan. The root itself is not part of the
            result. Pass ``None`` to start from ``folders`` instead.
        folders: Optional iterable of directories that are already known.
            They are included in the result and scanned as well. The
            caller's object is not modified.
        dird: Accepted for backward compatibility; its value is never read.
        dd: Optional iterable of extra paths to classify. Only consulted
            when ``fpath`` is ``None``; entries that are directories are
            added to the result and scanned.

    Returns:
        A sorted list of unique directory paths.

    Raises:
        OSError: Propagated from ``os.listdir`` if a directory cannot be read.
    """
    # Fresh containers on every call: list defaults in the signature would be
    # shared between calls and leak results from one scan into the next.
    known = set(folders) if folders else set()
    pending = list(known)
    if fpath is not None:
        pending.append(fpath)

    def remember(path):
        # os.path.isdir (rather than "not a file") keeps broken links and
        # other non-directory entries out of the os.listdir calls below.
        if os.path.isdir(path) and path not in known:
            known.add(path)
            pending.append(path)

    if fpath is None and dd:
        for entry in dd:
            remember(entry)

    # Work-list traversal: each directory is listed exactly once, and the
    # loop ends when no unvisited directory remains, independent of the
    # order in which os.listdir returns entries.
    while pending:
        current = pending.pop()
        for name in os.listdir(current):
            remember(os.path.join(current, name))

    return sorted(known)


# Demo driver for the os.listdir approach: scan a hard-coded Windows folder
# and keep the value returned by loopfolderstreeuselistdir() in `folders`.
fpath='C:\\Users\\kt.si\\Downloads\\a\\'
folders=loopfolderstreeuselistdir(fpath)
# The lines below are disabled; they would print `folders` and pretty-print a
# `dictt` mapping, which this part of the script does not build.
###with dict
# print(folders)
# import pprint
# pp = pprint.PrettyPrinter(indent=4)
# pp.pprint(dictt)

os.popen() 方法

def cmddir_listdir(fpath):
    """List files per directory by parsing the output of the shell's ``dir /s``.

    The parser recognises a directory header by the literal marker '的目錄',
    i.e. it expects the listing to be produced in Traditional Chinese; other
    display languages will not match and yield an empty result.

    Args:
        fpath: Directory handed to ``dir /s``. It is wrapped in double quotes
            so that paths containing spaces stay a single argument.

    Returns:
        A dict keyed by the directory header text with its last four
        characters removed. Each value is ``{}`` for a directory without
        recognised files, otherwise ``{'files': [...], 'date': [...]}`` where
        ``files`` are stripped full paths and ``date`` is the concatenation of
        the first five space-separated fields of the matching listing line.

    Limitations:
        Only the last space-separated token of a line is treated as the file
        name, so file names that contain spaces are not recognised.
    """
    # The context manager closes the pipe (and reaps the child process) even
    # if reading fails.
    with os.popen(f'dir /s "{fpath}"') as pipe:
        listing = pipe.read()

    dictt = {}
    dirdd = None  # header text of the directory currently being listed

    # The final four lines of the output are skipped; presumably that is the
    # summary footer of `dir /s` -- TODO confirm against real output.
    for line in listing.split('\n')[:-4]:
        if line.find('的目錄') > -1:
            dirdd = line[:-4]
            dictt[dirdd] = {}
            continue

        if dirdd is None:
            # Nothing can be attributed before the first directory header.
            continue

        name = line.split(' ')[-1]
        path = os.path.join(dirdd, name).strip()
        if not os.path.isfile(path):
            continue

        # The listing already carries a timestamp per entry, so it is read
        # from the line instead of stat'ing the file.
        stamp = ''.join(line.split(' ')[0:5]).strip()
        entry = dictt[dirdd]
        entry.setdefault('files', []).append(path)
        entry.setdefault('date', []).append(stamp)

    return dictt

# Demo driver for the os.popen approach: reuse the `fpath` defined earlier in
# the script and keep the dict returned by cmddir_listdir() in `dictt`.
dictt=cmddir_listdir(fpath)

# Pretty-print the result with a 4-space indent (relies on the earlier
# `import pprint`).
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(dictt)