内容涉及:关键字定位,列表去重复,路径组装,文件夹创建,文件拷贝,字符串分割
list.txt的内容为包含关键字的文件路径(每行一个),例如关键字为 '181' 时:
org/20190523/1/2019052320196231816923IMG039x030.JPG_n_1_1375_220_1475_320.png
org/20190523/7/2019052320196171812302IMG007x030.JPG_p_7_287_191_387_291.png
... ...
import os
import numpy as np
import re
from shutil import copyfile
def getlist(path, name):
    """Collect the distinct 7-character IDs found in a path-list file.

    Every line of the file at ``path`` is searched for ``name`` with
    ``re.search`` (so ``name`` is treated as a regular expression). For the
    first match on a line, the 7 characters starting at the match position
    are taken as the ID.

    Lines that do not contain ``name`` (for example blank lines) are skipped
    instead of raising ``AttributeError`` on the missing match.

    Args:
        path: Path of the text file to read, one entry per line.
        name: Keyword (regex pattern) marking where an ID starts.

    Returns:
        A set of the ID strings found, with duplicates removed.
    """
    pattern = re.compile(name)  # hoisted: the pattern is loop-invariant
    ids = set()
    with open(path) as list_file:  # ensures the handle is closed
        for raw_line in list_file:
            # Drop the line terminator so it can never end up inside an ID.
            line = raw_line.rstrip('\n')
            match = pattern.search(line)
            if match is None:
                continue
            start = match.start()
            ids.add(line[start:start + 7])
    return ids
def mkdir(file1, bingId, subdirs=('1', '7')):
    """Create the ``file1/<id>/<subdir>`` directory tree.

    Directories that already exist are left untouched, and missing parent
    directories of ``file1`` are created as needed.

    Args:
        file1: Root (first-level) directory of the tree.
        bingId: Iterable of ID strings; each becomes a second-level directory.
        subdirs: Names of the third-level directories created under every ID.
            Defaults to ``('1', '7')``, the two names the original
            implementation hard-coded.
    """
    # makedirs(exist_ok=True) replaces the check-then-mkdir pairs: it avoids
    # the exists/mkdir race and also works when file1 is a nested path.
    os.makedirs(file1, exist_ok=True)
    for case_id in bingId:
        case_dir = os.path.join(file1, case_id)
        os.makedirs(case_dir, exist_ok=True)
        for sub in subdirs:
            os.makedirs(os.path.join(case_dir, str(sub)), exist_ok=True)
def copytoflods(path_all_list, name, file1):
    """Copy every file listed in ``path_all_list`` into the ``file1`` tree.

    For each line (a source file path) the destination is built as
    ``file1/<id>/<type>/<tail>.png`` where

    * ``<id>`` is the 7 characters starting at the first ``re.search`` match
      of ``name`` in the line,
    * ``<type>`` is the third ``'_'``-separated field of the line,
    * ``<tail>`` is the last 20 characters of the first ``'_'``-separated
      field.

    Lines that do not contain ``name`` (for example blank lines) are skipped.
    The destination directory is created if it does not exist yet.

    Args:
        path_all_list: Text file with one source path per line.
        name: Keyword (regex pattern) marking where the ID starts.
        file1: Root directory of the destination tree.

    Raises:
        IndexError: If a matching line has fewer than three ``'_'``-separated
            fields.
        OSError: If a source file cannot be read or a destination cannot be
            written.
    """
    pattern = re.compile(name)  # hoisted: the pattern is loop-invariant
    with open(path_all_list) as list_file:  # ensures the handle is closed
        for raw_line in list_file:
            # Strip only the line terminator. The previous ``n[:-1]`` also
            # removed the last real character of the path when the final
            # line of the file had no trailing newline.
            src = raw_line.rstrip('\n')
            match = pattern.search(src)
            if match is None:
                continue
            start = match.start()
            bing_id = src[start:start + 7]
            fields = src.split('_')
            type_ = fields[2]
            dest = os.path.join(file1, bing_id, type_, fields[0][-20:] + '.png')
            # Do not depend on the directory tree having been prepared
            # beforehand (e.g. when <type> is an unexpected value).
            os.makedirs(os.path.dirname(dest), exist_ok=True)
            copyfile(src, dest)
if __name__ == '__main__':
    # Script entry point: read the path list, prepare the directory tree for
    # every ID found, then copy the listed files into it.
    list_file = './list.txt'  # file holding the paths to process
    keyword = '181'           # keyword that marks the start of an ID
    target_root = 'dataset'   # first-level output directory

    mkdir(target_root, getlist(list_file, keyword))
    copytoflods(list_file, keyword, target_root)