想从图片集目录下的每个子目录中随机抽取 X 个样本用于测试,并以文件内容的 MD5 值重新命名文件。Python 代码如下:
import os, random, shutil
import hashlib
def rand_sampling(pathDir, n):
    """Pick up to ``n`` distinct entries from ``pathDir`` at random.

    :param pathDir: list of candidate items (e.g. file names); it is left
        unmodified
    :param n: number of items to draw; a value larger than
        ``len(pathDir)`` is clamped to the list length, and a value below 1
        yields an empty list
    :return: a new list holding the randomly chosen items
    """
    # Clamp the request so an oversized n returns every item instead of
    # raising once the pool of candidates is exhausted.
    count = max(0, min(n, len(pathDir)))
    # random.sample draws without replacement and does not touch its input,
    # so the caller's list keeps all of its entries.
    return random.sample(pathDir, count)
def file_md5(file_name):
    """Return the hexadecimal MD5 digest of a file's contents.

    :param file_name: path of the file to hash
    :return: the 32-character hex digest, or ``None`` when ``file_name``
        is not an existing regular file
    """
    if not os.path.isfile(file_name):
        return None
    digest = hashlib.md5()
    with open(file_name, 'rb') as fp:
        # Feed the hash in fixed-size chunks so a large file is never
        # loaded into memory in one piece.
        for chunk in iter(lambda: fp.read(1 << 16), b''):
            digest.update(chunk)
    return digest.hexdigest()
def moveFile(source, aim, picknumber):
    """Copy a random sample of the files in ``source`` into ``aim``.

    Despite the name, files are copied, not moved. Every copy is named
    after the MD5 digest of its contents plus a ".jpg" suffix, so files
    with identical contents end up as a single file in ``aim``.

    :param source: directory to sample from
    :param aim: directory that receives the copies; it must already exist
    :param picknumber: maximum number of files to copy; when ``source``
        holds no more than this many files, all of them are copied
    :return: None
    """
    # Only regular files can be hashed and copied, so sub-directories that
    # os.listdir also reports are left out of the candidate list.
    pathDir = [
        name for name in os.listdir(source)
        if os.path.isfile(os.path.join(source, name))
    ]
    if len(pathDir) <= picknumber:
        sample = pathDir
    else:
        # Hand over a copy so the candidate list stays intact for the
        # count reported below, whatever the helper does with its argument.
        sample = rand_sampling(list(pathDir), picknumber)
    print(source)
    print("len:", len(pathDir))
    for name in sample:
        src_path = os.path.join(source, name)
        digest = file_md5(src_path)
        if digest is None:
            # The file stopped being a regular file after it was listed.
            continue
        # os.path.join works whether or not the directory arguments end
        # with a path separator.
        shutil.copyfile(src_path, os.path.join(aim, digest + ".jpg"))
if __name__ == '__main__':
    picknumber = 10  # number of files drawn from each directory
    fileDir = r"E:\pictures\data\for_hegui"  # root folder of the source images
    tarDir = r'E:\测试图集\随机抽取'  # root folder that receives the samples
    # Start from an empty target tree; on a first run there is nothing to
    # remove, and rmtree would raise on a missing path.
    if os.path.exists(tarDir):
        shutil.rmtree(tarDir)
    for root, dirs, files in os.walk(fileDir):
        for d in dirs:
            # Keep the trailing separator so the paths stay usable even
            # when a file name is appended by plain string concatenation.
            source = os.path.join(fileDir, d) + '/'
            aim = os.path.join(tarDir, d) + '/'
            os.makedirs(aim, exist_ok=True)
            moveFile(source, aim, picknumber)
        # Only the direct children of fileDir are category folders; the
        # paths above are built from fileDir, so descending further would
        # point at directories that do not exist there.
        break
这样可以保证每个目录每次抽取固定数量的样本(目录内文件数量不足时则全部抽取)。
下面的代码在抽取的同时,自动将英文文件夹名改为中文名,便于人工测试;只要在 Dictionary_chineseDirs 中写好对应关系即可。
import os, random, shutil
import hashlib
def rand_sampling(pathDir, n):
    """Draw at most ``n`` distinct items from ``pathDir`` at random.

    :param pathDir: list of candidate items (e.g. file names); the list
        itself is not modified
    :param n: how many items to draw; a request larger than
        ``len(pathDir)`` is reduced to the list length, and a value below 1
        produces an empty list
    :return: a new list with the randomly selected items
    """
    # Never ask for more items than exist, and never for a negative count.
    count = max(0, min(n, len(pathDir)))
    # Sampling without replacement; the input list is only read, so the
    # caller still sees every original entry afterwards.
    return random.sample(pathDir, count)
def file_md5(file_name):
    """Compute the MD5 hex digest of the contents of ``file_name``.

    :param file_name: path of the file to hash
    :return: the hex digest string, or ``None`` when ``file_name`` does not
        refer to an existing regular file
    """
    if not os.path.isfile(file_name):
        return None
    hasher = hashlib.md5()
    with open(file_name, 'rb') as fp:
        # Read block by block so memory use stays flat for large files.
        for block in iter(lambda: fp.read(1 << 16), b''):
            hasher.update(block)
    return hasher.hexdigest()
def moveFile(source, aim, picknumber):
    """Copy a random selection of files from ``source`` into ``aim``.

    The files are copied rather than moved. Each copy is stored under the
    MD5 digest of its contents followed by ".jpg", which means files with
    identical contents are written to the same target name.

    :param source: directory whose files are sampled
    :param aim: destination directory; it must already exist
    :param picknumber: upper bound on the number of files copied; if
        ``source`` contains no more than this many files, all are copied
    :return: None
    """
    # Restrict the candidates to regular files: directories listed by
    # os.listdir cannot be hashed or copied as a file.
    pathDir = [
        entry for entry in os.listdir(source)
        if os.path.isfile(os.path.join(source, entry))
    ]
    if len(pathDir) <= picknumber:
        sample = pathDir
    else:
        # Sample from a copy so the count printed below always reflects
        # the full set of candidates.
        sample = rand_sampling(list(pathDir), picknumber)
    print(source)
    print("len:", len(pathDir))
    for name in sample:
        src_path = os.path.join(source, name)
        digest = file_md5(src_path)
        if digest is None:
            # Entry is no longer a regular file; nothing to copy.
            continue
        # Joining with os.path.join removes the need for the directory
        # arguments to end with a path separator.
        shutil.copyfile(src_path, os.path.join(aim, digest + ".jpg"))
if __name__ == '__main__':
    picknumber = 50  # number of files drawn from each directory
    fileDir = r"E:\pictures\data\for_hegui"  # root folder of the source images
    tarDir = r'E:\测试图集\随机抽取'  # root folder that receives the samples
    # Clear any previous output, then make sure the target root exists.
    if os.path.exists(tarDir):
        shutil.rmtree(tarDir)
    os.makedirs(tarDir, exist_ok=True)
    # Maps a source folder name to the folder name used in the target tree;
    # folders without an entry keep their original name.
    Dictionary_chineseDirs = {
        'document': '文档',
        'drawings': '一般卡通',
        'group_photo': '集体照',
        'inform': '广告海报',
        'neutral': '正常',
        'people': '人像',
        'redheadfile': '红头文件',
        'suit': '西装',
    }
    for root, dirs, files in os.walk(fileDir):
        for d in dirs:
            # Keep the trailing separator so the paths stay usable even
            # when a file name is appended by plain string concatenation.
            source = os.path.join(fileDir, d) + '/'
            chineseDir = Dictionary_chineseDirs.get(d, d)
            aim = os.path.join(tarDir, chineseDir) + '/'
            os.makedirs(aim, exist_ok=True)
            moveFile(source, aim, picknumber)
        # Only the direct children of fileDir are category folders; the
        # paths above are built from fileDir, so descending further would
        # point at directories that do not exist there.
        break