深度学习猫狗图片分类之建立train/valid文件夹
## 从 www.kaggle.com/c/dogs-vs-cats/data 下载数据。数据包含 25000 张形如 cat.0.jpg、dog.0.jpg 的图片。目的是建立 train 和 valid 两个文件夹,并在其中分别建立 cat 和 dog 子文件夹来存放图片:train 23000 张,valid 2000 张。
import glob
import os # operating system
import shutil # Advanced file, folder, compressed package processing module
import re
import numpy as np
Image_path = './train/'  # folder holding the raw images (cat.0.jpg, dog.0.jpg, ...)
data_dir = 'dogsandcats data'  # root of the train/valid tree that gets (re)built
files = glob.glob(os.path.join(Image_path, '*.jpg'))
no_of_images = len(files)
shuffle = np.random.permutation(no_of_images)  # random ordering of indices into `files`


def split_into_train_valid(files, shuffle, data_dir, n_valid=2000):
    """Move images into ``<data_dir>/{valid,train}/{dog,cat}/``.

    The class sub-folder of each image is the part of its file name before
    the first dot (``cat.0.jpg`` -> ``cat``).

    Args:
        files: paths of the images to distribute.
        shuffle: sequence of indices into ``files`` giving the order in which
            images are assigned; the first ``n_valid`` indices go to
            ``valid``, all remaining ones to ``train``.
        data_dir: output root. An existing directory at this path is deleted
            and rebuilt from scratch.
        n_valid: number of images placed in the validation split.

    Returns:
        dict mapping ``'valid'`` and ``'train'`` to the number of images
        moved into that split.

    Note:
        Files are *moved*, not copied: afterwards they no longer exist at
        their original location.
    """
    if os.path.exists(data_dir):
        shutil.rmtree(data_dir)  # start from a clean output tree

    splits = {'valid': shuffle[:n_valid], 'train': shuffle[n_valid:]}
    moved = {}
    for t, indices in splits.items():
        for folder in ['dog', 'cat']:
            # makedirs also creates data_dir and the split folder as needed
            os.makedirs(os.path.join(data_dir, t, folder))
        for i in indices:
            image = os.path.basename(files[i])  # e.g. 'cat.0.jpg'
            folder = image.split('.')[0]  # e.g. 'cat'
            # shutil.move (unlike os.rename) also works when source and
            # destination are on different filesystems
            shutil.move(files[i], os.path.join(data_dir, t, folder, image))
        moved[t] = len(indices)
    return moved


if files:
    split_into_train_valid(files, shuffle, data_dir)
else:
    # The images are moved out of Image_path on the first run. Rebuilding the
    # output tree when no source images are left would delete the only copy
    # of the dataset, so leave everything untouched in that case.
    print('No .jpg images found in %r; leaving %r untouched.' % (Image_path, data_dir))
效果如下:
为后续训练/验证作准备。