项目说明
数据集说明 oxford-iiit-pet
数据官方网站:
https://www.robots.ox.ac.uk/~vgg/data/pets/
先下载 .torrent 文件,然后用它下载完整的数据集文件。
数据集主要分为 images 和 annotations 两个目录。
images 由以下类别图片构成:
Abyssinian Ragdoll boxer keeshond scottish_terrier
Bengal Russian_Blue chihuahua leonberger shiba_inu
Birman Siamese english_cocker_spaniel miniature_pinscher staffordshire_bull_terrier
Bombay Sphynx english_setter newfoundland wheaten_terrier
British_Shorthair american_bulldog german_shorthaired pomeranian yorkshire_terrier
Egyptian_Mau american_pit_bull_terrier great_pyrenees pug
Maine_Coon basset_hound havanese saint_bernard
Persian beagle japanese_chin samoyed
大写字母开头的是猫;小写字母开头的是狗。
annotations/xmls/Abyssinian_1.xml
<annotation>
<folder>OXIIIT</folder>
<filename>Abyssinian_1.jpg</filename>
<source>
<database>OXFORD-IIIT Pet Dataset</database>
<annotation>OXIIIT</annotation>
<image>flickr</image>
</source>
<size>
<width>600</width>
<height>400</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>cat</name>
<pose>Frontal</pose>
<truncated>0</truncated>
<occluded>0</occluded>
<bndbox>
<xmin>333</xmin>
<ymin>72</ymin>
<xmax>425</xmax>
<ymax>158</ymax>
</bndbox>
<difficult>0</difficult>
</object>
</annotation>
代码实现
数据处理
import os
import shutil
import random
整理原始数据-分类
将零散的数据,根据名字放到不同文件夹
def split_types(data_dir="images/", ret_dir="dataset/"):
    """Copy loose image files into one sub-directory per breed.

    File names are expected to look like ``<breed>_<index>.<ext>``, where
    the breed itself may contain underscores (``american_bulldog_12.jpg``).
    Everything before the LAST underscore is used as the breed name, so
    multi-word breeds are kept apart instead of being merged under their
    first word.

    Args:
        data_dir: Directory holding the unsorted image files.
        ret_dir: Directory under which the per-breed folders are created.
            A trailing path separator is optional.
    """
    for file_name in os.listdir(data_dir):
        file_path = os.path.join(data_dir, file_name)
        # Hidden entries (e.g. .DS_Store) and sub-directories are not images.
        if file_name.startswith('.') or not os.path.isfile(file_path):
            continue
        # rsplit on the last underscore strips only the numeric suffix.
        breed = file_name.rsplit('_', 1)[0].strip()
        type_dir = os.path.join(ret_dir, breed)
        os.makedirs(type_dir, exist_ok=True)
        shutil.copy(file_path, os.path.join(type_dir, file_name))
划分 训练集和测试集
# Default location of the class-sorted source data and of the split output.
origin_dir = '/Users/luyi/Downloads/oxford-iiit-pet/images/'
save_dir = ''


def random2Dataset(data_dir=origin_dir, ratio=0.3, out_dir=None):
    """Randomly split a class-per-folder dataset into train and test sets.

    Every sub-directory of ``data_dir`` is treated as one label. Its files
    are shuffled, and ``int(n * ratio)`` of them are MOVED to
    ``<out_dir>/test/<label>``; the rest are MOVED to
    ``<out_dir>/train/<label>``. Hidden entries (names starting with ``.``)
    and non-directory entries in ``data_dir`` are ignored.

    Args:
        data_dir: Directory containing one sub-directory per label.
        ratio: Fraction of each label's files that goes to the test set.
        out_dir: Root directory for the ``train``/``test`` folders.
            Defaults to the module-level ``save_dir``.
    """
    if out_dir is None:
        out_dir = save_dir

    for label in os.listdir(data_dir):
        type_dir = os.path.join(data_dir, label)
        if label.startswith('.') or not os.path.isdir(type_dir):
            continue

        train_dir_type = os.path.join(out_dir, 'train', label)
        test_dir_type = os.path.join(out_dir, 'test', label)
        os.makedirs(train_dir_type, exist_ok=True)
        os.makedirs(test_dir_type, exist_ok=True)

        # Drop hidden files before counting so they do not skew the split.
        file_names = [
            name for name in os.listdir(type_dir) if not name.startswith('.')
        ]
        test_num = int(len(file_names) * ratio)
        print('-- ', label, test_num, )
        random.shuffle(file_names)

        # The first test_num shuffled files form the test set, the rest train.
        for index, file_name in enumerate(file_names):
            target_dir = test_dir_type if index < test_num else train_dir_type
            file_path = os.path.join(type_dir, file_name)
            save_path = os.path.join(target_dir, file_name)
            shutil.move(file_path, save_path)
2023-02-28