数据集介绍:
Oxford-102 Flower 是牛津大学视觉几何组(Visual Geometry Group)于2008年发布的用于图像分类的花卉数据集,由8189张图像组成,包含102个花卉类别,每个类别有40-258张图像。训练集和验证集中每个类别各包含10张图像(各1020张),测试集由剩余的6149张图像组成。
论文链接:Automated flower classification over a large number of classes
数据集下载:
官网:Visual Geometry Group - University of Oxford
点击下载
1. Dataset images
4. The image labels
5. The data splits
组织数据集:
Dataset images 下载后为压缩包 102flowers.tgz,解压到 ./102flowers 后图片都保存在 ./102flowers/jpg 路径下;imagelabels.mat(The image labels)和 setid.mat(The data splits)需放在运行脚本的当前目录下。
组织数据集脚本:
import scipy.io
import numpy as np
import os
from PIL import Image
import shutil
########取出 imagelabels 文件的值############
imagelabels_path='./imagelabels.mat'
labels = scipy.io.loadmat(imagelabels_path)
labels = np.array(labels['labels'][0])-1
######## 取出 flower dataset: train test valid 数据id标识 ########
setid_path='./setid.mat'
setid = scipy.io.loadmat(setid_path)
validation = np.array(setid['valid'][0]) - 1
np.random.shuffle(validation)
train = np.array(setid['trnid'][0]) - 1
np.random.shuffle(train)
test=np.array(setid['tstid'][0]) -1
np.random.shuffle(test)
######## flower data path 数据保存路径 ########
flower_dir = list()
######## flower data dirs 生成保存数据的绝对路径和名称 ########
for img in os.listdir("./102flowers/jpg"):
######## flower data ########
flower_dir.append(os.path.join("./102flowers/jpg", img))
######## flower data dirs sort 数据的绝对路径和名称排序 从小到大 ########
flower_dir.sort()
#print(flower_dir)
#####生成flower data train的分类数据 #######
des_folder_train="./train"
for tid in train:
######## open image and get label ########
print(flower_dir[tid])
img=Image.open(flower_dir[tid])
######## resize img #######
img = img.resize((256, 256),Image.ANTIALIAS)
lable=labels[tid]
#print(lable)
path=flower_dir[tid]
#print("path:",path)
base_path=os.path.basename(path)
#print("base_path:",base_path)
######类别目录路径
classes="c"+str(lable)
class_path=os.path.join(des_folder_train,classes)
if not os.path.exists(class_path):
os.makedirs(class_path)
#print("class_path:",class_path)
despath=os.path.join(class_path,base_path)
#print("despath:",despath)
img.save(despath)
#####生成flower data validation的分类数据 #######
des_folder_validation="./val"
for tid in validation:
######## open image and get label ########
img=Image.open(flower_dir[tid])
#print(flower_dir[tid])
img = img.resize((256, 256),Image.ANTIALIAS)
lable=labels[tid]
#print(lable)
path=flower_dir[tid]
print("path:",path)
base_path=os.path.basename(path)
print("base_path:",base_path)
classes="c"+str(lable)
class_path=os.path.join(des_folder_validation,classes)
# 判断结果
if not os.path.exists(class_path):
os.makedirs(class_path)
print("class_path:",class_path)
despath=os.path.join(class_path,base_path)
print("despath:",despath)
img.save(despath)
#####生成flower data test的分类数据 #######
des_folder_test="./test"
for tid in test:
######## open image and get label ########
img=Image.open(flower_dir[tid])
#print(flower_dir[tid])
img = img.resize((256, 256),Image.ANTIALIAS)
lable=labels[tid]
#print(lable)
path=flower_dir[tid]
print("path:",path)
base_path=os.path.basename(path)
print("base_path:",base_path)
classes="c"+str(lable)
class_path=os.path.join(des_folder_test,classes)
# 判断结果
if not os.path.exists(class_path):
os.makedirs(class_path)
print("class_path:",class_path)
despath=os.path.join(class_path,base_path)
print("despath:",despath)
img.save(despath)
数据集结构: