对于训练集图片分类记录在csv文件中的情况:
有两种处理方式:
一、将训练集图片分为x_train和y_train:
# Load the data: labels.csv holds one row per training image (including a
# 'breed' column); sample_submission.csv provides the test image ids.
labels = pd.read_csv('labels.csv')
sample_submission = pd.read_csv('sample_submission.csv')

# One-hot encode the breed column: one indicator column per distinct breed,
# with row k describing row k of `labels`.
targets_series = labels['breed']  # already a Series; no re-wrapping needed
# A dense frame is enough here: the result is converted to a dense NumPy
# array on the next line, so a sparse intermediate would gain nothing.
one_hot = pd.get_dummies(targets_series)
# Pin the dtype explicitly: get_dummies yields bool columns on recent pandas
# and uint8 on older releases; uint8 keeps the labels consistent either way.
one_hot_labels = np.asarray(one_hot, dtype=np.uint8)
# Read the images with OpenCV
img_size = 299  # every image is resized to img_size x img_size


def _load_resized(path, size):
    """Read the image at *path* with OpenCV and resize it to size x size.

    Raises:
        OSError: if OpenCV cannot read the file (missing or undecodable).
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the path rather than inside cv2.resize.
        raise OSError('cannot read image: {}'.format(path))
    return cv2.resize(img, (size, size))


x_train = []
y_train = []
# Each row of labels.values unpacks to (image id, breed). Row k of
# one_hot_labels encodes row k of labels, so zipping keeps image and label
# paired without a manual counter.
for (f, _breed), label in zip(tqdm(labels.values), one_hot_labels):
    x_train.append(_load_resized('train/{}.jpg'.format(f), img_size))
    y_train.append(label)

# Test images are listed by the 'id' column of the sample submission.
x_test = [
    _load_resized('test/{}.jpg'.format(f), img_size)
    for f in tqdm(sample_submission['id'].values)
]
# Convert the collected data into the input format the existing model expects.
# Here: stack the per-image label rows gathered in y_train into one uint8 array.
y_train_raw = np.asarray(y_train, dtype=np.uint8)
x_train