# Import the required libraries
import os
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
# Walk the Kaggle input directory, print every file found, and remember the
# first one so it can be used later as a sample image.
file_inform_path = ''
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)
        # Only the very first file encountered is kept as the sample.
        if not file_inform_path:
            file_inform_path = full_path
# Grab one sample file to use as a reference.
# Load the sample file as a 3-channel colour image (flag 1 == cv2.IMREAD_COLOR).
img = cv2.imread(file_inform_path, 1)
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise ValueError(
        'cv2.imread could not read an image from %r' % file_inform_path
    )
# OpenCV decodes into BGR channel order while matplotlib expects RGB, so a
# converted copy is displayed; `img` itself is left untouched.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
sp = img.shape
# The shape of a decoded image is (rows, cols, channels), i.e.
# (height, width, channels), so width is sp[1] and height is sp[0].
print('width:%d height:%d number:%d' % (sp[1], sp[0], sp[2]))
# Read the image files into variables.
def readfile(path, label):
    """Load every image in a directory into a uint8 array, resized to 128x128.

    Files are processed in sorted filename order. When ``label`` is true,
    the class id of each image is parsed from the part of its filename that
    precedes the first underscore.

    Args:
        path: Directory containing the image files.
        label: Boolean; whether the labels ``y`` should be returned as well.

    Returns:
        ``x`` alone when ``label`` is false, otherwise the tuple ``(x, y)``.
        ``x`` has shape ``(n_files, 128, 128, 3)`` and dtype ``uint8``;
        ``y`` has shape ``(n_files,)`` and dtype ``uint8``.

    Raises:
        ValueError: If a file cannot be decoded as an image, or (when
            ``label`` is true) its filename prefix is not an integer.
    """
    image_dir = sorted(os.listdir(path))
    # Allocate the output arrays up front; y holds the labels.
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, filename in enumerate(image_dir):
        image_path = os.path.join(path, filename)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None on failure; fail here with the file
            # name instead of letting cv2.resize raise an opaque error.
            raise ValueError('could not read image file: %r' % image_path)
        # Resize to 128x128 before storing.
        x[i] = cv2.resize(img, (128, 128))
        if label:
            # The label is the integer prefix before the first underscore.
            y[i] = int(filename.split("_")[0])
    if label:
        return x, y
    return x
# Call the function to read the data.
# Read the training, validation and testing sets with readfile.
# The training and validation sets are read with labels; the testing set is
# read without them.
workspace_dir = '/kaggle/input/ml2020spring-hw3/food-11'
print("Reading data")
train_x, train_y = readfile(os.path.join(workspace_dir, "training"), True)
print("Size of training data = {}".format(len(train_x)))
val_x, val_y = readfile(os.path.join(workspace_dir, "validation"), True)
print("Size of validation data = {}".format(len(val_x)))
test_x = readfile(os.path.join(workspace_dir, "testing"), False)
print("Size of Testing data = {}".format(len(test_x)))
# No data augmentation is needed at test time: the pipeline only converts the
# input to a PIL image and then to a tensor.
test_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.ToTensor(),
    ]
)
# Dataset is the PyTorch class used to represent a dataset.
# Its methods are overridden as follows.
class ImgDataset(Dataset):
    """Indexable dataset over samples ``x`` with optional labels ``y``.

    Args:
        x: Indexable, sized collection of samples.
        y: Optional labels, one per sample. When given they are converted
            to a ``torch.LongTensor``.
        transform: Optional callable applied to each sample on access.

    Raises:
        ValueError: If ``y`` is given and its length differs from ``x``.
    """

    def __init__(self, x, y=None, transform=None):
        # Assigning to self.<name> in the constructor is what creates the
        # instance attribute; no separate declaration is needed.
        self.x = x
        # Labels are required to be a LongTensor (int64).
        # NOTE(review): presumably because they are consumed by a
        # classification loss such as nn.CrossEntropyLoss, which expects
        # int64 class indices as targets; confirm against the training loop.
        self.y = y
        if y is not None:
            self.y = torch.LongTensor(y)
            if len(self.y) != len(x):
                raise ValueError(
                    'x and y must have the same length, got %d and %d'
                    % (len(x), len(self.y))
                )
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(sample, label)`` if labels exist, else just the sample."""
        X = self.x[index]
        if self.transform is not None:
            X = self.transform(X)
        if self.y is not None:
            Y = self.y[index]
            return X, Y
        return X
class CustomDataset(Dataset):
    """Skeleton of a custom dataset; fill in the TODOs before use.

    A custom dataset must inherit from ``torch.utils.data.Dataset``. The
    file imports that class directly as ``Dataset``, so it is used here
    rather than ``data.Dataset`` (no name ``data`` is imported).
    """

    def __init__(self):
        # TODO
        # 1. Initialize file path or list of file names.
        pass

    def __getitem__(self, index):
        # TODO
        # 1. Read one data from file (e.g. using numpy.fromfile, PIL.Image.open).
        # 2. Preprocess the data (e.g. torchvision.Transform).
        # 3. Return a data pair (e.g. image and label).
        # Note that step 1 reads exactly one data item per call.
        pass

    def __len__(self):
        # You should change 0 to the total size of your dataset.
        return 0