定义数据集
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
加载一个猫狗的二分类图像数据集到本地
# Download the filtered cats-vs-dogs archive (Keras caches it under
# ~/.keras/datasets, so repeated runs reuse the file) and resolve the
# directory that the zip extracts to.
dataset_url = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
archive = tf.keras.utils.get_file('cats_and_dogs.zip', origin=dataset_url, extract=True)
PATH = os.path.join(os.path.dirname(archive), 'cats_and_dogs_filtered')
print(PATH)
tensorflow2 —训练猫狗分类
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import models,layers,metrics,optimizers,losses,callbacks
import os
数据
# Root of the extracted cats-vs-dogs dataset; must contain train/ and
# validation/ sub-directories with one folder per class.
PATH = "cats_and_dogs_filtered"
# The original code called open(PATH), which raises IsADirectoryError on a
# directory (and would leak the handle otherwise); an explicit existence
# check is what was intended.
if not os.path.isdir(PATH):
    raise FileNotFoundError(f"dataset directory not found: {PATH!r}")

# Training images get random augmentation on top of [0, 1] rescaling;
# validation images are only rescaled so metrics are measured on clean data.
train_image_generator = ImageDataGenerator(rescale=1 / 255,
                                           rotation_range=30,
                                           width_shift_range=0.2,
                                           height_shift_range=0.2,
                                           shear_range=0.2,
                                           zoom_range=0.2,
                                           horizontal_flip=True)
validation_image_generator = ImageDataGenerator(rescale=1 / 255)

batch_size = 128
epochs = 15
IMG_HEIGHT = 32
IMG_WIDTH = 32

train_dir = os.path.join(PATH, 'train')
validation_dir = os.path.join(PATH, 'validation')

# flow_from_directory infers the two binary labels from the class sub-folders.
train_data_gen = train_image_generator.flow_from_directory(batch_size=batch_size,
                                                           directory=train_dir,
                                                           shuffle=True,
                                                           target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                           class_mode='binary')
val_data_gen = validation_image_generator.flow_from_directory(batch_size=batch_size,
                                                              directory=validation_dir,
                                                              target_size=(IMG_HEIGHT, IMG_WIDTH),
                                                              class_mode='binary')
模型
def my_model():
    """Build and compile a small CNN for 32x32 RGB images.

    Three conv/max-pool stages, batch-norm, then two dense layers and a
    2-way softmax head, trained with sparse categorical cross-entropy.
    Prints a summary and returns the compiled model.
    """
    model = models.Sequential()
    model.add(layers.InputLayer((32, 32, 3)))
    # Feature extractor: channel count grows while spatial size halves.
    model.add(layers.Conv2D(18, (3, 3), padding="same", activation="relu"))
    model.add(layers.MaxPooling2D((2, 2), 2))
    model.add(layers.Conv2D(36, (3, 3), padding="same", activation="relu"))
    model.add(layers.MaxPooling2D((2, 2), 2))
    model.add(layers.Conv2D(48, (3, 3), padding="same", activation="relu"))
    model.add(layers.MaxPooling2D((2, 2), 2))
    # Classifier head.
    model.add(layers.BatchNormalization())
    model.add(layers.Flatten())
    model.add(layers.Dense(1024, activation="relu"))
    model.add(layers.Dense(512, activation="relu"))
    model.add(layers.Dense(2, activation="softmax"))
    model.summary()
    model.compile(optimizer=optimizers.Adam(),
                  loss=losses.sparse_categorical_crossentropy,
                  metrics=['accuracy'])
    return model

model = my_model()
训练
epochs = 15
# One "step" consumes one batch; floor division skips the final partial batch.
steps_per_epoch = train_data_gen.n // train_data_gen.batch_size
validation_steps = val_data_gen.n // val_data_gen.batch_size
print(train_data_gen.n, train_data_gen.batch_size, steps_per_epoch)
print(val_data_gen.n, val_data_gen.batch_size, validation_steps)
# Model.fit_generator() is deprecated since TF 2.1 and removed in TF 2.6+;
# Model.fit() accepts Python generators / Sequence objects directly.
history = model.fit(
    train_data_gen,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    validation_data=val_data_gen,
    validation_steps=validation_steps,
)
由于train_data_gen和val_data_gen都是生成器,训练时会不断生成批量的训练和验证数据。steps_per_epoch表示每个epoch从训练生成器中抽取 train_data_gen.n // batch_size 个批次后结束该轮训练,不再继续从生成器取数据;validation_steps对验证生成器起同样的作用。
————————————————
tf.keras.preprocessing;数据预处理,主要使用读取图片的API,如下面前三行:
包括从directory或者dataframe读取图片、随机亮度 裁剪 旋转 缩放等等
tf.keras.preprocessing.image.ImageDataGenerator,
tf.keras.preprocessing.image.load_img,
tf.keras.preprocessing.image.random_brightness,
tf.keras.preprocessing.image.random_rotation
tf.keras.preprocessing.image.random_shear,
tf.keras.preprocessing.image.random_zoom
tf.keras.preprocessing.sequence,
tf.keras.preprocessing.text;
# Example: the same ImageDataGenerator can read from a dataframe or a
# directory. NOTE(review): the original used a bare `keras.` prefix, but this
# file only imports `tensorflow as tf` — qualify as tf.keras to avoid a
# NameError.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')
# `train_df`, `class_names`, `height` and `width` are placeholders defined
# elsewhere in the original article — TODO confirm before running.
# flow_from_dataframe expects one column of file paths and one of labels.
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    directory='./',
    x_col='filepath',
    y_col='class',
    classes=class_names,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=True,
    class_mode='sparse')
# The directory variant overwrites the generator above; the article shows
# both call styles side by side.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(height, width),
    batch_size=batch_size,
    seed=7,
    shuffle=True,
    class_mode="categorical")
##ImageDataGenerator只定义了对读取出来的图片进行的预处理,但是此时它还不确定是从文件夹还是从dataframe中读取。如果是从dataframe中读取,那么dataframe的结构一定要有两个feature_column:一个是每张图片的路径,一个是该图片的所属类别,如上面flow_from_dataframe的例子所示。
————————————————
from tensorflow.keras.preprocessing.image import ImageDataGenerator
data=ImageDataGenerator(rescale=1/255).flow_from_dataframe(...)  # 貌似不好用
data=ImageDataGenerator(rescale=1/255).flow_from_directory(...)  # 一般用这个
torch – 训练猫狗分类
my_data(即下文import所用的 q_data.py)
import torch
from torch.utils.data import dataloader,DataLoader
from torchvision import datasets,transforms
# open("../cats_and_dogs_filtered")
def loaddata(train_path="../cats_and_dogs_filtered/train",
             test_path="../cats_and_dogs_filtered/validation",
             batch_size=128):
    """Build train/validation DataLoaders over the cats-vs-dogs image folders.

    Both splits share one preprocessing pipeline (previously duplicated):
    resize to 32x32, convert to tensor, normalize each channel to ~[-1, 1].
    The paths and train batch size are now parameters whose defaults
    reproduce the original hard-coded behavior, so existing callers are
    unaffected. The validation loader keeps the default batch size of 1,
    as before.

    Returns:
        (train_loader, test_loader) tuple of DataLoader objects.
    """
    transform = transforms.Compose([
        transforms.Resize([32, 32]),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])
    train_dataset = datasets.ImageFolder(train_path, transform=transform)
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
    test_dataset = datasets.ImageFolder(test_path, transform=transform)
    test_loader = DataLoader(test_dataset, shuffle=True)
    return train_loader, test_loader

train_loader, test_loader = loaddata()
# NOTE: class metadata lives on the dataset, not on the DataLoader:
# train_loader.dataset.classes / .class_to_idx / .imgs
my_model(即下文import所用的 q_model.py)
import torch
from torchsummary import summary
import torch.nn as nn
class m_model(nn.Module):
    """Small CNN for 32x32 RGB cat/dog images.

    Three conv/ReLU/max-pool stages followed by batch-norm, flatten and a
    2-way linear classifier.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 18 channels, 32x32 -> 16x16.
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 18, (3, 3), 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d((2, 2), 2),
        )
        # Stage 2: 18 -> 36 channels, 16x16 -> 8x8.
        self.layer2 = nn.Sequential(
            nn.Conv2d(18, 36, (3, 3), 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d((2, 2), 2),
        )
        # Stage 3: 36 -> 48 channels, 8x8 -> 4x4.
        self.layer3 = nn.Sequential(
            nn.Conv2d(36, 48, (3, 3), 1, 1),
            nn.ReLU(True),
            nn.MaxPool2d((2, 2), 2),
        )
        # Head: normalize, flatten 48 * 4 * 4 = 768 features, classify.
        self.layer4 = nn.Sequential(
            nn.BatchNorm2d(48),
            nn.Flatten(),
            nn.Linear(768, 2),
        )

    def forward(self, x):
        # Run the four stages in order; keeping them as separate named
        # attributes preserves the original state_dict layout.
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        return x


if __name__ == '__main__':
    # Quick architecture sanity check (needs the third-party torchsummary).
    net = m_model()
    summary(net, (3, 32, 32))
my_train
from torch_7_11.q_model import m_model
from torch_7_11.q_data import loaddata
import torch.nn as nn
import torch

# Train the CNN with Adam + cross-entropy, evaluating on the validation
# split after every epoch.
model = m_model()
train_data, test_data = loaddata()  # fixed typo: was "tese_data"
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
lossfun = nn.CrossEntropyLoss()
epochs = 10
for epoch in range(epochs):
    model.train()
    for i, data in enumerate(train_data):
        x, y = data
        optimizer.zero_grad()
        h = model(x)
        loss = lossfun(h, y)
        loss.backward()
        optimizer.step()
    # Accuracy of the last training batch of this epoch.
    _, pr = torch.max(h, 1)
    acc = (pr == y).float().mean().item()

    # Evaluation: the original ran validation in train mode, which lets the
    # BatchNorm layer update its running statistics from validation data;
    # eval() + no_grad() gives an honest, cheaper measurement.
    model.eval()
    test_acc = 0.0
    loss_acc = 0.0
    with torch.no_grad():
        for x, y in test_data:
            h = model(x)
            loss_acc += lossfun(h, y).item() / len(test_data)
            _, pr = torch.max(h, 1)
            # .mean() (instead of .item() on the raw comparison) stays
            # correct even if the loader's batch size is ever changed from 1.
            test_acc += (pr == y).float().mean().item() / len(test_data)
    # loss here is the last *training* loss (the original accidentally
    # printed the last validation-batch loss instead).
    print(epoch + 1, i + 1, loss.item(), acc, loss_acc, test_acc)