transfer learning

最新推荐文章于 2021-06-15 19:26:14 发布

AI周红伟

最新推荐文章于 2021-06-15 19:26:14 发布

阅读量209

点赞数

分类专栏：短视频

本文链接：https://blog.csdn.net/starzhou/article/details/106629391

版权

短视频专栏收录该内容

367 篇文章 21 订阅

订阅专栏

import cv2  # working with, mainly resizing, images
import numpy as np  # dealing with arrays
import os  # dealing with directories
from random import shuffle  # mixing up or currently ordered data that might lead our network astray in training.
from tqdm import tqdm  # a nice pretty percentage bar for tasks. Thanks to viewer Daniel BA1/4hler for this suggestion
import pandas as pd
import glob
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as k
from matplotlib.pyplot import imshow
from keras.applications.imagenet_utils import preprocess_input
import imageio
from IPython.display import SVG
from keras.utils import plot_model
from keras.utils.vis_utils import model_to_dot
from keras.models import load_model
import keras
import numpy as np
# Side length, in pixels, of the square images used throughout this script.
IMG_SIZE = 224
# Module-level sample list; the active code below does not reference it.
training_data = list()

# LABEL_DIR = '/home/star/ai/pythonobj/kagglecatsanddogs_3367a/PetImages/'
# TRAIN_DIR = LABEL_DIR + '*'
#
# label_list = os.listdir(LABEL_DIR)
# # 获取多个文件夹的路径，并返回一个可迭代对象
# dirPath = glob.iglob(TRAIN_DIR)
#
# # 将可迭代对象进行循环获取，赋值给 big_file
# def create_train_data():
#     training_data = []
#     index = 0
#     for big_file in dirPath:
#         # 获取每个文件夹下的文件名并赋值给 file
#         files = os.listdir(big_file)
#         print(files)
#         label = label_list[index]
#         if(label == 'Cat'):
#             lable_id = 0
#         else:
#             lable_id = 1
#
#         # 将获取的所有文件名进行循环判断
#         for file in files:
#             #print(file, label)
#             path = os.path.join(big_file, file)
#             img = cv2.imread(path, cv2.IMREAD_COLOR)
#             if img is None:
#                 continue
#
#             img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
#             training_data.append([np.array(img), np.array(lable_id)])
#
#         index += 1
#
#     shuffle(training_data)
#     np.save('train_data.npy', training_data)
#     return training_data
#
#
# train_data = create_train_data()

# # If you have already created the dataset:
# train_data = np.load('/home/star/ai/pythonobj/train_data.npy', allow_pickle=True)
#
# train = train_data[:24000]
# test = train_data[24000:]
#
# X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
# Y = np.array([i[1] for i in train])
# test_x = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
# test_y = np.array([i[1] for i in test])
#
#
# # parameters have been trained with the 'imagenet' dataset (1000 classes)
# base_model = ResNet50(weights='imagenet', include_top=False)
# x = base_model.output
# # add top layers which we will learn by our target dataset
# x = GlobalAveragePooling2D()(x)
# x = Dense(1024, activation='relu')(x)
# predictions = Dense(1, activation='sigmoid')(x)
# # new model!
# model = Model(inputs=base_model.input, outputs=predictions)
#
# # lock the basemodel, so we can just learn the parameters of the top layers
# for layer in base_model.layers:
#     layer.trainable = False
#
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# If you have already created the dataset:
# Load the cached samples; every entry is indexed as [image, label] below.
# NOTE: allow_pickle=True unpickles arbitrary objects -- only load files you trust.
train_data = np.load('/home/star/ai/pythonobj/train_data.npy', allow_pickle=True)

# The first 24000 samples are used for training, whatever remains for evaluation.
train, test = train_data[:24000], train_data[24000:]

# Separate images (index 0) from labels (index 1); images are reshaped to
# (samples, IMG_SIZE, IMG_SIZE, 3).
X = np.array([sample[0] for sample in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
Y = np.array([sample[1] for sample in train])
test_x = np.array([sample[0] for sample in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
test_y = np.array([sample[1] for sample in test])
#
#
# ResNet50 backbone initialised with ImageNet weights. The original classifier
# is left out (include_top=False) and pooling='avg' is requested on the output.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    pooling='avg',
)

# Freeze every backbone layer so that only the layers added below are trained.
for layer in base_model.layers:
    layer.trainable = False

# Frozen backbone followed by a small head ending in one sigmoid unit.
model = keras.Sequential([
    base_model,
    Dense(512, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# x = base_model.output
# # add top layers which we will learn by our target dataset
# #x = GlobalAveragePooling2D()(x)
# x = Dense(1024, activation='relu')(x)
# predictions = Dense(1, activation='sigmoid')(x)
# new model!
#model = Model(inputs=base_model.input, outputs=predictions)

# Binary cross-entropy pairs with the single sigmoid output unit of the model.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step 1: fit on the training split.
model.fit(X, Y, batch_size=64, epochs=3)

# Step 2: score the held-out split; index 0 is the loss, index 1 the accuracy.
preds = model.evaluate(test_x, test_y)
print(f"Loss = {preds[0]}")
print(f"Test Accuracy = {preds[1]}")

# Write the trained model to disk, then continue with the copy read back from it.
model.save('catdog_4000.h5')
model = load_model('catdog_4000.h5')

# Step 3: classify one of my own images.
img_path = '/home/star/ai/pythonobj/cifar10/dog1.jpeg'

# The model must see the query image the same way it saw the training data.
# train_data.npy is produced (see the commented-out create_train_data above)
# with cv2.imread + cv2.resize and no further normalisation, so the same steps
# are used here. Loading through keras' load_img + preprocess_input instead
# would feed the network differently scaled pixels than it was trained on.
img = cv2.imread(img_path, cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread signals a missing or undecodable file by returning None.
    raise FileNotFoundError(f"could not read image: {img_path}")
img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
x = np.expand_dims(img, axis=0)  # add the batch dimension -> (1, H, W, 3)

# Show the picture that is being classified.
my_image = imageio.imread(img_path)
imshow(my_image)

# predict() returns one sigmoid score per input row; compare the scalar score
# rather than the whole array. Label 0 was assigned to 'Cat', 1 to the rest.
result = model.predict(x)
if result[0][0] < 0.5:
    print("this is a cat!")
else:
    print("this is a dog!")


# Print the layer table of the full model and render its graph.
model.summary()
plot_model(model, to_file='model.png')

# Do the same for the ResNet50 backbone. It gets its own file name so the
# diagram of the full model written just above is not overwritten.
base_model.summary()
plot_model(base_model, to_file='base_model.png')

-------------------------------------

所谓极速实现，是代码不超过100行啊，很爽啊

preprocess_image.py 是图片预处理脚本。我用了这个库（点击打开链接）的数据，可是我的电脑处理不了那么大的数据，就随便选了3500张作为训练，500张用于测试。

每张图片规格调整到(224,224,3)，所以最后训练集维度是(3500,224,224,3)，测试集维度是(500,224,224,3)

import cv2 # working with, mainly resizing, images
import numpy as np # dealing with arrays
import os # dealing with directories
from random import shuffle # mixing up or currently ordered data that might lead our network astray in training.
from tqdm import tqdm # a nice pretty percentage bar for tasks. Thanks to viewer Daniel BA1/4hler for this suggestion

# Both image folders sit under the same dataset directory.
_DATASET_ROOT = 'D:/Documents/GitHub/deep-learning-models/dataset'
TRAIN_DIR = _DATASET_ROOT + '/train_wu'
TEST_DIR = _DATASET_ROOT + '/test'
# Images are resized to IMG_SIZE x IMG_SIZE pixels.
IMG_SIZE = 224

def label_img(img):
    """Return the one-hot ``[cat, dog]`` label encoded in an image file name.

    The class name is taken from the third dot-separated component counted
    from the end, so ``cat.0.jpg`` yields ``cat``.

    Args:
        img: File name of the image, e.g. ``'dog.12.jpg'``.

    Returns:
        ``[1, 0]`` for a cat, ``[0, 1]`` for a dog.

    Raises:
        ValueError: If the name has fewer than three dot-separated parts or
            its class component is neither ``cat`` nor ``dog``. Failing here
            keeps unlabeled samples out of the training set.
    """
    parts = img.split('.')
    word_label = parts[-3] if len(parts) >= 3 else None
    if word_label == 'cat':
        return [1, 0]
    if word_label == 'dog':
        return [0, 1]
    raise ValueError(
        f"cannot derive a cat/dog label from file name {img!r}"
    )

def create_train_data():
    """Build, shuffle, cache and return the labelled training samples.

    Every file in ``TRAIN_DIR`` is labelled with ``label_img``, read as a
    colour image with OpenCV and resized to ``IMG_SIZE`` x ``IMG_SIZE``.
    Files that OpenCV cannot decode are skipped. The shuffled samples are
    also written to ``train_data.npy`` in the current directory.

    Returns:
        A list of ``[image, label]`` pairs, both stored as NumPy arrays.
    """
    training_data = []
    for file_name in tqdm(os.listdir(TRAIN_DIR)):
        label = label_img(file_name)
        path = os.path.join(TRAIN_DIR, file_name)
        pixels = cv2.imread(path, cv2.IMREAD_COLOR)
        if pixels is None:
            # cv2.imread returns None for unreadable files; resizing None
            # would raise, so leave such files out.
            continue
        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(pixels), np.array(label)])
    shuffle(training_data)

    # Image and label arrays have different shapes, so the pairs cannot form a
    # regular numeric array. Pack them into an explicit (N, 2) object array,
    # because recent NumPy refuses to build a ragged array implicitly.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (pixels, label) in enumerate(training_data):
        packed[row, 0] = pixels
        packed[row, 1] = label
    np.save('train_data.npy', packed)
    return training_data

def process_test_data():
    """Build, shuffle, cache and return the unlabelled test samples.

    Every file in ``TEST_DIR`` is read as a colour image with OpenCV and
    resized to ``IMG_SIZE`` x ``IMG_SIZE``. Each image is paired with the part
    of its file name before the first dot. Files that OpenCV cannot decode
    are skipped. The shuffled samples are also written to ``test_data.npy``
    in the current directory.

    Returns:
        A list of ``[image, img_num]`` pairs, where ``image`` is a NumPy
        array and ``img_num`` is a string.
    """
    testing_data = []
    for file_name in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, file_name)
        img_num = file_name.split('.')[0]
        pixels = cv2.imread(path, cv2.IMREAD_COLOR)
        if pixels is None:
            # cv2.imread returns None for unreadable files; resizing None
            # would raise, so leave such files out.
            continue
        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(pixels), img_num])

    shuffle(testing_data)

    # An image array next to a string id is not a regular numeric array. Pack
    # the pairs into an explicit (N, 2) object array, because recent NumPy
    # refuses to build a ragged array implicitly.
    packed = np.empty((len(testing_data), 2), dtype=object)
    for row, (pixels, img_num) in enumerate(testing_data):
        packed[row, 0] = pixels
        packed[row, 1] = img_num
    np.save('test_data.npy', packed)
    return testing_data

# Build the dataset from the images on disk (this also writes train_data.npy).
train_data = create_train_data()
# To reuse arrays cached by an earlier run instead (recent NumPy needs
# allow_pickle=True to load such object arrays):
#train_data = np.load('train_data.npy')
#test_data = process_test_data()
#test_data = np.load('test_data.npy')

# Hold out the last 500 shuffled samples for evaluation and train on the rest.
train, test = train_data[:-500], train_data[-500:]

# Separate images (index 0) from labels (index 1); images are reshaped to
# (samples, IMG_SIZE, IMG_SIZE, 3).
X = np.array([sample[0] for sample in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
Y = np.array([sample[1] for sample in train])
test_x = np.array([sample[0] for sample in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
test_y = np.array([sample[1] for sample in test])

我用了 Keras 的 ResNet50（参数是 ImageNet 的训练结果）作为 transfer learning 的 base model，去掉最后几层后，重新加了一个 GlobalAveragePooling2D() 层、一个全连接层和一个输出 2 个类别的 softmax 层。
CAT.py

# -*- coding: utf-8 -*-
"""
Created on Fri Dec 1 16:22:52 2017
@author: Administrator
"""

from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as k
from matplotlib.pyplot import imshow
from keras.applications.imagenet_utils import preprocess_input
import imageio
from IPython.display import SVG
from keras.utils import plot_model
from keras.utils.vis_utils import model_to_dot
from keras.models import load_model
import numpy as np

# ResNet50 initialised with ImageNet weights, without its original classifier.
base_model = ResNet50(include_top=False, weights='imagenet')

# New head to be learned on the target dataset: global average pooling, one
# hidden dense layer, then a 2-unit softmax.
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)

# Join backbone input and new head output into the model that gets trained.
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every backbone layer so that only the top layers added to the model
# are trained.
for layer in base_model.layers:
    layer.trainable = False

# Categorical cross-entropy pairs with the 2-unit softmax output of the model.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# Step 1: fit on the training split.
model.fit(X, Y, batch_size=64, epochs=3)

# Step 2: score the held-out split; index 0 is the loss, index 1 the accuracy.
preds = model.evaluate(test_x, test_y)
print(f"Loss = {preds[0]}")
print(f"Test Accuracy = {preds[1]}")

# Save the model that was just trained before reading the file back. With the
# save disabled, load_model would either fail on a missing file or silently
# replace the fresh model with one left over from an earlier run.
model.save('catdog_4000.h5')
model = load_model('catdog_4000.h5')

# Step 3: classify one of my own images.
import cv2  # not in CAT.py's own import list; needed to mirror the training pipeline

img_path = 'cat2.jpg'

# The model must see the query image the same way it saw the training data.
# create_train_data() builds the samples with cv2.imread + cv2.resize and no
# further normalisation, so the same steps are used here. Loading through
# keras' load_img + preprocess_input instead would feed the network
# differently scaled pixels than it was trained on.
img = cv2.imread(img_path, cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread signals a missing or undecodable file by returning None.
    raise FileNotFoundError(f"could not read image: {img_path}")
img = cv2.resize(img, (224, 224))
x = np.expand_dims(img, axis=0)  # add the batch dimension -> (1, 224, 224, 3)

# Show the picture that is being classified.
my_image = imageio.imread(img_path)
imshow(my_image)

# The two output scores follow label_img's one-hot order: [cat, dog].
result = model.predict(x)
if result[0][0] > result[0][1]:
    print("this is a cat!")
else:
    print("this is a dog!")

#model.summary()
#plot_model(model, to_file='model.png')
#SVG(model_to_dot(model).create(prog='dot', format='svg'))