transfer learning

import cv2  # working with, mainly resizing, images
import numpy as np  # dealing with arrays
import os  # dealing with directories
from random import shuffle  # mixing up or currently ordered data that might lead our network astray in training.
from tqdm import tqdm  # a nice pretty percentage bar for tasks. Thanks to viewer Daniel BA1/4hler for this suggestion
import pandas as pd
import glob
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as k
from matplotlib.pyplot import imshow
from keras.applications.imagenet_utils import preprocess_input
import imageio
from IPython.display import SVG
from keras.utils import plot_model
from keras.utils.vis_utils import model_to_dot
from keras.models import load_model
import keras
import numpy as np
# Side length in pixels of the square input images; used for the reshape of
# the loaded arrays and as ResNet50's input_shape further down.
IMG_SIZE = 224
# NOTE(review): not referenced by the live (uncommented) code visible in this
# file -- possibly a leftover from the commented-out dataset builder; confirm
# before removing.
training_data = []

# LABEL_DIR = '/home/star/ai/pythonobj/kagglecatsanddogs_3367a/PetImages/'
# TRAIN_DIR = LABEL_DIR + '*'
#
# label_list = os.listdir(LABEL_DIR)
# # Collect the paths of all class folders; iglob returns a lazy iterator
# dirPath = glob.iglob(TRAIN_DIR)
#
# # Loop over that iterator, binding each folder path to big_file
# def create_train_data():
#     training_data = []
#     index = 0
#     for big_file in dirPath:
#         # List the file names inside this folder and bind them to files
#         files = os.listdir(big_file)
#         print(files)
#         label = label_list[index]
#         if(label == 'Cat'):
#             lable_id = 0
#         else:
#             lable_id = 1
#
#         # Loop over every file name, skipping images that fail to load
#         for file in files:
#             #print(file, label)
#             path = os.path.join(big_file, file)
#             img = cv2.imread(path, cv2.IMREAD_COLOR)
#             if img is None:
#                 continue
#
#             img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
#             training_data.append([np.array(img), np.array(lable_id)])
#
#         index += 1
#
#     shuffle(training_data)
#     np.save('train_data.npy', training_data)
#     return training_data
#
#
# train_data = create_train_data()

# # If you have already created the dataset:
# train_data = np.load('/home/star/ai/pythonobj/train_data.npy', allow_pickle=True)
#
# train = train_data[:24000]
# test = train_data[24000:]
#
# X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
# Y = np.array([i[1] for i in train])
# test_x = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
# test_y = np.array([i[1] for i in test])
#
#
# # parameters have been trained with the 'imagenet' dataset (1000 classes)
# base_model = ResNet50(weights='imagenet', include_top=False)
# x = base_model.output
# # add top layers which we will learn by our target dataset
# x = GlobalAveragePooling2D()(x)
# x = Dense(1024, activation='relu')(x)
# predictions = Dense(1, activation='sigmoid')(x)
# # new model!
# model = Model(inputs=base_model.input, outputs=predictions)
#
# # lock the basemodel, so we can just learn the parameters of the top layers
# for layer in base_model.layers:
#     layer.trainable = False
#
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])# If you have already created the dataset:
# Load the pre-built dataset. The code below indexes every entry as
# entry[0] (image) and entry[1] (label), i.e. the file holds an object array,
# which is why allow_pickle=True is needed.
# NOTE: unpickling can execute arbitrary code -- only load files you created
# yourself.
train_data = np.load('/home/star/ai/pythonobj/train_data.npy', allow_pickle=True)

# The first 24000 entries train the model; everything after that is held out
# for evaluation.
train = train_data[:24000]
test = train_data[24000:]

X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
Y = np.array([i[1] for i in train])
test_x = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
test_y = np.array([i[1] for i in test])

# ResNet50 with ImageNet weights, without its classification head.
# pooling='avg' makes the base model emit one feature vector per image, so no
# separate GlobalAveragePooling2D layer is needed.
base_model = ResNet50(weights='imagenet',
                      include_top=False,
                      input_shape=(IMG_SIZE, IMG_SIZE, 3),
                      pooling='avg')

# Freeze the base model so only the newly added top layers are trained.
for layer in base_model.layers:
    layer.trainable = False

# New binary head: a single sigmoid unit (label 0 -> cat, label 1 -> dog,
# matching the messages printed in step 3).
model = keras.Sequential()
model.add(base_model)
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# step 1: train on the training split (up to 24000 samples)
model.fit(X, Y, epochs=3, batch_size=64)

# step 2: evaluate on the held-out split
preds = model.evaluate(test_x, test_y)
print("Loss = " + str(preds[0]))
print("Test Accuracy = " + str(preds[1]))

# Round-trip through disk so the prediction below uses exactly what was saved.
model.save('catdog_4000.h5')
model = load_model('catdog_4000.h5')

# step 3: test with my image
img_path = '/home/star/ai/pythonobj/cifar10/dog1.jpeg'
img = image.load_img(img_path, target_size=(IMG_SIZE, IMG_SIZE))
x = image.img_to_array(img)
# The model above is fitted on X exactly as stored (no preprocess_input, no
# scaling), so the query image must not be mean-subtracted either; calling
# preprocess_input here would feed the network a different input distribution
# than it was trained on. load_img yields RGB, so flip the channel axis to BGR.
# NOTE(review): the BGR flip assumes the stored arrays came from cv2.imread,
# as in the dataset-building code elsewhere in this file -- confirm.
x = x[..., ::-1]
x = np.expand_dims(x, axis=0)
my_image = imageio.imread(img_path)
imshow(my_image)
result = model.predict(x)
# predict returns an array of shape (1, 1); compare the scalar explicitly
# instead of relying on the truth value of a one-element array.
if result[0][0] < 0.5:
    print("this is a cat!")
else:
    print("this is a dog!")


model.summary()
plot_model(model, to_file='model.png')

base_model.summary()
# Separate file name so this diagram does not overwrite the one written above.
plot_model(base_model, to_file='base_model.png')

 

-------------------------------------

所谓极速实现,是代码不超过100行啊,很爽啊

preprocess_image.py 负责图片预处理,数据来自下面这个链接对应的数据集:

点击打开链接

由于我的电脑处理不了整个数据集那么大的数据量,就随便选了3500张作为训练集,500张作为测试集。

每张图片规格调整到(224,224,3),所以最后训练集维度是(3500,224,224,3),测试集维度是(500,224,224,3)

import cv2                 # working with, mainly resizing, images
import numpy as np         # dealing with arrays
import os                  # dealing with directories
from random import shuffle # mixing up or currently ordered data that might lead our network astray in training.
from tqdm import tqdm      # a nice pretty percentage bar for tasks. Thanks to viewer Daniel BA1/4hler for this suggestion
 
# Directory whose files create_train_data() labels and loads.
TRAIN_DIR = 'D:/Documents/GitHub/deep-learning-models/dataset/train_wu'
# Directory whose files process_test_data() loads (no label is derived there,
# only the leading part of the file name is kept).
TEST_DIR = 'D:/Documents/GitHub/deep-learning-models/dataset/test'
# Side length in pixels that every image is resized to.
IMG_SIZE = 224
 
 
def label_img(img):
    """Map an image file name to its one-hot class label.

    The class word is the third dot-separated field counted from the end of
    the name (the "cat" in "cat.0.jpg").

    Args:
        img: File name of the image.

    Returns:
        [1, 0] for 'cat', [0, 1] for 'dog', and None for any other class word.

    Raises:
        IndexError: If the name has fewer than three dot-separated fields.
    """
    word_label = img.split('.')[-3]
    # One-hot layout is [cat, dog]; the table is rebuilt on every call so each
    # caller receives its own fresh list.
    one_hot_by_class = {
        'cat': [1, 0],
        'dog': [0, 1],
    }
    return one_hot_by_class.get(word_label)
    
def create_train_data():
    """Build, shuffle and save the labelled training set from TRAIN_DIR.

    Every file in TRAIN_DIR is labelled with label_img, read with OpenCV as a
    colour image and resized to IMG_SIZE x IMG_SIZE. Files that cannot be
    labelled (name does not follow "<class>.<id>.<ext>" or the class is
    unknown) or that OpenCV cannot decode are skipped instead of aborting the
    whole run.

    Side effects:
        Writes the shuffled samples to 'train_data.npy' in the current
        working directory.

    Returns:
        A shuffled list of [image, label] pairs, each element a NumPy array.
    """
    training_data = []
    for img in tqdm(os.listdir(TRAIN_DIR)):
        try:
            label = label_img(img)
        except IndexError:
            # File name has too few dot-separated fields to carry a label.
            label = None
        if label is None:
            continue

        path = os.path.join(TRAIN_DIR, img)
        pixels = cv2.imread(path, cv2.IMREAD_COLOR)
        if pixels is None:
            # cv2.imread signals an unreadable/corrupt file by returning None;
            # passing that on to cv2.resize would raise.
            continue

        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(pixels), np.array(label)])

    shuffle(training_data)
    # Each sample pairs an image array with a much smaller label array, so the
    # nested list is ragged; recent NumPy versions refuse to convert that
    # implicitly, hence the explicit object dtype.
    np.save('train_data.npy', np.array(training_data, dtype=object))
    return training_data
 
def process_test_data():
    """Build, shuffle and save the unlabelled test set from TEST_DIR.

    Every file in TEST_DIR is read with OpenCV as a colour image and resized
    to IMG_SIZE x IMG_SIZE. The part of the file name before the first dot is
    kept as the sample identifier. Files OpenCV cannot decode are skipped
    instead of aborting the whole run.

    Side effects:
        Writes the shuffled samples to 'test_data.npy' in the current working
        directory.

    Returns:
        A shuffled list of [image, identifier] pairs, where image is a NumPy
        array and identifier is a string.
    """
    testing_data = []
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, img)
        img_num = img.split('.')[0]
        pixels = cv2.imread(path, cv2.IMREAD_COLOR)
        if pixels is None:
            # cv2.imread signals an unreadable/corrupt file by returning None;
            # passing that on to cv2.resize would raise.
            continue

        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(pixels), img_num])

    shuffle(testing_data)
    # Each sample mixes an image array with a string, so the nested list is
    # ragged; recent NumPy versions refuse to convert that implicitly, hence
    # the explicit object dtype.
    np.save('test_data.npy', np.array(testing_data, dtype=object))
    return testing_data
 
# Build the dataset from the image folder (also writes train_data.npy).
train_data = create_train_data()
# If the .npy files already exist, load them instead of rebuilding. They hold
# object arrays, so np.load needs allow_pickle=True:
#train_data = np.load('train_data.npy', allow_pickle=True)
#test_data = process_test_data()
#test_data = np.load('test_data.npy', allow_pickle=True)

# The data is already shuffled, so the last 500 samples serve as the
# evaluation split and everything before them as the training split.
train, test = train_data[:-500], train_data[-500:]

# Separate images (first item of each sample) from labels (second item) and
# force the image batches into shape (N, IMG_SIZE, IMG_SIZE, 3).
X = np.array([sample[0] for sample in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
Y = np.array([sample[1] for sample in train])
test_x = np.array([sample[0] for sample in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 3)
test_y = np.array([sample[1] for sample in test])


我用了keras的ResNet50(权重是在imagenet上训练得到的)作为transfer learning的base model,去掉顶层(include_top=False)之后,重新加了一个GlobalAveragePooling2D()层、一个全连接层和一个2类输出的softmax层。
CAT.py

# -*- coding: utf-8 -*-
"""
Created on Fri Dec  1 16:22:52 2017
@author: Administrator
"""
 
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as k
from matplotlib.pyplot import imshow
from keras.applications.imagenet_utils import preprocess_input
import imageio
from IPython.display import SVG
from keras.utils import plot_model
from keras.utils.vis_utils import model_to_dot
from keras.models import load_model
import numpy as np
 
# ResNet50 with ImageNet weights (1000 classes), without its classification
# head, used as a frozen feature extractor.
base_model = ResNet50(weights='imagenet', include_top=False)
x = base_model.output
# New top layers that are learned on the target dataset: global average
# pooling, one hidden dense layer and a 2-way softmax ([cat, dog]).
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)
# The new model: ResNet50 input -> new 2-class output.
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the base model so only the newly added top layers are trained.
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# step 1: train on the training split.
# NOTE(review): X, Y, test_x and test_y are not defined in this script;
# presumably they come from preprocess_image.py -- confirm how they are
# brought into scope.
model.fit(X, Y, epochs=3, batch_size=64)

# step 2: evaluate on the held-out split
preds = model.evaluate(test_x, test_y)
print("Loss = " + str(preds[0]))
print("Test Accuracy = " + str(preds[1]))

# Save before reloading: without the save, load_model would silently replace
# the model trained above with whatever file is on disk (or fail if none
# exists).
model.save('catdog_4000.h5')
model = load_model('catdog_4000.h5')

# step 3: test with my image
img_path = 'cat2.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = image.img_to_array(img)
# The model is fitted on X as-is (no preprocess_input, no scaling), so the
# query image must not be mean-subtracted either; calling preprocess_input
# here would feed the network a different input distribution than it was
# trained on. load_img yields RGB, so flip the channel axis to BGR.
# NOTE(review): the BGR flip assumes the training arrays came from
# cv2.imread, as in preprocess_image.py -- confirm.
x = x[..., ::-1]
x = np.expand_dims(x, axis=0)
my_image = imageio.imread(img_path)
imshow(my_image)
result = model.predict(x)
# result has shape (1, 2): index 0 is the cat score, index 1 the dog score.
if result[0][0] > result[0][1]:
    print("this is a cat!")
else:
    print("this is a dog!")


# Optional: uncomment to inspect or visualise the architecture.
#model.summary()
#plot_model(model, to_file='model.png')
#SVG(model_to_dot(model).create(prog='dot', format='svg'))

我只训练了3个epoch(太慢了),训练集accuracy在96%左右。我们看看测试集的识别率:

 

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

AI周红伟

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值