Cactus Image Recognition

The data come from the Kaggle Aerial Cactus Identification competition (https://www.kaggle.com/c/aerial-cactus-identification).

Download the data

# run in a terminal
kaggle competitions download -c aerial-cactus-identification
unzip train.zip
unzip test.zip
mkdir test_input
cp -r test test_input/    # keep the test images under test_input/test/

# ls
# train.zip test.zip train test train.csv sample_submission.csv

train.csv is the label table for the training set.

sample_submission.csv is the template for the results file that is uploaded to Kaggle at the end.

Import TensorFlow and the other required packages

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import tensorflow as tf
from tensorflow import keras

import os
from shutil import copyfile, move
from tqdm import tqdm
import h5py

Check the TensorFlow version and whether a GPU is available

print(tf.__version__)
print(tf.test.is_gpu_available())
# 2.0.0-beta0
# False

Organize the data

Read the training-set label table

training_df = pd.read_csv("train.csv")
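
As a quick check (a minimal sketch; training_df is the DataFrame just loaded), inspecting the first rows and the class balance confirms that the table has an id column holding the file name and a has_cactus column holding a 0/1 label:

# Peek at the label table and the class balance
print(training_df.head())
print(training_df["has_cactus"].value_counts())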

Sort the training images into two folders, one for positive samples (true) and one for negative samples (false)

src = "train/"
dst = "sorted_training/"

os.mkdir(dst)
os.mkdir(dst+"true")
os.mkdir(dst+"false")

with tqdm(total=len(training_df)) as pbar:
    for idx, row in training_df.iterrows():
        pbar.update(1)
        if row["has_cactus"] == 1:
            copyfile(src+row["id"], dst+"true/"+row["id"])
        else:
            copyfile(src+row["id"], dst+"false/"+row["id"])

Move one tenth of the training set into a validation set, which will be used to validate the results

src = "sorted_training/"
dst = "sorted_validation/"

os.mkdir(dst)
os.mkdir(dst+"true")
os.mkdir(dst+"false")

validation_df = training_df.sample(n=int(len(training_df)/10))

with tqdm(total=len(validation_df)) as pbar:
    for idx, row in validation_df.iterrows():
        pbar.update(1)
        if row["has_cactus"] == 1:
            move(src+"true/"+row["id"], dst+"true/"+row["id"])
        else:
            move(src+"false/"+row["id"], dst+"false/"+row["id"])

Build the model

Import the model-building packages

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Input
from tensorflow.keras.layers import Conv2D, Dense, Flatten, Dropout, Activation
from tensorflow.keras.layers import BatchNormalization, Reshape, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

Loading the data with ImageDataGenerator's flow_from_directory requires the files to be organized as follows:

data/
	sorted_training/
		true/
			1.jpg
			2.jpg
			3.jpg
		false/
			1.jpg
			2.jpg
			3.jpg
	sorted_validation/
		true/
			1.jpg
			2.jpg
			3.jpg
		false/
			1.jpg
			2.jpg
			3.jpg
	test_input/
		test/
			1.jpg
			2.jpg
			3.jpg
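
Note that flow_from_directory always looks for one subfolder per class, which is why the unlabeled test images are kept in a dummy test/ subfolder inside test_input/. A minimal sketch to confirm the folders are in place:

# List the class subfolders that flow_from_directory will see
for folder in ["sorted_training", "sorted_validation", "test_input"]:
    print(folder, sorted(os.listdir(folder)))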

Load the data

batch_size = 64

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    horizontal_flip=True,
    vertical_flip=True)

train_data_dir = "sorted_training"
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    shuffle=True,
    target_size=(32, 32),
    batch_size=batch_size,
    class_mode='binary')


validation_datagen = ImageDataGenerator(rescale=1. / 255)
validation_data_dir = "sorted_validation"
validation_generator = validation_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(32, 32),
    batch_size=batch_size,
    class_mode='binary')
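
flow_from_directory assigns class indices alphabetically by folder name, so false maps to 0 and true maps to 1. Printing the mapping confirms that a sigmoid output close to 1 will mean "has cactus":

# Expected: {'false': 0, 'true': 1}
print(train_generator.class_indices)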

input_shape = (32,32,3)
num_classes = 2

Model definition (adapted from https://www.kaggle.com/frlemarchand/simple-cnn-using-keras/notebook)

dropout_dense_layer = 0.6

model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(128, (3, 3)))
model.add(BatchNormalization())
model.add(Activation('relu'))

model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('relu'))
model.add(Dropout(dropout_dense_layer))

model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dropout(dropout_dense_layer))

model.add(Dense(1))
model.add(Activation('sigmoid'))
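
At this point model.summary() prints each layer's output shape and parameter count, which is a quick way to verify that the unpadded 3x3 convolutions and the pooling layers reduce the 32x32 input to a 1x1x128 feature map before the Flatten layer:

# Print output shapes and parameter counts for every layer
model.summary()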

Set the loss function and the optimizer, and use accuracy as the evaluation metric

model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=keras.optimizers.Adam(lr=0.001),
              metrics=['accuracy'])

If val_loss has not improved for 25 epochs, stop training early; ModelCheckpoint keeps the model from the best epoch.

callbacks = [EarlyStopping(monitor='val_loss', patience=25),
             ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)]

Train for up to 100 epochs

epochs = 100
history = model.fit_generator(train_generator,
          validation_data=validation_generator,
          epochs=epochs,
          verbose=1,
          shuffle=True,
          callbacks=callbacks)

Plot loss and val_loss

plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.show()

Plot training accuracy and validation accuracy

plt.plot(history.history['accuracy'], label='accuracy')
plt.plot(history.history['val_accuracy'], label='val_accuracy')
plt.legend()
plt.show()

Model training is complete.

Test the model

Load the best model obtained during training

model.load_weights("best_model.h5")
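
Since ModelCheckpoint saves the full model by default, loading the weights back into the same architecture restores the best epoch. As an optional sanity check (a minimal sketch), the restored model can be re-evaluated on the validation generator:

# Re-evaluate the best checkpoint on the validation set
val_loss, val_acc = model.evaluate_generator(validation_generator, verbose=1)
print(val_loss, val_acc)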

Then load the test set

test_folder = "test_input/"
test_datagen = ImageDataGenerator(
    rescale=1. / 255)

test_generator = test_datagen.flow_from_directory(
    directory=test_folder,
    target_size=(32,32),
    batch_size=1,
    class_mode='binary',
    shuffle=False
)
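
Because shuffle=False, the generator keeps a fixed file order, so the i-th prediction will correspond to test_generator.filenames[i]. A quick check (a minimal sketch) that all test images were found:

# Number of test images and the first few file paths, in prediction order
print(test_generator.samples)
print(test_generator.filenames[:3])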

Predict on the test set

pred = model.predict_generator(test_generator, verbose=1)
pred_binary = [0 if value < 0.50 else 1 for value in pred]

Finally, write the predictions to a csv file.
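
A minimal sketch of this step, assuming the competition's submission format (an id column holding the image file name and a has_cactus column) and that test_generator.filenames yields paths like test/xxx.jpg relative to test_input:

# Build the submission table: one row per test image, in prediction order
ids = [os.path.basename(f) for f in test_generator.filenames]
submission_df = pd.DataFrame({"id": ids, "has_cactus": pred_binary})
submission_df.to_csv("submission.csv", index=False)

The resulting submission.csv can then be uploaded from the terminal with kaggle competitions submit -c aerial-cactus-identification -f submission.csv -m "simple cnn".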
