环境:jupyter notebook(tensorflow为2.0)
工作:使用卷积神经网络实现对卫星拍摄的飞机与湖泊图像的二分类
备注:数据集可在kaggle上下载,嫌麻烦的同学可以留言或私信我拿此卫星图像数据集。
简介:所使用数据集包含700张湖泊图片和700张飞机图片
重点来咯
- 导入相关包
import pathlib

import IPython.display as display
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow import keras

%matplotlib inline
- 设置目录。在这里需要提醒一点的是,加载的数据集需要以文件夹形式,不能用zip,否则会报错说找不到文件。
data_dir = '2_class/2_class'
- 提取路径
data_root = pathlib.Path(data_dir)
- 迭代查看一下是否正确哈
for item in data_root.iterdir():
print(item)
输出:
2_class\2_class\airplane
2_class\2_class\lake
没毛病,继续
- 提取所有图片成列表形式
all_image_paths = list(data_root.glob('*/*'))
- 张数
image_count = len(all_image_paths)
- 把面向对象变成实际路径并打乱
import random
all_image_paths = [str(path) for path in all_image_paths]
random.shuffle(all_image_paths)
- 取出data_root所有目录并排序
label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir())
label_names
输出:
['airplane', 'lake']
- 将目录自动排号
label_to_index = dict((name, index) for index,name in enumerate(label_names))
label_to_index
输出:
{'airplane': 0, 'lake': 1}
- 任取一张图得到其类别,再通过parent.name得到其编码
all_image_labels = [label_to_index[pathlib.Path(path).parent.name] for path in all_image_paths]
- 看下前5张图的编码
all_image_labels[:5]
输出:
[0, 1, 1, 1, 1]
- 定义函数
def caption_image(label):
return {0: 'airplane', 1: 'lake'}.get(label)
- 随机挑三张图看看效果
for n in range(3):
image_index = random.choice(range(len(all_image_paths)))
display.display(display.Image(all_image_paths[image_index]))
print(caption_image(all_image_labels[image_index]))
print()
下面对图片进行处理,
- 定义一个预处理函数:读取图片路径,解码图片为tensor形式,转化其数据类型为float32,最后进行标准化。处理后的图片其实与原图差别不大,但是更便于计算了
def load_and_preprocess_image(path):
image = tf.io.read_file(path)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.resize(image, [256, 256])
image = tf.cast(image, tf.float32)
image = image/255.0 # normalize to [0,1] range
return image
- 把所有图片做成数据集
path_ds = tf.data.Dataset.from_tensor_slices(all_image_paths)
- 让它自动选择并行处理的程度吧
AUTOTUNE = tf.data.experimental.AUTOTUNE
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)
- 列表数据集
label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(all_image_labels, tf.int64))
for label in label_ds.take(10):
print(label_names[label.numpy()])
看一看,输出:
airplane
lake
lake
lake
lake
lake
airplane
airplane
airplane
lake
- 取总数据的百分之二十为测试数据,剩下的为训练数据
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))
test_count = int(image_count*0.2)
train_count = image_count - test_count
train_data = image_label_ds.skip(test_count)
test_data = image_label_ds.take(test_count)
- 设置批次大小:每批喂入32张图片
BATCH_SIZE = 32
- 构建训练图片输入管道
train_data = train_data.apply(
tf.data.experimental.shuffle_and_repeat(buffer_size=train_count))
train_data = train_data.batch(BATCH_SIZE)
train_data = train_data.prefetch(buffer_size=AUTOTUNE)
- 测试图片
test_data = test_data.batch(BATCH_SIZE)
下面开始建立模型
- 顺序模型
model = tf.keras.Sequential() #顺序模型
model.add(tf.keras.layers.Conv2D(64, (3, 3), input_shape=(256, 256, 3), activation='relu'))
model.add(tf.keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(tf.keras.layers.Conv2D(128, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu'))
model.add(tf.keras.layers.Conv2D(256, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(512, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(512, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Conv2D(1024, (3, 3), activation='relu'))
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(1024, activation='relu'))
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
- 编译,损失函数的定义在上章博客(三)有介绍
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['acc']
)
- 定义一下
steps_per_epoch = train_count//BATCH_SIZE
validation_steps = test_count//BATCH_SIZE
- 使用fit训练
history = model.fit(train_data, epochs=30, steps_per_epoch=steps_per_epoch, validation_data=test_data, validation_steps=validation_steps)
使用电脑CPU的话会慢到怀疑人生,不过毕竟我不是深度学习方向的就先将就一下吧呜呜
- 最后做下可视化处理,画个图
history.history.keys()
plt.plot(history.epoch, history.history.get('acc'), label='acc')
plt.plot(history.epoch, history.history.get('val_acc'), label='val_acc')
plt.legend()
plt.plot(history.epoch, history.history.get('loss'), label='loss')
plt.plot(history.epoch, history.history.get('val_loss'), label='val_loss')
plt.legend()
图就不放了,大家可以对程序进行修改,结果肯定不一样的
上述程序取自‘日月光华TENSORFLOW2.0简明实战教程’里老师讲述的内容,买了他的课听了听然后在这里加上自己的理解写了博客,不知道那位老师叫什么名字但是他的课还是挺好的。
拜拜