# This code targets TensorFlow 2.0.
import tensorflow as tf
import matplotlib.pyplot as plt
# '%matplotlib inline' is an IPython/Jupyter magic, not valid Python syntax;
# it is kept commented out so this file runs as a plain script.
# %matplotlib inline
import numpy as np
import pathlib  # object-oriented filesystem path handling
# Collect every image path under the dataset root directory.
data_dir = '2_class'
data_root = pathlib.Path(data_dir)  # path object for the dataset root
for item in data_root.iterdir():  # fixed: the loop body had lost its indentation
    print(item)
all_image_path = list(data_root.glob('*/*'))  # every file one level below each class directory
all_image_path = [str(path) for path in all_image_path]  # Path objects -> plain string paths
import random  # used to randomize the image order
random.shuffle(all_image_path)
image_count = len(all_image_path)  # total number of images found
image_count
# Class names come from the sub-directory names; sort them so the
# name -> index mapping is deterministic across runs.
label_names = sorted(entry.name for entry in data_root.glob('*/'))
label_names
label_to_index = {name: index for index, name in enumerate(label_names)}
# The encoding dictionary (class name -> integer label) is now in place.
pathlib.Path('dataset\\2_class\\lake\\lake_060.jpg').parent.name
# Encode every image: its label is the index of its parent directory's name.
all_image_label = [label_to_index[pathlib.Path(image_path).parent.name]
                   for image_path in all_image_path]
import IPython.display as display  # used to render images inline
# Invert the encoding so an integer label maps back to its class name.
index_to_label = dict((v, k) for k, v in label_to_index.items())
# Show three randomly chosen images together with their decoded labels.
for n in range(3):  # fixed: the original was missing the ':' and the loop-body indentation
    image_index = random.choice(range(len(all_image_path)))  # fixed typo: was 'all_image_image_path'
    display.display(display.Image(all_image_path[image_index]))
    print(index_to_label[all_image_label[image_index]])
    print()
# ----------------------------------------------------------------------------------
# Reading and decoding a single image, step by step.
sample_path = all_image_path[0]
sample_bytes = tf.io.read_file(sample_path)  # raw file contents as a binary tf.Tensor
sample_img = tf.image.decode_image(sample_bytes)  # decode bytes into an image tensor
sample_img.shape  # inspect the decoded shape
sample_img.dtype  # inspect the decoded dtype
# ----------------------------------------------------------------------------------
# Normalization: dividing by 255 is the simplest way to map pixels into [0, 1].
sample_img = tf.cast(sample_img, tf.float32) / 255
sample_img.numpy().max()  # convert to numpy first, then query the range
sample_img.numpy().min()
# ----------------------------------------------------------------------------------
# Preprocessing function used by the tf.data pipeline below.
def load_and_preprocess_image(img_path):
    """Read the JPEG at *img_path*, resize to 256x256 and scale into [0, 1].

    Args:
        img_path: path of a JPEG file (str or string tensor).

    Returns:
        A float32 tensor of shape (256, 256, channels) with values in [0, 1].
    """
    img_raw = tf.io.read_file(img_path)
    # tf.image.decode_image is more general, but it returns a tensor whose
    # static shape is unknown; decode_jpeg preserves shape information.
    img_tensor = tf.image.decode_jpeg(img_raw)
    img_tensor = tf.image.resize(img_tensor, [256, 256])
    img_tensor = tf.cast(img_tensor, tf.float32)  # fixed: original had '.' instead of ','
    img_tensor = img_tensor / 255
    return img_tensor
# ----------------------------------------------------------------------------------
image_path = all_image_path[100]
plt.imshow(load_and_preprocess_image(image_path))  # visual sanity check of the preprocessing
path_ds = tf.data.Dataset.from_tensor_slices(all_image_path)  # dataset of path strings
image_dataset = path_ds.map(load_and_preprocess_image)  # map each path to a decoded image tensor
# NOTE: the element shape may display as unknown because the decoded shape is
# only established when the map function actually runs.
label_dataset = tf.data.Dataset.from_tensor_slices(all_image_label)
for label in label_dataset.take(10):
    print(label.numpy())
# Pair each image with its label.
dataset = tf.data.Dataset.zip((image_dataset, label_dataset))
test_count = int(image_count * 0.2)  # hold out 20% for evaluation
train_count = image_count - test_count
# Split into train / test partitions.
train_dataset = dataset.skip(test_count)  # skip the first test_count examples
test_dataset = dataset.take(test_count)
BATCH_SIZE = 32
# Shuffle over the full training set. .repeat() is required because model.fit
# is called with steps_per_epoch below; without it the dataset is exhausted
# after the first epoch and training stops with a "ran out of data" warning.
train_dataset = train_dataset.shuffle(buffer_size=train_count).repeat().batch(BATCH_SIZE)
test_dataset = test_dataset.batch(BATCH_SIZE)
# ----------------------------------------------------------------------------------
# Model definition: a VGG-style stack of Conv-BN-ReLU groups, followed by a
# small dense head ending in a single sigmoid unit (binary classification).
model = tf.keras.Sequential()  # sequential model

def _add_conv_block(filters, **conv_kwargs):
    """Append a Conv2D(3x3) -> BatchNorm -> ReLU group to the model."""
    model.add(tf.keras.layers.Conv2D(filters, (3, 3), **conv_kwargs))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation('relu'))

def _add_dense_block(units):
    """Append a Dense -> BatchNorm -> ReLU group to the model."""
    model.add(tf.keras.layers.Dense(units))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation('relu'))

_add_conv_block(64, input_shape=(256, 256, 3))
_add_conv_block(64)
model.add(tf.keras.layers.MaxPooling2D())
_add_conv_block(128)
_add_conv_block(128)
model.add(tf.keras.layers.MaxPooling2D())
_add_conv_block(256)
_add_conv_block(256)
model.add(tf.keras.layers.MaxPooling2D())
_add_conv_block(512)
model.add(tf.keras.layers.MaxPooling2D())
_add_conv_block(512)
model.add(tf.keras.layers.MaxPooling2D())
_add_conv_block(1024)
model.add(tf.keras.layers.GlobalAveragePooling2D())
_add_dense_block(1024)
_add_dense_block(256)
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Compile for binary classification.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['acc'])
# Number of batches that make up one pass over each split.
steps_per_epoch = train_count // BATCH_SIZE
validation_steps = test_count // BATCH_SIZE
# Train the model.
history = model.fit(train_dataset, epochs=35, steps_per_epoch=steps_per_epoch, validation_data=test_dataset, validation_steps=validation_steps)
# Inspect the recorded metrics.
history.history.keys()
# fixed: in a plain script (unlike a notebook, where each cell gets its own
# figure) the accuracy and loss curves would otherwise be drawn on the same axes.
plt.figure()
plt.plot(history.epoch, history.history.get('acc'), label='acc')
plt.plot(history.epoch, history.history.get('val_acc'), label='val_acc')
plt.legend()
plt.figure()
plt.plot(history.epoch, history.history.get('loss'), label='loss')
plt.plot(history.epoch, history.history.get('val_loss'), label='val_loss')
plt.legend()
# Source blog post: 机器学习:【13】卫星图像识别实践 (Machine learning: [13] satellite image recognition practice)
# First published 2020-02-10 11:26:19