首先来看一下题目要求
①收集数据,并标注数据来源与方式:
在百度上进行搜索,发现了和鲸社区
搜索"天气图片数据集",点击
进入
@article{RSCM2017,
title = {RSCM: Region Selection and Concurrency Model for Multi-class Weather Recognition},
author = {Di Lin and Cewu Lu and Hui Huang and Jiaya Jia},
journal = {IEEE Transactions on Image Processing},
volume = {26},
number = {9},
year = {2017},
pages = {4154–4167},
}
进入RSCM
选择这两个压缩文件即可
②数据清洗与可视化分析
我对上面的图片数据集进行了一个大致的整理,有一些图片我自己都分不清楚是什么类别,干脆手动删除,打算做5个分类:haze、lightning、sun、snow、cloudy,但是编号是个问题,下面使用python 进行了编号处理,方便得很,推荐一波~
import os
class BatchRename():
    """Sequentially rename every .jpg file in a folder to 0.jpg, 1.jpg, ...

    The target directory can now be passed to the constructor; it defaults
    to the original hard-coded path so existing callers keep working.
    """

    def __init__(self, path=r'D:\ChromeCoreDownloads\图像分类_天气\sun'):
        # Directory whose .jpg files will be renumbered.
        # Raw string avoids accidental backslash escapes on Windows paths.
        self.path = path

    def rename(self):
        """Rename each *.jpg under self.path to <index>.jpg, printing progress."""
        filelist = os.listdir(self.path)
        total_num = len(filelist)
        folder = os.path.abspath(self.path)  # hoisted out of the loop
        i = 0
        for item in filelist:
            if not item.endswith('.jpg'):
                continue
            src = os.path.join(folder, item)
            dst = os.path.join(folder, str(i) + '.jpg')
            try:
                os.rename(src, dst)
                print('converting %s to %s ...' % (src, dst))
                i = i + 1
            except OSError:
                # Narrowed from a bare `except:` so only filesystem errors
                # (e.g. destination already exists on Windows) are skipped.
                continue
        print('total %d to rename & converted %d jpgs' % (total_num, i))
if __name__ == '__main__':
    # Script entry point: renumber the jpg files in the configured folder.
    BatchRename().rename()
效果图
转换图片像素,使其大小一致 参考于此篇大佬博客
PIL安装失败请戳此处~
我调整参数弄了一下午,把代码改得七零八碎,也不太对,总是报 except
后面的错误,基本要放弃了。。。后来尝试了一下,把 except
删除,终于找到了错误原因/(ㄒoㄒ)/~~
# 1.数据清洗
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras import datasets,layers,models
def read_image(paths):
    """Recursively collect the paths of all .jpg files under *paths*.

    Parameters
    ----------
    paths : str
        Root directory to walk.

    Returns
    -------
    list[str]
        Full path of every file whose extension is exactly ".jpg".
    """
    # The original began with a dead `os.listdir(paths)` whose result was
    # discarded; os.walk already traverses the tree, so it is removed.
    filelist = []
    for root, dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist
def im_xiangsu(paths):
    """Resize every image in *paths* to 128x128 pixels, saving in place.

    The original saved to ``filename + '.jpg'``, producing duplicates such
    as ``x.jpg.jpg`` that then had to be deleted by hand (as the
    surrounding text complains); overwriting the original file fixes that.
    """
    for filename in paths:
        try:
            im = Image.open(filename)
            newim = im.resize((128, 128))
            newim.save(filename)  # overwrite instead of creating x.jpg.jpg
            print('图片' + filename[12:-4] + '.jpg' + '像素转化完成')
        except OSError as e:
            # Unreadable/corrupt images are reported and skipped.
            print(e.args)
# Directory holding the images whose pixel size should be normalised.
src_dir = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou'
im_xiangsu(read_image(src_dir))
像素转换完成。但是还要删除原像素的图片,防止多余图片被训练导致内存爆炸。。。
将图片数据转化成数组:此次又踩无数坑。。。调试多次,终于搞懂。。此处是制作训练集图片,图片标签方式:每类图片所在文件夹对应相应类别编号【见倒数第3行】
# Build the training tensors: class 0 = snowy, class 1 = sunny.
path1 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou\snowy'
path2 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou\sunny'
filelist_1 = read_image(path1)
filelist_2 = read_image(path2)
filelist_all = filelist_1 + filelist_2
M = im_array(filelist_all)
dict_label = {0: '雪天', 1: '晴天'}
# One grayscale 128x128 image per file.
train_images = np.array(M).reshape(len(filelist_all), 128, 128)
label = [0 for _ in filelist_1] + [1 for _ in filelist_2]
train_lables = np.array(label)  # integer class labels
train_images = np.expand_dims(train_images, -1)  # add trailing channel axis
附上文件夹图片
③可视化数据
这里由于我已将图片转化为数组形式,没有用matplotlib绘制出来,但可以看一下标签输出及数据尺寸
# Sanity-check the assembled training data and labels.
print(train_images.shape)# each of the two classes contributes 86 images
print(train_lables.shape)
print(train_lables)
④进入神经网络进行模型训练
【由于作者本身是个小白,对这些模型的理解就是公式,并感激于keras的友好此处可以稍微参考一下我学习keras时写的文章~】
# CNN: three conv/pool stages followed by a small dense classifier head.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax'),
])
model.summary()  # show the architecture
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# epochs = number of passes over the data; batch_size would set samples per step
model.fit(train_images, train_lables, epochs=5)
print(model.evaluate(train_images, train_lables))
查看精度
可以看到,在训练集上,模型精度可以达到88%,下一步我将会制作测试集数据,并预测模型准确率,先将此时完成的代码粘到这里~
# 1.数据清洗、图片处理
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras import datasets,layers,models
def read_image(paths):
    """Recursively collect the paths of all .jpg files under *paths*.

    Parameters
    ----------
    paths : str
        Root directory to walk.

    Returns
    -------
    list[str]
        Full path of every file whose extension is exactly ".jpg".
    """
    # The original began with a dead `os.listdir(paths)` whose result was
    # discarded; os.walk already traverses the tree, so it is removed.
    filelist = []
    for root, dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist
def im_xiangsu(paths):
    """Resize every image in *paths* to 128x128 pixels, saving in place.

    The original saved to ``filename + '.jpg'``, producing duplicates such
    as ``x.jpg.jpg`` that then had to be deleted by hand (as the
    surrounding text complains); overwriting the original file fixes that.
    """
    for filename in paths:
        try:
            im = Image.open(filename)
            newim = im.resize((128, 128))
            newim.save(filename)  # overwrite instead of creating x.jpg.jpg
            print('图片' + filename[12:-4] + '.jpg' + '像素转化完成')
        except OSError as e:
            # Unreadable/corrupt images are reported and skipped.
            print(e.args)
def im_array(paths):
    """Load each image as 8-bit grayscale ("L" mode) and return all of its
    pixels, scaled to [0, 1], concatenated into one flat Python list."""
    pixels = []
    for filename in paths:
        gray = Image.open(filename).convert("L")  # mode "L": 8-bit grayscale
        scaled = np.array(gray.getdata(), dtype='float32') / 255.0
        pixels.extend(scaled.tolist())
    return pixels
# Build the training tensors: class 0 = snowy, class 1 = sunny.
path1 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou\snowy'
path2 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou\sunny'
filelist_1 = read_image(path1)
filelist_2 = read_image(path2)
filelist_all = filelist_1 + filelist_2
M = im_array(filelist_all)
dict_label = {0: '雪天', 1: '晴天'}
# One grayscale 128x128 image per file.
train_images = np.array(M).reshape(len(filelist_all), 128, 128)
label = [0 for _ in filelist_1] + [1 for _ in filelist_2]
train_lables = np.array(label)  # integer class labels
train_images = np.expand_dims(train_images, -1)  # add trailing channel axis
#数据可视化 :由于图片进行了数组转化,没办法进行展示
# print(train_images.shape)#输出验证一下(172, 128, 128, 1)
# print(train_lables.shape)
# print(train_lables)
# 进入神经网络
#
# CNN: three conv/pool stages followed by a small dense classifier head.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax'),
])
model.summary()  # show the architecture
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# epochs = number of passes over the data; batch_size would set samples per step
model.fit(train_images, train_lables, epochs=5)
print(model.evaluate(train_images, train_lables))
⑤模型评估【数据加强】
测试集的制作与训练集基本相似
# Build the test tensors the same way: class 0 = rainy, class 1 = sunny.
path3 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou_test\_rainy_'
path4 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou_test\_sunny_'
filelist_3 = read_image(path3)
filelist_4 = read_image(path4)
filelist_tot = filelist_3 + filelist_4
N = im_array(filelist_tot)
dict_label = {0: '雨天', 1: '晴天'}
test_images = np.array(N).reshape(len(filelist_tot), 128, 128)
label = [0 for _ in filelist_3] + [1 for _ in filelist_4]
test_lables = np.array(label)  # integer class labels
test_images = np.expand_dims(test_images, -1)  # add trailing channel axis
将数据扩充到 train:100、test:35
可以看到训练集的精度有所上升,但是测试集的精度很低
# Compare accuracy on the (enlarged) training set vs the held-out test set.
print(model.evaluate(train_images,train_lables))
print(model.evaluate(test_images,test_lables))
参数调整:将训练次数epochs
调整为10,发现训练集与测试集的精度都有一定增加,由此得出:增大样本容量和增加训练次数可以提高精度
⑥实现预测
预测图片(应选择裁剪好像素的图片上传):
预测结果:
预测图片:
预测结果
附完整代码:
# 1.数据清洗、图片处理
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras import datasets,layers,models
def read_image(paths):
    """Recursively collect the paths of all .jpg files under *paths*.

    Parameters
    ----------
    paths : str
        Root directory to walk.

    Returns
    -------
    list[str]
        Full path of every file whose extension is exactly ".jpg".
    """
    # The original began with a dead `os.listdir(paths)` whose result was
    # discarded; os.walk already traverses the tree, so it is removed.
    filelist = []
    for root, dirs, files in os.walk(paths):
        for file in files:
            if os.path.splitext(file)[1] == ".jpg":
                filelist.append(os.path.join(root, file))
    return filelist
def im_xiangsu(paths):
    """Resize every image in *paths* to 128x128 pixels, saving in place.

    The original saved to ``filename + '.jpg'``, producing duplicates such
    as ``x.jpg.jpg`` that then had to be deleted by hand (as the
    surrounding text complains); overwriting the original file fixes that.
    """
    for filename in paths:
        try:
            im = Image.open(filename)
            newim = im.resize((128, 128))
            newim.save(filename)  # overwrite instead of creating x.jpg.jpg
            print('图片' + filename[12:-4] + '.jpg' + '像素转化完成')
        except OSError as e:
            # Unreadable/corrupt images are reported and skipped.
            print(e.args)
def im_array(paths):
    """Load each image as 8-bit grayscale ("L" mode) and return all of its
    pixels, scaled to [0, 1], concatenated into one flat Python list."""
    pixels = []
    for filename in paths:
        gray = Image.open(filename).convert("L")  # mode "L": 8-bit grayscale
        scaled = np.array(gray.getdata(), dtype='float32') / 255.0
        pixels.extend(scaled.tolist())
    return pixels
# Build the training tensors: class 0 = rainy, class 1 = sunny.
path1 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou_train\_rainy'
path2 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou_train\_sunny'
filelist_1 = read_image(path1)
filelist_2 = read_image(path2)
filelist_all = filelist_1 + filelist_2
M = im_array(filelist_all)
dict_label = {0: '雨天', 1: '晴天'}
# One grayscale 128x128 image per file.
train_images = np.array(M).reshape(len(filelist_all), 128, 128)
label = [0 for _ in filelist_1] + [1 for _ in filelist_2]
train_lables = np.array(label)  # integer class labels
train_images = np.expand_dims(train_images, -1)  # add trailing channel axis
# Build the test tensors the same way: class 0 = rainy, class 1 = sunny.
path3 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou_test\_rainy_'
path4 = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou_test\_sunny_'
filelist_3 = read_image(path3)
filelist_4 = read_image(path4)
filelist_tot = filelist_3 + filelist_4
N = im_array(filelist_tot)
dict_label = {0: '雨天', 1: '晴天'}
test_images = np.array(N).reshape(len(filelist_tot), 128, 128)
label = [0 for _ in filelist_3] + [1 for _ in filelist_4]
test_lables = np.array(label)  # integer class labels
test_images = np.expand_dims(test_images, -1)  # add trailing channel axis
#数据可视化 :由于图片进行了数组转化,没办法进行展示
# print(train_images.shape)#输出验证一下(172, 128, 128, 1)
# print(train_lables.shape)
# print(train_lables)
# 进入神经网络
#
# CNN: three conv/pool stages followed by a small dense classifier head.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(2, activation='softmax'),
])
model.summary()  # show the architecture
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# epochs = number of passes over the data; batch_size would set samples per step
model.fit(train_images, train_lables, epochs=10)
print(model.evaluate(train_images, train_lables))
print(model.evaluate(test_images, test_lables))
# 实现模型预测
def im_s_array(paths):
    """Convert the first image in *paths* into a (1, 128, 128, 1) grayscale
    float array with pixel values scaled to [0, 1], ready for model.predict."""
    gray = Image.open(paths[0]).convert("L")  # mode "L": 8-bit grayscale
    flat = np.array(gray.getdata(), dtype='float32') / 255.0
    # Round-trip through a Python list, matching the original exactly
    # (the re-created array is float64, as before).
    return np.array(flat.tolist()).reshape(-1, 128, 128, 1)
# Classify the image placed in the prediction folder.
pred_dir = 'D:\ChromeCoreDownloads\图像分类_天气\LovelyZhou_predict'  # folder with the image to test
filelist = read_image(pred_dir)
im_xiangsu(filelist)           # normalise pixel size first
img = im_s_array(filelist)
predictions_single = model.predict(img)
# argmax picks the index of the highest class probability
print("预测结果为:", dict_label[np.argmax(predictions_single)])
print(predictions_single)
文件夹情况