数据链接:形状识别:是方还是圆。本次练习使用的是简单的CNN(之后有时间再试其他网络模型)。这个任务是二分类,输出层本应为Dense(1),但是我用了to_categorical对标签做one-hot编码,所以输出层必须为Dense(2)。注意:使用pd.read_csv读取文件时,函数默认将数据第一行作为标题(列名),这点很重要。保存预测结果时使用pd.DataFrame,具体用法见前面的Keras学习笔记。本次使用的代码如下:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense,Activation,Conv2D
from keras.layers import MaxPool2D,Flatten,Dropout,ZeroPadding2D,BatchNormalization
from keras.utils import np_utils
from keras import metrics
import keras
from keras.models import save_model,load_model
from keras.models import Model
from keras.callbacks import ModelCheckpoint
import os
from keras.utils import plot_model
from keras.optimizers import SGD,Adam
# ---- Training data --------------------------------------------------------
# pd.read_csv treats the first CSV row as the header, so only the remaining
# rows end up in the array.
df = pd.read_csv("data/train.csv")
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the drop-in
# replacement. The copy guarantees a writable array for the in-place shuffle.
data = df.values.copy()
df = None  # release the DataFrame; only the ndarray is used from here on
np.random.shuffle(data)  # shuffles the rows in place

# Column 0 is skipped, the last column is the label, and everything in
# between is reshaped into one 40x40 single-channel image per row.
x_train = data[:, 1:-1]
x_train = x_train.reshape(data.shape[0], 40, 40, 1).astype("float32")
x_train = x_train / 255.0  # NOTE(review): assumes 8-bit pixel values (0-255) - confirm
# One-hot encode the labels into 2 columns to match the Dense(2) softmax head.
y_train = np_utils.to_categorical(data[:, -1], 2).astype("float32")

# ---- Test data ------------------------------------------------------------
# Same layout as the training file but without the trailing label column.
# The rows are left unshuffled (the original had the shuffle commented out),
# so predictions stay in file order.
df = pd.read_csv("data/test.csv")
data = df.values
df = None
x_test = data[:, 1:]
x_test = x_test.reshape(data.shape[0], 40, 40, 1).astype("float32")
x_test = x_test / 255.0
# print(x_train.shape)
# print(y_train.shape)
# Hyper-parameters shared by the model definition and the later fit() call.
batch_size = 32
n_filters = 32
pool_size = (2, 2)

# Small CNN: three conv stages followed by a dense classifier with a
# two-way softmax output.
cnn_net = Sequential()

# Stage 1: 3x3 conv on the 40x40x1 input.
cnn_net.add(Conv2D(n_filters, kernel_size=(3, 3), strides=(1, 1), input_shape=(40, 40, 1)))
cnn_net.add(Activation('relu'))
# The input is channels-last (see input_shape), so the feature axis is the
# last one. The original passed axis=1, which normalises along the image
# height instead of per channel.
cnn_net.add(BatchNormalization(epsilon=1e-6, axis=-1))
cnn_net.add(MaxPool2D(pool_size=pool_size))

# Stage 2: zero-pad by one pixel per side, then 3x3 conv with 48 filters.
cnn_net.add(ZeroPadding2D((1, 1)))
cnn_net.add(Conv2D(48, kernel_size=(3, 3)))
cnn_net.add(Activation('relu'))
cnn_net.add(BatchNormalization(epsilon=1e-6, axis=-1))
cnn_net.add(MaxPool2D(pool_size=pool_size))

# Stage 3: zero-pad again, then 2x2 conv with 64 filters.
cnn_net.add(ZeroPadding2D((1, 1)))
cnn_net.add(Conv2D(64, kernel_size=(2, 2)))
cnn_net.add(Activation('relu'))
cnn_net.add(BatchNormalization(epsilon=1e-6, axis=-1))
cnn_net.add(MaxPool2D(pool_size=pool_size))
cnn_net.add(Dropout(0.25))

# Flatten collapses the multi-dimensional feature maps into one vector; it is
# the usual bridge between the convolutional and fully connected layers.
cnn_net.add(Flatten())
# NOTE(review): the original carried a "#3168" note here; check the actual
# flattened size against the summary() output below.
cnn_net.add(Dense(512))
cnn_net.add(Activation('relu'))
# Two output units because the labels are one-hot encoded with 2 classes.
cnn_net.add(Dense(2))
cnn_net.add(Activation('softmax'))

# Print the layer-by-layer structure of the network.
cnn_net.summary()
# Train with SGD (Nesterov momentum) on categorical cross-entropy, holding
# out 20% of the training rows for validation, then persist the model.
sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
cnn_net.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['acc'])
hist = cnn_net.fit(x_train, y_train, batch_size=batch_size, epochs=50, verbose=1, validation_split=0.2)
cnn_net.save("cnn_net_1.h5")

print("test predict------")
# A single forward pass: the class id is the argmax over the two softmax
# outputs. The original ran predict() and threw the result away, then called
# Sequential.predict_classes(), which no longer exists in newer Keras/TF.
probabilities = cnn_net.predict(x_test, batch_size=64, verbose=0)
result = np.argmax(probabilities, axis=-1)

name = ['y']
# Row labels start at 4000. The end of the range is derived from the number
# of predictions instead of the hard-coded 7550, so a test file with a
# different row count no longer raises a length-mismatch error.
first_id = 4000
title = list(range(first_id, first_id + len(result)))
result = pd.DataFrame(data=result, columns=name)
result.index = title
result.to_csv('result_1.csv', index=True)
# plt.plot(hist.history['loss'])
# plt.plot(hist.history['val_loss'])
# plt.title('model loss')
# plt.ylabel('loss')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.savefig("loss_1.png")
# plt.clf()
# plt.plot(hist.history['acc'])
# plt.plot(hist.history['val_acc'])
# plt.title('model acc')
# plt.ylabel('acc')
# plt.xlabel('epoch')
# plt.legend(['train', 'test'], loc='upper left')
# plt.savefig("acc_1.png")
形状识别2:方圆之外:
数据下载:形状识别2:方圆之外。这个任务表面上看似二分类,但是预测时实际有3类。用Dense(1)做二分类,结果只有一个概率,不好判断;这个时候用to_categorical编码并使用Dense(2),结果会有2个概率,比较好判断。但运行结果不是很理想,F1只有0.7879,应该和最后预测的阈值有关,有待改进。当前训练代码如下:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense,Activation,Conv2D
from keras.layers import MaxPool2D,Flatten,Dropout,ZeroPadding2D,BatchNormalization
from keras.utils import np_utils
from keras import metrics
import keras
from keras.models import save_model,load_model
from keras.models import Model
from keras.callbacks import ModelCheckpoint
import os
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD,Adam
from keras.utils import plot_model
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
import cv2
from PIL import Image
from keras.preprocessing import image
# Load the training table (pd.read_csv treats the first CSV row as the header).
df = pd.read_csv("crPlus/data/train.csv")
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the drop-in
# replacement. The copy guarantees a writable array for the in-place shuffle.
data = df.values.copy()
df = None  # release the DataFrame; only the ndarray is used from here on
np.random.shuffle(data)  # shuffles the rows in place

# Column 0 is skipped, the last column is the label, and everything in
# between is reshaped into one 40x40 single-channel image per row.
x_train = data[:, 1:-1]
x_train = x_train.reshape(data.shape[0], 40, 40, 1).astype("float32")
x_train = x_train / 255.0  # NOTE(review): assumes 8-bit pixel values (0-255) - confirm
# One-hot encode the labels into 2 columns to match the Dense(2) softmax head.
y_train = np_utils.to_categorical(data[:, -1], 2).astype("float32")
# df=pd.read_csv("crPlus/data/test.csv")
# data=df.as_matrix()
# df=None
# x_test=data[:,1:]
# x_test=x_test.reshape(data.shape[0],40,40,1).astype("float32")
# x_test=x_test/255.0
# print(x_train.shape)
# print(y_train.shape)
# Hyper-parameters shared by the model definition and the later fit() call.
batch_size = 32
n_filters = 32
pool_size = (2, 2)

# Small CNN: three conv stages followed by a dense classifier with a
# two-way softmax output.
cnn_net = Sequential()

# Stage 1: 3x3 conv on the 40x40x1 input.
cnn_net.add(Conv2D(n_filters, kernel_size=(3, 3), strides=(1, 1), input_shape=(40, 40, 1)))
cnn_net.add(Activation('relu'))
# The input is channels-last (see input_shape), so the feature axis is the
# last one. The original passed axis=1, which normalises along the image
# height instead of per channel.
cnn_net.add(BatchNormalization(epsilon=1e-6, axis=-1))
cnn_net.add(MaxPool2D(pool_size=pool_size))

# Stage 2: zero-pad by one pixel per side, then 3x3 conv with 48 filters.
cnn_net.add(ZeroPadding2D((1, 1)))
cnn_net.add(Conv2D(48, kernel_size=(3, 3)))
cnn_net.add(Activation('relu'))
cnn_net.add(BatchNormalization(epsilon=1e-6, axis=-1))
cnn_net.add(MaxPool2D(pool_size=pool_size))

# Stage 3: zero-pad again, then 2x2 conv with 64 filters.
cnn_net.add(ZeroPadding2D((1, 1)))
cnn_net.add(Conv2D(64, kernel_size=(2, 2)))
cnn_net.add(Activation('relu'))
cnn_net.add(BatchNormalization(epsilon=1e-6, axis=-1))
cnn_net.add(MaxPool2D(pool_size=pool_size))
cnn_net.add(Dropout(0.25))

# Flatten collapses the multi-dimensional feature maps into one vector; it is
# the usual bridge between the convolutional and fully connected layers.
cnn_net.add(Flatten())
# NOTE(review): the original carried a "#3168" note here; check the actual
# flattened size against the summary() output below.
cnn_net.add(Dense(512))
cnn_net.add(Activation('relu'))
# Two softmax outputs (the original noted sigmoid as the alternative); the
# prediction script thresholds these two probabilities separately.
cnn_net.add(Dense(2))
cnn_net.add(Activation('softmax'))

# Print the layer-by-layer structure of the network. (In the original this
# comment line had lost its leading '#', which made the script a syntax error.)
cnn_net.summary()
# Optimiser: SGD with Nesterov momentum (an earlier run used lr=0.0001).
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
cnn_net.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['acc'],
)

# Fit for 55 epochs (50 in an earlier run) with the last 20% of the rows
# held out for validation, then persist the trained model.
batch_size = 32
hist = cnn_net.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=55,
    verbose=1,
    validation_split=0.2,
)
cnn_net.save("crPlus/cnn_net_1.h5")

# Save one figure per tracked quantity: training curve vs. validation curve.
curve_specs = (
    ('loss', 'val_loss', 'model loss', "crPlus/loss.png"),
    ('acc', 'val_acc', 'model acc', "crPlus/acc.png"),
)
for curve_no, (train_key, val_key, heading, out_file) in enumerate(curve_specs):
    if curve_no:
        # Wipe the previous curves before drawing the next figure.
        plt.clf()
    plt.plot(hist.history[train_key])
    plt.plot(hist.history[val_key])
    plt.title(heading)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.savefig(out_file)
预测代码:
# Load the test table (pd.read_csv treats the first CSV row as the header).
df = pd.read_csv("crPlus/data/test.csv")
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the drop-in
# replacement.
data = df.values
df = None  # release the DataFrame; only the ndarray is used from here on

# Column 0 is skipped; the remaining columns are reshaped into one 40x40
# single-channel image per row (the test file has no label column).
x_test = data[:, 1:]
x_test = x_test.reshape(data.shape[0], 40, 40, 1).astype("float32")
x_test = x_test / 255.0  # NOTE(review): assumes 8-bit pixel values (0-255) - confirm

# Restore the model saved by the training script and compute the two softmax
# probabilities for every test row.
cnn_net = load_model("crPlus/cnn_net_1.h5")
print("test predict------")
result = cnn_net.predict(x_test, batch_size=64, verbose=0)
# Turn the two softmax probabilities into three labels:
#   0 -> column 0 is above the threshold
#   1 -> otherwise, column 1 is above the threshold
#   2 -> neither column is confident enough (the extra third class)
# np.select takes the first matching condition, which reproduces the original
# if/elif/else chain. The original loop body had lost its indentation and
# could not be parsed.
confidence_threshold = 0.999
pre = np.select(
    [result[:, 0] > confidence_threshold, result[:, 1] > confidence_threshold],
    [0, 1],
    default=2,
)

# Write the labels as a single column 'y', keeping the default 0-based row
# index as the first CSV column.
name = ['y']
res = pd.DataFrame(data=pre, columns=name)
res.to_csv('crPlus/result_pre.csv', index=True)
未to_categorical编码代码:
# Load the training table (pd.read_csv treats the first CSV row as the header).
df = pd.read_csv("crPlus/data/train.csv")
# DataFrame.as_matrix() was removed in pandas 1.0; .values is the drop-in
# replacement. The copy guarantees a writable array for the in-place shuffle.
data = df.values.copy()
df = None  # release the DataFrame; only the ndarray is used from here on
np.random.shuffle(data)  # shuffles the rows in place

# Column 0 is skipped, the last column is the label, and everything in
# between is reshaped into one 40x40 single-channel image per row.
x_train = data[:, 1:-1]
x_train = x_train.reshape(data.shape[0], 40, 40, 1).astype("float32")
x_train = x_train / 255.0  # NOTE(review): assumes 8-bit pixel values (0-255) - confirm
# Labels stay as the raw last column (no to_categorical one-hot encoding) to
# pair with the single sigmoid output unit used by this variant.
y_train = data[:, -1]
# Hyper-parameters shared by the model definition and the later fit() call.
batch_size = 32
n_filters = 32
pool_size = (2, 2)

# Small CNN: three conv stages followed by a dense classifier with a single
# sigmoid output (binary variant, labels not one-hot encoded).
cnn_net = Sequential()

# Stage 1: 3x3 conv on the 40x40x1 input.
cnn_net.add(Conv2D(n_filters, kernel_size=(3, 3), strides=(1, 1), input_shape=(40, 40, 1)))
cnn_net.add(Activation('relu'))
# The input is channels-last (see input_shape), so the feature axis is the
# last one. The original passed axis=1, which normalises along the image
# height instead of per channel.
cnn_net.add(BatchNormalization(epsilon=1e-6, axis=-1))
cnn_net.add(MaxPool2D(pool_size=pool_size))

# Stage 2: zero-pad by one pixel per side, then 3x3 conv with 48 filters.
cnn_net.add(ZeroPadding2D((1, 1)))
cnn_net.add(Conv2D(48, kernel_size=(3, 3)))
cnn_net.add(Activation('relu'))
cnn_net.add(BatchNormalization(epsilon=1e-6, axis=-1))
cnn_net.add(MaxPool2D(pool_size=pool_size))

# Stage 3: zero-pad again, then 2x2 conv with 64 filters.
cnn_net.add(ZeroPadding2D((1, 1)))
cnn_net.add(Conv2D(64, kernel_size=(2, 2)))
cnn_net.add(Activation('relu'))
cnn_net.add(BatchNormalization(epsilon=1e-6, axis=-1))
cnn_net.add(MaxPool2D(pool_size=pool_size))
cnn_net.add(Dropout(0.25))

# Flatten collapses the multi-dimensional feature maps into one vector; it is
# the usual bridge between the convolutional and fully connected layers.
cnn_net.add(Flatten())
# NOTE(review): the original carried a "#3168" note here; check the actual
# flattened size against the summary() output below.
cnn_net.add(Dense(512))
cnn_net.add(Activation('relu'))
# Single sigmoid unit: one probability per sample (the softmax/Dense(2)
# variant is used by the one-hot version of this script).
cnn_net.add(Dense(1))
cnn_net.add(Activation('sigmoid'))

# Print the layer-by-layer structure of the network.
cnn_net.summary()
# Optimiser: SGD with Nesterov momentum (an earlier run used lr=0.0001).
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
# Binary cross-entropy pairs with the single sigmoid output; the one-hot
# variant of this script uses categorical_crossentropy instead.
cnn_net.compile(
    loss='binary_crossentropy',
    optimizer=sgd,
    metrics=['acc'],
)

# Fit for 60 epochs (50 in an earlier run) with the last 20% of the rows
# held out for validation, then persist the trained model.
batch_size = 32
hist = cnn_net.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=60,
    verbose=1,
    validation_split=0.2,
)
cnn_net.save("crPlus/cnn_net_bin.h5")

# Save one figure per tracked quantity: training curve vs. validation curve.
curve_specs = (
    ('loss', 'val_loss', 'model loss', "crPlus/loss_bin.png"),
    ('acc', 'val_acc', 'model acc', "crPlus/acc_bin.png"),
)
for curve_no, (train_key, val_key, heading, out_file) in enumerate(curve_specs):
    if curve_no:
        # Wipe the previous curves before drawing the next figure.
        plt.clf()
    plt.plot(hist.history[train_key])
    plt.plot(hist.history[val_key])
    plt.title(heading)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.savefig(out_file)