# Character recognition: top-4% code

# In[]
from keras.datasets import mnist
import scipy.io as sio
import os
import pandas as pd

from keras.utils import np_utils
from keras.models import Sequential
from keras import backend

from keras.layers import Dense, Dropout,Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Activation

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# In[Load the classic MNIST data: 60000 training samples, 10000 test samples]
# The dataset is cached as four .mat files. Rebuild the cache unless all four
# are present, so an interrupted first run cannot leave a partial cache that
# later fails in the loading branch.
mnist_cache_files = ('mnist_train_X.mat', 'mnist_train_y.mat',
                     'mnist_test_X.mat', 'mnist_test_y.mat')
if not all(os.path.exists(path) for path in mnist_cache_files):
    (mnist_train_X, mnist_train_y), (mnist_test_X, mnist_test_y) = \
        mnist.load_data()
    sio.savemat('mnist_train_X.mat', {'mnist_train_X': mnist_train_X})
    sio.savemat('mnist_train_y.mat', {'mnist_train_y': mnist_train_y})
    sio.savemat('mnist_test_X.mat', {'mnist_test_X': mnist_test_X})
    sio.savemat('mnist_test_y.mat', {'mnist_test_y': mnist_test_y})
else:
    mnist_train_X = sio.loadmat('mnist_train_X.mat')['mnist_train_X']
    mnist_train_y = sio.loadmat('mnist_train_y.mat')['mnist_train_y']
    mnist_test_X = sio.loadmat('mnist_test_X.mat')['mnist_test_X']
    mnist_test_y = sio.loadmat('mnist_test_y.mat')['mnist_test_y']

# Flatten the labels to 1-D so the later one-hot encoding gets the expected
# input. The shape depends on the branch above: mnist.load_data() returns the
# labels as (N,), whereas loadmat returns them as a 2-D row (1, N).
# reshape(-1) handles both; indexing shape[1] would raise IndexError on the
# first (uncached) run.
mnist_train_y = mnist_train_y.reshape(-1)
mnist_test_y = mnist_test_y.reshape(-1)


# In[Kaggle data: 42000 training samples, 28000 test samples]
kaggle_train_X = pd.read_csv('train.csv')
kaggle_test_X = pd.read_csv('test.csv')

# Split the 'label' column off the training frame: pop() returns the column
# and removes it from the frame in place.
kaggle_train_y = kaggle_train_X.pop('label')

# A DataFrame has no reshape method, so continue with plain NumPy arrays.
kaggle_train_X = np.array(kaggle_train_X.values)
kaggle_test_X = np.array(kaggle_test_X.values)

## In[数据总览]
## 绘制计数直方图
#sns.countplot(mnist_train_y)
#plt.xlabel("keras_mnist label")
#plt.show()
## Count the samples per label with np.unique
#unique, count= np.unique(mnist_train_y, return_counts=True)
#print("number of labels of keras_mnist = \n%s " % dict (zip(unique, count) ), "\n" )
#
## 绘制计数直方图
#sns.countplot(kaggle_train_y)
#plt.xlabel("kaggle_mnist label")
#plt.show()
## Count the samples per label with np.unique
#unique, count= np.unique(kaggle_train_y, return_counts=True)
#print("number of labels of kaggle_mnist = \n%s " % dict (zip(unique, count) ), "\n" )

# In[Normalize the data]
# Divide every image array by 255 (presumably 8-bit pixel intensities, which
# would map them into [0, 1] -- confirm against the data files).
mnist_train_X, mnist_test_X, kaggle_train_X, kaggle_test_X = (
    pixels / 255
    for pixels in (mnist_train_X, mnist_test_X, kaggle_train_X, kaggle_test_X)
)

# In[Reshape the data into the format expected by the CNN]
# Every image set becomes a float32 array of shape (num_samples, 28, 28, 1).
mnist_train_X, mnist_test_X, kaggle_train_X, kaggle_test_X = (
    images.reshape((len(images), 28, 28, 1)).astype('float32')
    for images in (mnist_train_X, mnist_test_X, kaggle_train_X, kaggle_test_X)
)

# One-hot encode the label vectors. num_classes is not passed, so
# to_categorical derives the number of columns from the labels themselves.
# The Kaggle test set has no labels, hence only three label arrays.
mnist_train_y, mnist_test_y, kaggle_train_y = (
    np_utils.to_categorical(labels)
    for labels in (mnist_train_y, mnist_test_y, kaggle_train_y)
)

# In[Augment the data]
# Stack the MNIST and Kaggle training sets along the sample axis
# (np.concatenate joins along axis 0 by default).
X_train = np.concatenate([mnist_train_X, kaggle_train_X])
y_train = np.concatenate([mnist_train_y, kaggle_train_y])

# In[Build the network]
# Image tensors are laid out with the channel axis last.
backend.set_image_data_format('channels_last')

# A Sequential model stacks its layers so that each one feeds its output to
# the next without any extra wiring.
model = Sequential([
    # Convolution stage 1: two 5x5 convolutions, 2x2 max pooling, dropout.
    Conv2D(40, kernel_size=5, padding="same", input_shape=(28, 28, 1),
           activation='relu'),
    Conv2D(50, kernel_size=5, padding="valid", activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.2),

    # Convolution stage 2: two 3x3 convolutions, each followed by pooling.
    Conv2D(70, kernel_size=3, padding="same", activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Conv2D(100, kernel_size=3, padding="valid", activation='relu'),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),

    # Classifier head: one hidden dense layer, then a 10-way softmax.
    Flatten(),
    Dense(100),
    Activation("relu"),
    Dropout(0.2),

    Dense(10),
    Activation("softmax"),
])
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# In[Draw the network structure]
# Writing the architecture diagram to model.png is an optional diagnostic.
# It relies on the pydot/graphviz tooling, so a missing installation is
# reported here instead of aborting the script before training starts.
try:
    from keras.utils.vis_utils import plot_model

    plot_model(model, to_file='model.png', show_shapes=True,
               show_layer_names=False)
except (ImportError, OSError) as plot_error:
    print("Skipping model plot: %s" % plot_error)

# In[Set the epochs and batch_size used to train the network]
# Both values are reused later to name the submission file.
epochs, batch_size = 32, 2048

# Train on the combined set, holding out 20% of it for validation.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)

# Evaluate on the MNIST test split and print the returned scores.
scores = model.evaluate(x=mnist_test_X, y=mnist_test_y, verbose=10)
print(scores)


# In[Predict on the Kaggle test set and write the submission file]
# model.predict yields one row of class scores per image; argmax over axis 1
# picks the index of the highest score as the predicted label.
res = model.predict(kaggle_test_X)
res = np.argmax(res, axis=1)
res = pd.Series(res, name="Label")

# ImageId is numbered from 1. It is derived from the number of predictions
# rather than a fixed 28000, so a test set of any size gets one id per row
# and pd.concat never pads mismatched lengths with NaN.
submission = pd.concat(
    [pd.Series(range(1, len(res) + 1), name="ImageId"), res],
    axis=1,
)

# The training hyper-parameters are encoded in the output file name.
submission.to_csv(
    "LCNN2_" + str(epochs) + "epoch_" + str(batch_size) + "batch_size.csv",
    index=False,
)
 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值