之前想做字母识别,但没找到合适的训练集(找到的样本太少),所以先写了一些用 xgboost 和 cnn 训练 mnist 数据集的代码。
在 mnist 上效果挺好,但我自己还有一些数字图片也需要识别(比如下面这些),直接识别的效果很差,后来发现是因为图片里的数字不太居中。于是先对图片做了一些预处理再识别;不知道有没有不需要预处理的方法。两种方法的代码都放在下面了,训练用的数据集就是 mnist。
代码比较乱,懒得整理,将就看看吧,反正挺简单的。接下来准备做字母识别,感觉也不难,关键是图片预处理,因为验证码可能需要先切割。另外还整理了一些 12306 的验证码,也准备试着做一下。
def decode_idx3_ubyte(idx3_ubyte_file):
    """Parse an IDX3 (image) file and return its pixel data.

    The file is read as a big-endian header of four signed 32-bit integers
    (magic number, image count, rows per image, columns per image) followed
    by one unsigned byte per pixel.

    :param idx3_ubyte_file: path of the IDX3 file
    :return: float64 array of shape (num_images, num_rows, num_cols)
    :raises struct.error: if the file is shorter than the header
    :raises ValueError: if the header holds a negative size or the pixel
        data is shorter than the header declares
    """
    # Read the whole file; the context manager guarantees the handle is
    # closed (a bare open(...).read() leaves that to the garbage collector).
    with open(idx3_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    # Header: magic number, image count, image height, image width.
    fmt_header = '>iiii'
    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张, 图片大小: %d*%d' % (magic_number, num_images, num_rows, num_cols))

    if min(num_images, num_rows, num_cols) < 0:
        raise ValueError('negative size in IDX3 header of %r' % (idx3_ubyte_file,))

    offset = struct.calcsize(fmt_header)
    num_pixels = num_images * num_rows * num_cols
    if len(bin_data) - offset < num_pixels:
        raise ValueError('IDX3 file %r is truncated: header declares %d pixel bytes, found %d'
                         % (idx3_ubyte_file, num_pixels, len(bin_data) - offset))

    # Decode every pixel in one pass instead of unpacking a Python tuple per
    # image; astype() gives a writable float64 copy, the dtype callers
    # received before.
    pixels = np.frombuffer(bin_data, dtype=np.uint8, count=num_pixels, offset=offset)
    images = pixels.reshape((num_images, num_rows, num_cols)).astype(np.float64)

    # Keep the progress lines so the console output is unchanged.
    for parsed in range(10000, num_images + 1, 10000):
        print('已解析 %d' % parsed + '张')
    return images
def decode_idx1_ubyte(idx1_ubyte_file):
    """Parse an IDX1 (label) file and return its labels.

    The file is read as a big-endian header of two signed 32-bit integers
    (magic number, label count) followed by one unsigned byte per label.

    :param idx1_ubyte_file: path of the IDX1 file
    :return: float64 array of shape (num_labels,)
    :raises struct.error: if the file is shorter than the header
    :raises ValueError: if the header holds a negative count or the label
        data is shorter than the header declares
    """
    # Read the whole file; the context manager guarantees the handle is
    # closed (a bare open(...).read() leaves that to the garbage collector).
    with open(idx1_ubyte_file, 'rb') as idx_file:
        bin_data = idx_file.read()

    # Header: magic number and number of labels.
    fmt_header = '>ii'
    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, 0)
    print('魔数:%d, 图片数量: %d张' % (magic_number, num_images))

    if num_images < 0:
        raise ValueError('negative label count in IDX1 header of %r' % (idx1_ubyte_file,))

    offset = struct.calcsize(fmt_header)
    if len(bin_data) - offset < num_images:
        raise ValueError('IDX1 file %r is truncated: header declares %d labels, found %d'
                         % (idx1_ubyte_file, num_images, len(bin_data) - offset))

    # One vectorised read replaces the per-label unpack loop; astype() gives
    # a writable float64 copy, the dtype callers received before.
    labels = np.frombuffer(bin_data, dtype=np.uint8, count=num_images, offset=offset).astype(np.float64)

    # Keep the progress lines so the console output is unchanged.
    for parsed in range(10000, num_images + 1, 10000):
        print('已解析 %d' % parsed + '张')
    return labels
def xgboost_train():
    """Train an XGBoost digit classifier on binarised pixel data.

    Reads pixel rows from ``train_data.csv`` and labels from the IDX1 file
    named by the global ``train_label``, holds out 10% of the rows for
    validation, trains with early stopping and dumps the booster to
    ``number_rec_2.pkl``.
    """
    from sklearn.model_selection import train_test_split

    train_data = pd.read_csv('train_data.csv', index_col=0).astype(int)
    print('csv_file read over')
    # Binarise in one vectorised pass (value >= 180 -> 1, otherwise 0).
    # This replaces a per-cell Python lambda run through the deprecated
    # DataFrame.applymap and yields the same 0/1 table.
    train_data = (train_data >= 180).astype(int)

    y_train = decode_idx1_ubyte(train_label)
    x_train, x_val, y_train, y_val = train_test_split(
        train_data, pd.DataFrame(y_train), test_size=0.1)

    param_list = [
        ("eta", 0.1), ("max_depth", 5), ("subsample", 0.8),
        ("colsample_bytree", 0.8), ("objective", "multi:softmax"),
        ("eval_metric", "merror"), ("alpha", 8), ("lambda", 2),
        ("num_class", 10), ('silent', False), ('n_jobs', -1),
    ]
    n_rounds = 300
    early_stopping = 10

    d_train = xgb.DMatrix(x_train, label=y_train)
    d_val = xgb.DMatrix(x_val, label=y_val)
    # Training stops early based on the last entry of the watch list ('val').
    watch_list = [(d_train, 'train'), (d_val, 'val')]
    model = xgb.train(param_list, d_train, n_rounds, evals=watch_list,
                      early_stopping_rounds=early_stopping)

    # NOTE(review): the prediction helpers in this file load
    # 'number_rec_1.pkl', not this file - confirm that is intentional.
    joblib.dump(model, 'number_rec_2.pkl')
def predict_score():
    """Print the accuracy of the saved XGBoost model on ``test_data.csv``.

    Loads ``number_rec_1.pkl``, binarises the CSV pixels the same way as
    training (value >= 180 -> 1, otherwise 0), predicts every row and prints
    the accuracy against the labels in the IDX1 file named by the global
    ``test_label``.
    """
    model = joblib.load('number_rec_1.pkl')
    test_data = pd.read_csv('test_data.csv', index_col=0).astype(int)
    print('csv_file read over')
    # Vectorised threshold; replaces a per-cell Python lambda run through the
    # deprecated DataFrame.applymap and yields the same 0/1 table.
    test_data = (test_data >= 180).astype(int)
    y_test = decode_idx1_ubyte(test_label)
    prediction = model.predict(xgb.DMatrix(test_data))
    print(metrics.accuracy_score(y_test, prediction))
def xgboost_predict():
    """Classify one sample image with the saved XGBoost model and print the result.

    Loads ``number_rec_1.pkl``, opens one image under ``Img/Sample005``,
    crops it to the drawn strokes plus a margin, shrinks it to 28x28,
    binarises it (value <= 180 -> 1, otherwise 0) and prints the prediction.

    :raises ValueError: if the image contains no ink at all
    """
    i = 1
    model = joblib.load('number_rec_1.pkl')
    img = Image.open('Img/Sample005/img005-{}.png'.format(str(i + 1).zfill(3))).convert('L')

    # Invert so that dark strokes become non-zero and a white page becomes 0.
    ink = 255 - np.array(img)
    # ndarray.sum widens the accumulator. The builtin sum() used before added
    # the uint8 rows in uint8, so a column total could wrap around to 0 and
    # hide real ink from the bounding-box search.
    ink_cols = np.where(ink.sum(axis=0) > 0)[0]
    ink_rows = np.where(ink.sum(axis=1) > 0)[0]
    if ink_cols.size == 0:
        raise ValueError('image contains no ink to crop around')
    left_temp, right_temp = ink_cols.min(), ink_cols.max()
    up_temp, down_temp = ink_rows.min(), ink_rows.max()

    # Pad the box: a third of the (minimum 950 px) width on each side and a
    # quarter of the height above and below.
    # NOTE(review): the 1200 / 900 clamps presumably match the source image
    # size - confirm against the data.
    width = max(950, right_temp - left_temp)
    left = int(max(0, left_temp - width / 3))
    right = int(min(1200, right_temp + width / 3))
    up = int(max(0, up_temp - (down_temp - up_temp) / 4))
    down = int(min(900, down_temp + (down_temp - up_temp) / 4))

    img = img.crop((left, up, right, down)).resize((28, 28))

    # Ink is dark here, so values at or below 180 become 1.
    pixels = (np.array(img) <= 180).astype(int).reshape(1, -1)
    img = pd.DataFrame(pixels)
    # String column names, as produced when the pixel table is read from CSV.
    img.columns = img.columns.astype(str)
    print(model.predict(xgb.DMatrix(img)))
def cnn_train(train=True):
    """Train the digit CNN, or restore it and classify local sample images.

    The graph is two conv + max-pool stages, a 400-unit dense layer with
    dropout, and a 10-way output layer.

    :param train: when true, train for 150 full-batch steps on the data
        named by the globals ``train_file`` / ``train_label`` and save the
        checkpoint ``cnn_model_1``; otherwise restore that checkpoint and
        print the predicted labels for the images returned by
        ``_load_sample_digits``.
    :return: None
    """
    num_classes = 10

    # Placeholders: image batch, integer labels, and the dropout rate.
    datas_holder = tf.placeholder(tf.float32, [None, 28, 28, 1])
    labels_holder = tf.placeholder(tf.int32, [None])
    dropout_holder = tf.placeholder(tf.float32)

    conv0 = tf.layers.conv2d(datas_holder, 20, 5, activation=tf.nn.relu)
    pool0 = tf.layers.max_pooling2d(conv0, [2, 2], [2, 2])
    conv1 = tf.layers.conv2d(pool0, 40, 4, activation=tf.nn.relu)
    pool1 = tf.layers.max_pooling2d(conv1, [2, 2], [2, 2])
    flatten = tf.contrib.layers.flatten(pool1)
    fc = tf.layers.dense(flatten, 400, activation=tf.nn.relu)
    # tf.layers.dropout defaults to training=False, which returns its input
    # untouched, so the fed rate was silently ignored. With training=True the
    # rate takes effect; the test branch feeds 0, which makes it an identity.
    dropout_fc = tf.layers.dropout(fc, rate=dropout_holder, training=True)
    logits = tf.layers.dense(dropout_fc, num_classes)
    predict_labels = tf.argmax(logits, 1)

    losses = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.one_hot(labels_holder, num_classes), logits=logits)
    mean_loss = tf.reduce_mean(losses)
    # The optimizer minimises the per-example loss vector; mean_loss is only
    # used for progress reporting.
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01).minimize(losses)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        if train:
            train_data = decode_idx3_ubyte(train_file)
            train_labels = decode_idx1_ubyte(train_label)
            # Vectorised threshold (value >= 180 -> 1, otherwise 0) instead of
            # calling a Python lambda once per pixel through np.vectorize.
            train_data = (train_data >= 180).astype(np.int64)
            print('trian now...')
            sess.run(tf.global_variables_initializer())
            # The whole training set is fed as a single batch on every step.
            train_feed_dict = {
                datas_holder: train_data.reshape(-1, 28, 28, 1),
                labels_holder: train_labels,
                dropout_holder: 0.25,
            }
            for step in range(150):
                _, mean_loss_val = sess.run([optimizer, mean_loss],
                                            feed_dict=train_feed_dict)
                if step % 10 == 0:
                    print('step = {}\tmean loss={}'.format(step, mean_loss_val))
            saver.save(sess, 'cnn_model_1')
            print('train is over...')
        else:
            print('test now...')
            saver.restore(sess, 'cnn_model_1')
            test_data = _load_sample_digits()
            # The sample images are dark ink on a light page, so the
            # threshold is inverted here: value <= 180 -> 1, otherwise 0.
            test_data = (test_data <= 180).astype(np.int64)
            print(test_data[0])
            # Prediction does not depend on the labels, so only the images
            # and a dropout rate of 0 are fed.
            test_feed_dict = {
                datas_holder: test_data.reshape(-1, 28, 28, 1),
                dropout_holder: 0,
            }
            predicted_labels_val = sess.run(predict_labels,
                                            feed_dict=test_feed_dict)
            print(predicted_labels_val)


def _load_sample_digits():
    """Load the sample images under ``Img/``, cropped to their ink and shrunk to 28x28.

    Reads images 005, 010, ..., 025 from each of Sample001 .. Sample010.

    :return: uint8 array of shape (50, 28, 28), in sample-then-image order
    """
    digits = []
    for i in range(1, 11):
        for j in range(1, 6):
            img = Image.open('Img/Sample{}/img{}-{}.png'.format(
                str(i).zfill(3), str(i).zfill(3), str(j * 5).zfill(3))).convert('L')
            # Invert so that dark strokes become non-zero.
            ink = 255 - np.array(img)
            # ndarray.sum widens the accumulator. The builtin sum() used
            # before added the uint8 rows in uint8, so a column total could
            # wrap around to 0 and hide real ink.
            ink_cols = np.where(ink.sum(axis=0) > 0)[0]
            ink_rows = np.where(ink.sum(axis=1) > 0)[0]
            left_temp, right_temp = ink_cols.min(), ink_cols.max()
            up_temp, down_temp = ink_rows.min(), ink_rows.max()
            # Pad by a third of the (minimum 600 px) width and a quarter of
            # the height.
            # NOTE(review): the 1200 / 900 clamps presumably match the source
            # image size - confirm against the data.
            width = max(600, right_temp - left_temp)
            left = int(max(0, left_temp - width / 3))
            right = int(min(1200, right_temp + width / 3))
            up = int(max(0, up_temp - (down_temp - up_temp) / 4))
            down = int(min(900, down_temp + (down_temp - up_temp) / 4))
            cropped = img.crop((left, up, right, down)).resize((28, 28))
            digits.append(np.array(cropped))
    return np.array(digits)
def test_real_img():
    """Spot-check the saved XGBoost model on one CSV row and one image file.

    First predicts the row labelled 300 of ``test_data.csv`` (bright pixels,
    value > 180, become 1), then predicts ``Img/Sample005/img005-003.png``
    shrunk to 28x28 (dark pixels, value < 180, become 1). Both inputs are
    shown on screen and both predictions are printed.
    """
    model = joblib.load('number_rec_1.pkl')
    test = pd.read_csv('test_data.csv', index_col=0)

    # 1) A row from the test CSV.
    bits = (test.loc[300, :] > 180).astype(int)
    # Scale the 0/1 mask to 0/255 so it can be displayed as an image.
    img_temp = (bits.values * 255).astype('uint8')
    Image.fromarray(img_temp.reshape((28, 28))).show()
    # Series.reshape is gone from pandas; reshape the underlying array.
    img1 = pd.DataFrame(bits.values.reshape(1, -1))
    img1.columns = img1.columns.astype(str)
    print(img1)
    print(model.predict(xgb.DMatrix(img1)))

    # 2) An image file, resized without any cropping.
    img = Image.open('Img/Sample005/img005-003.png').convert('L')
    img = img.resize((28, 28))
    img.show()
    img = pd.DataFrame(np.array(img).reshape((1, -1)))
    print(img.any())
    # String column names, consistent with the CSV-row case above.
    img.columns = img.columns.astype(str)
    # Element-wise threshold that keeps the 1 x 784 shape. DataFrame.apply
    # with a scalar lambda ran once per column and collapsed the frame into
    # a Series.
    img = (img < 180).astype(int)
    print(img)
    print(model.predict(xgb.DMatrix(img)))
# for i in range(2):
# for j in range(2):
# img = Image.open('Img/Sample{}/img{}-{}.jpg'.format(str(i+1).zfill(3),str(i+1).zfill(3),str(j+1).zfill(3)))
# # size = img.size[0]*img.size[1]
# img = img.resize((28,28))
# img_temp = img
# # img.show()
# # # img.show()
# # img1 = img.crop((0,0,36,36))
# # img1.show()
# f = np.vectorize(lambda x:0 if x>100 else 1)
# img = f(np.array(img.convert('L')).flatten()).reshape(1,-1)
# #
# img = pd.DataFrame(img)
# img.columns = img.columns.astype(str)
# # print(img)
# prediction = model.predict(xgb.DMatrix(img))
# print('predict is: ',int(prediction[0]))
# print('real is: ',i)
# if i!=prediction:
# img_temp.show()