Dataset: the MNIST dataset of handwritten digit images, which we treat here as captcha characters. We can apply the k-nearest neighbors (KNN) algorithm, support vector machines (SVM), and deep learning to recognize them.
[Figure: flowchart of the KNN approach]
[Figure: flowchart of the SVC approach]
The script below implements these approaches, including a multilayer perceptron (MLP)-style deep network and a convolutional network:
# -*- coding: utf-8 -*-
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"  # work around duplicate OpenMP runtime loads on macOS
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
from sklearn import svm
from sklearn import neighbors
# Data loading and preprocessing
import tflearn.datasets.mnist as mnist
def do_cnn_2d(X, Y, testX, testY):
    # Building the convolutional network: two conv/max-pool/LRN blocks,
    # two fully connected layers with dropout (keep probability 0.8),
    # and a 10-way softmax output
    network = input_data(shape=[None, 28, 28, 1], name='input')
    network = conv_2d(network, 32, 3, activation='relu', regularizer="L2")
    network = max_pool_2d(network, 2)
    network = local_response_normalization(network)
    network = conv_2d(network, 64, 3, activation='relu', regularizer="L2")
    network = max_pool_2d(network, 2)
    network = local_response_normalization(network)
    network = fully_connected(network, 128, activation='tanh')
    network = dropout(network, 0.8)
    network = fully_connected(network, 256, activation='tanh')
    network = dropout(network, 0.8)
    network = fully_connected(network, 10, activation='softmax')
    network = regression(network, optimizer='adam', learning_rate=0.01,
                         loss='categorical_crossentropy', name='target')
    # Training
    model = tflearn.DNN(network, tensorboard_verbose=0)
    model.fit({'input': X}, {'target': Y}, n_epoch=5,
              validation_set=({'input': testX}, {'target': testY}),
              snapshot_step=100, show_metric=True, run_id='mnist')
def do_dnn_1d(x_train, y_train, x_test, y_test):
    print("DNN and 1d")
    # Building the deep neural network: two 64-unit tanh layers with
    # L2 weight decay and dropout, then a 10-way softmax
    input_layer = tflearn.input_data(shape=[None, 784])
    dense1 = tflearn.fully_connected(input_layer, 64, activation='tanh',
                                     regularizer='L2', weight_decay=0.001)
    dropout1 = tflearn.dropout(dense1, 0.8)
    dense2 = tflearn.fully_connected(dropout1, 64, activation='tanh',
                                     regularizer='L2', weight_decay=0.001)
    dropout2 = tflearn.dropout(dense2, 0.8)
    softmax = tflearn.fully_connected(dropout2, 10, activation='softmax')
    # Regression using SGD with learning rate decay and Top-3 accuracy
    sgd = tflearn.SGD(learning_rate=0.1, lr_decay=0.96, decay_step=1000)
    top_k = tflearn.metrics.Top_k(3)
    net = tflearn.regression(softmax, optimizer=sgd, metric=top_k,
                             loss='categorical_crossentropy')
    # Training on the function arguments (expects one-hot labels)
    model = tflearn.DNN(net, tensorboard_verbose=0)
    model.fit(x_train, y_train, n_epoch=10, validation_set=(x_test, y_test),
              show_metric=True, run_id="mnist")
def do_svm_1d(x_train, y_train, x_test, y_test):
    print("SVM and 1d")
    clf = svm.SVC(decision_function_shape='ovo')
    print(clf)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(metrics.accuracy_score(y_test, y_pred))
    #print(metrics.confusion_matrix(y_test, y_pred))

def do_knn_1d(x_train, y_train, x_test, y_test):
    print("KNN and 1d")
    clf = neighbors.KNeighborsClassifier(n_neighbors=15)
    print(clf)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(metrics.accuracy_score(y_test, y_pred))
    #print(metrics.confusion_matrix(y_test, y_pred))
if __name__ == "__main__":
    print("Hello MNIST")
    # 1d: flat 784-dim vectors with integer labels; note that
    # do_dnn_1d needs one-hot labels (one_hot=True) instead
    #X, Y, testX, testY = mnist.load_data(one_hot=False)
    #do_dnn_1d(X, Y, testX, testY)
    #do_svm_1d(X, Y, testX, testY)
    #do_knn_1d(X, Y, testX, testY)
    # 2d: reshape to 28x28 single-channel images with one-hot labels
    X, Y, testX, testY = mnist.load_data(one_hot=True)
    X = X.reshape([-1, 28, 28, 1])
    testX = testX.reshape([-1, 28, 28, 1])
    # cnn
    do_cnn_2d(X, Y, testX, testY)
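Running the script with only the 2-D CNN path enabled, as above, produces console output like the following (from a Python 2.7 run):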
Hello MNIST
Extracting mnist/train-images-idx3-ubyte.gz
Extracting mnist/train-labels-idx1-ubyte.gz
Extracting mnist/t10k-images-idx3-ubyte.gz
Extracting mnist/t10k-labels-idx1-ubyte.gz
WARNING:tensorflow:From /anaconda2/envs/python27/lib/python2.7/site-packages/tflearn/initializations.py:119: __init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
WARNING:tensorflow:From /anaconda2/envs/python27/lib/python2.7/site-packages/tflearn/objectives.py:66: calling reduce_sum (from tensorflow.python.ops.math_ops) with keep_dims is deprecated and will be removed in a future version.
Instructions for updating:
keep_dims is deprecated, use keepdims instead
2019-02-28 22:46:00.126728: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2019-02-28 22:46:00.127120: I tensorflow/core/common_runtime/process_util.cc:69] Creating new thread pool with default inter op setting: 12. Tune using inter_op_parallelism_threads for best performance.
---------------------------------
Run id: mnist
Log directory: /tmp/tflearn_logs/
---------------------------------
Training samples: 55000
Validation samples: 10000
--
Training Step: 100 | total loss: 0.23707 | time: 21.973s
| Adam | epoch: 001 | loss: 0.23707 - acc: 0.9352 | val_loss: 0.21428 - val_acc: 0.9334 -- iter: 06400/55000
--
Training Step: 200 | total loss: 0.18114 | time: 44.004s
| Adam | epoch: 001 | loss: 0.18114 - acc: 0.9513 | val_loss: 0.17256 - val_acc: 0.9526 -- iter: 12800/55000
--
Training Step: 300 | total loss: 0.20485 | time: 65.309s
| Adam | epoch: 001 | loss: 0.20485 - acc: 0.9481 | val_loss: 0.11869 - val_acc: 0.9683 -- iter: 19200/55000
--
Training Step: 400 | total loss: 0.21222 | time: 86.193s
| Adam | epoch: 001 | loss: 0.21222 - acc: 0.9458 | val_loss: 0.14559 - val_acc: 0.9587 -- iter: 25600/55000
--
Training Step: 500 | total loss: 0.14734 | time: 106.709s
| Adam | epoch: 001 | loss: 0.14734 - acc: 0.9621 | val_loss: 0.11679 - val_acc: 0.9663 -- iter: 32000/55000
--
Training Step: 600 | total loss: 0.23280 | time: 127.229s
| Adam | epoch: 001 | loss: 0.23280 - acc: 0.9461 | val_loss: 0.13877 - val_acc: 0.9636 -- iter: 38400/55000
--
Training Step: 700 | total loss: 0.17027 | time: 147.721s
| Adam | epoch: 001 | loss: 0.17027 - acc: 0.9535 | val_loss: 0.13384 - val_acc: 0.9622 -- iter: 44800/55000
--
Training Step: 800 | total loss: 0.14994 | time: 168.609s
| Adam | epoch: 001 | loss: 0.14994 - acc: 0.9632 | val_loss: 0.17848 - val_acc: 0.9583 -- iter: 51200/55000
--
Training Step: 860 | total loss: 0.28218 | time: 182.671s
| Adam | epoch: 001 | loss: 0.28218 - acc: 0.9349 | val_loss: 0.11919 - val_acc: 0.9648 -- iter: 55000/55000
--
Training Step: 900 | total loss: 0.18352 | time: 10.988s
| Adam | epoch: 002 | loss: 0.18352 - acc: 0.9533 | val_loss: 0.11164 - val_acc: 0.9692 -- iter: 02560/55000
--
Training Step: 1000 | total loss: 0.19945 | time: 31.601s
| Adam | epoch: 002 | loss: 0.19945 - acc: 0.9451 | val_loss: 0.08605 - val_acc: 0.9755 -- iter: 08960/55000
--
Training Step: 1100 | total loss: 0.13136 | time: 52.122s
| Adam | epoch: 002 | loss: 0.13136 - acc: 0.9642 | val_loss: 0.12763 - val_acc: 0.9677 -- iter: 15360/55000
--
Training Step: 1200 | total loss: 0.14219 | time: 72.529s
| Adam | epoch: 002 | loss: 0.14219 - acc: 0.9605 | val_loss: 0.15559 - val_acc: 0.9617 -- iter: 21760/55000
--
Training Step: 1300 | total loss: 0.19778 | time: 92.921s
| Adam | epoch: 002 | loss: 0.19778 - acc: 0.9535 | val_loss: 0.20129 - val_acc: 0.9533 -- iter: 28160/55000
--
Training Step: 1400 | total loss: 0.13318 | time: 113.328s
| Adam | epoch: 002 | loss: 0.13318 - acc: 0.9715 | val_loss: 0.11842 - val_acc: 0.9690 -- iter: 34560/55000
--
Training Step: 1500 | total loss: 0.19926 | time: 133.612s
| Adam | epoch: 002 | loss: 0.19926 - acc: 0.9478 | val_loss: 0.11421 - val_acc: 0.9731 -- iter: 40960/55000
--
Training Step: 1600 | total loss: 0.12594 | time: 153.812s
| Adam | epoch: 002 | loss: 0.12594 - acc: 0.9629 | val_loss: 0.11782 - val_acc: 0.9681 -- iter: 47360/55000
--
Training Step: 1700 | total loss: 0.78151 | time: 174.091s
| Adam | epoch: 002 | loss: 0.78151 - acc: 0.8975 | val_loss: 0.13214 - val_acc: 0.9616 -- iter: 53760/55000
--
Training Step: 1720 | total loss: 0.29896 | time: 181.506s
| Adam | epoch: 002 | loss: 0.29896 - acc: 0.9419 | val_loss: 0.15414 - val_acc: 0.9629 -- iter: 55000/55000
--
Training Step: 1800 | total loss: 0.15498 | time: 16.987s
| Adam | epoch: 003 | loss: 0.15498 - acc: 0.9609 | val_loss: 0.11366 - val_acc: 0.9742 -- iter: 05120/55000
--
Training Step: 1900 | total loss: 0.13825 | time: 37.492s
| Adam | epoch: 003 | loss: 0.13825 - acc: 0.9695 | val_loss: 0.11234 - val_acc: 0.9728 -- iter: 11520/55000
--
Training Step: 2000 | total loss: 0.12714 | time: 57.719s
| Adam | epoch: 003 | loss: 0.12714 - acc: 0.9682 | val_loss: 0.11303 - val_acc: 0.9710 -- iter: 17920/55000
--
Training Step: 2100 | total loss: 0.21021 | time: 77.934s
| Adam | epoch: 003 | loss: 0.21021 - acc: 0.9485 | val_loss: 0.22704 - val_acc: 0.9481 -- iter: 24320/55000
--
Training Step: 2200 | total loss: 0.23903 | time: 98.205s
| Adam | epoch: 003 | loss: 0.23903 - acc: 0.9455 | val_loss: 0.10283 - val_acc: 0.9754 -- iter: 30720/55000
--
Training Step: 2300 | total loss: 0.17479 | time: 118.532s
| Adam | epoch: 003 | loss: 0.17479 - acc: 0.9539 | val_loss: 0.18577 - val_acc: 0.9547 -- iter: 37120/55000
--
Training Step: 2400 | total loss: 0.19227 | time: 138.958s
| Adam | epoch: 003 | loss: 0.19227 - acc: 0.9595 | val_loss: 0.12943 - val_acc: 0.9718 -- iter: 43520/55000
--
Training Step: 2500 | total loss: 0.14132 | time: 160.344s
| Adam | epoch: 003 | loss: 0.14132 - acc: 0.9647 | val_loss: 0.13724 - val_acc: 0.9688 -- iter: 49920/55000
--
Training Step: 2580 | total loss: 0.32577 | time: 178.589s
| Adam | epoch: 003 | loss: 0.32577 - acc: 0.9310 | val_loss: 0.11680 - val_acc: 0.9690 -- iter: 55000/55000
--
Training Step: 2600 | total loss: 0.17508 | time: 7.594s
| Adam | epoch: 004 | loss: 0.17508 - acc: 0.9567 | val_loss: 0.13616 - val_acc: 0.9653 -- iter: 01280/55000
--
Training Step: 2700 | total loss: 0.18165 | time: 29.070s
| Adam | epoch: 004 | loss: 0.18165 - acc: 0.9556 | val_loss: 0.11999 - val_acc: 0.9695 -- iter: 07680/55000
--
Training Step: 2800 | total loss: 0.25837 | time: 49.735s
| Adam | epoch: 004 | loss: 0.25837 - acc: 0.9361 | val_loss: 0.11596 - val_acc: 0.9671 -- iter: 14080/55000
--
Training Step: 2900 | total loss: 0.18496 | time: 70.121s
| Adam | epoch: 004 | loss: 0.18496 - acc: 0.9522 | val_loss: 0.12122 - val_acc: 0.9694 -- iter: 20480/55000
--
Training Step: 3000 | total loss: 0.17320 | time: 90.243s
| Adam | epoch: 004 | loss: 0.17320 - acc: 0.9557 | val_loss: 0.13800 - val_acc: 0.9690 -- iter: 26880/55000
--
Training Step: 3100 | total loss: 0.22497 | time: 110.382s
| Adam | epoch: 004 | loss: 0.22497 - acc: 0.9565 | val_loss: 0.09631 - val_acc: 0.9767 -- iter: 33280/55000
--
Training Step: 3200 | total loss: 0.29756 | time: 130.620s
| Adam | epoch: 004 | loss: 0.29756 - acc: 0.9476 | val_loss: 0.11103 - val_acc: 0.9754 -- iter: 39680/55000
--
Training Step: 3300 | total loss: 0.17303 | time: 150.856s
| Adam | epoch: 004 | loss: 0.17303 - acc: 0.9578 | val_loss: 0.09186 - val_acc: 0.9773 -- iter: 46080/55000
--
Training Step: 3400 | total loss: 0.20348 | time: 171.002s
| Adam | epoch: 004 | loss: 0.20348 - acc: 0.9499 | val_loss: 0.14783 - val_acc: 0.9694 -- iter: 52480/55000
--
Training Step: 3440 | total loss: 0.30359 | time: 181.752s
| Adam | epoch: 004 | loss: 0.30359 - acc: 0.9469 | val_loss: 0.12137 - val_acc: 0.9676 -- iter: 55000/55000
--
Training Step: 3500 | total loss: 0.15811 | time: 13.707s
| Adam | epoch: 005 | loss: 0.15811 - acc: 0.9588 | val_loss: 0.11515 - val_acc: 0.9740 -- iter: 03840/55000
--
Training Step: 3600 | total loss: 0.16006 | time: 33.716s
| Adam | epoch: 005 | loss: 0.16006 - acc: 0.9574 | val_loss: 0.10713 - val_acc: 0.9744 -- iter: 10240/55000
--
Training Step: 3700 | total loss: 0.17246 | time: 53.911s
| Adam | epoch: 005 | loss: 0.17246 - acc: 0.9568 | val_loss: 0.16608 - val_acc: 0.9643 -- iter: 16640/55000
--
Training Step: 3800 | total loss: 0.21726 | time: 74.185s
| Adam | epoch: 005 | loss: 0.21726 - acc: 0.9483 | val_loss: 0.14261 - val_acc: 0.9670 -- iter: 23040/55000
--
Training Step: 3900 | total loss: 0.15574 | time: 94.402s
| Adam | epoch: 005 | loss: 0.15574 - acc: 0.9625 | val_loss: 0.12009 - val_acc: 0.9719 -- iter: 29440/55000
--
Training Step: 4000 | total loss: 0.17456 | time: 114.602s
| Adam | epoch: 005 | loss: 0.17456 - acc: 0.9528 | val_loss: 0.12209 - val_acc: 0.9688 -- iter: 35840/55000
--
Training Step: 4100 | total loss: 0.23761 | time: 134.784s
| Adam | epoch: 005 | loss: 0.23761 - acc: 0.9554 | val_loss: 0.21905 - val_acc: 0.9532 -- iter: 42240/55000
--
Training Step: 4200 | total loss: 0.28392 | time: 155.018s
| Adam | epoch: 005 | loss: 0.28392 - acc: 0.9301 | val_loss: 0.12684 - val_acc: 0.9705 -- iter: 48640/55000
--
Training Step: 4300 | total loss: 0.28396 | time: 175.318s
| Adam | epoch: 005 | loss: 0.28396 - acc: 0.9369 | val_loss: 0.11673 - val_acc: 0.9721 -- iter: 55000/55000
--
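Finally, note that MLPClassifier is imported by the script but never called. For the multilayer perceptron approach mentioned at the top of this section, a minimal 1-D baseline in the same style might look like the sketch below; the do_mlp_1d name, the (64, 64) hidden-layer sizes, and max_iter=50 are illustrative assumptions, not values from the original run.
# -*- coding: utf-8 -*-
# Hypothetical MLP baseline; hyperparameters are illustrative, not tuned
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
import tflearn.datasets.mnist as mnist

def do_mlp_1d(x_train, y_train, x_test, y_test):
    print("MLP and 1d")
    # two hidden layers, mirroring the 64-unit layers of do_dnn_1d
    clf = MLPClassifier(hidden_layer_sizes=(64, 64), activation='relu',
                        max_iter=50)
    print(clf)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(metrics.accuracy_score(y_test, y_pred))

if __name__ == "__main__":
    # 1-D features with integer labels, as in do_svm_1d/do_knn_1d
    X, Y, testX, testY = mnist.load_data(one_hot=False)
    do_mlp_1d(X, Y, testX, testY)
Like the SVM and KNN baselines, this operates on the flat 784-dimensional pixel vectors rather than the 28x28 images.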