基于卷积神经网络的手写字符压缩分类
压缩感知:通过对数据按照一定的压缩比进行压缩、传输,然后再利用重构算法将压缩的数据重构出原始数据。
将压缩感知与卷积神经网络结合,不但可以提高重构的精度,而且可以忽略信号的稀疏字典的这一项要求。
所以在数据不完整的情况下,对数据进行分类,可以使用卷积神经网络实现。
1.随机采样矩阵不训练
手写字符集的分类过程可以表示为:
使用Tensorflow框架,对图中过程进行代码整理:
python2.7
# Imports and dataset loading.
import collections  # fixed: used below for defaultdict but was never imported

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import matplotlib.pyplot as plt  # fixed: was "matplotlib.pyplt" (typo), an ImportError

# Load the MNIST handwritten-digit dataset with one-hot encoded labels.
mnist = input_data.read_data_sets('/home/awen/Juanjuan/image_classifier/data/MNIST_data/', one_hot=True)
trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
image = trX     # training images, flattened 28*28 = 784 floats per row
tesimage = teX  # test images, same flattened layout
# Hyperparameters.
learning_rate=0.001
n_classes = 10          # ten digit classes (0-9)
batch_size = 55
training_iters = 200000
# Placeholders for the input images and one-hot labels.
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.int64, [None, n_classes])
# Compression ratio: fraction of the 784 pixels kept as measurements.
rt = 0.1
dropout = 0.75
keep_prob = tf.placeholder(tf.float32)  # dropout keep probability, fed at run time
#conv
def conv2d(name, x, W, b, strides=1):
    """Convolution + bias + ReLU with SAME padding."""
    stride_spec = [1, strides, strides, 1]
    feat = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    feat = tf.nn.bias_add(feat, b)
    return tf.nn.relu(feat, name=name)
def maxpool2d(name, x, k=2):
    """k x k max pooling with stride k (halves spatial size for k=2)."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME', name=name)
def norm(name, linput, lsize=4):
    """Local response normalization (AlexNet-style constants); defined but unused below."""
    return tf.nn.lrn(linput, lsize, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=name)
# Weights and biases initialized by hand; a loop/helper would be less repetitive.
weights = {
'w1': tf.Variable(tf.random_normal([5, 5, 1, 20], stddev=0.01)),    # conv1: 5x5, 1 -> 20 channels
'w2': tf.Variable(tf.random_normal([5, 5, 20, 50], stddev=0.01)),   # conv2: 5x5, 20 -> 50 channels
'wd1': tf.Variable(tf.random_normal([7*7*50, 500], stddev=0.01)),   # FC: flattened 7x7x50 -> 500
'out': tf.Variable(tf.random_normal([500, 10], stddev=0.01)),       # output: 500 -> 10 logits
}
biases = {
'bc1': tf.Variable(tf.random_normal([20])),
'bc2': tf.Variable(tf.random_normal([50])),
'bd1': tf.Variable(tf.random_normal([500])),
'out': tf.Variable(tf.random_normal([10]))
}
#网络:
def cs_lenet5(x, weights, biases, dropout):
    """LeNet-5-style classifier: two conv/pool stages, one FC layer, 10-way logits."""
    net = tf.reshape(x, shape=[-1, 28, 28, 1])
    net = conv2d('conv1', net, weights['w1'], biases['bc1'])
    net = maxpool2d('pool1', net, k=2)       # 28x28 -> 14x14
    net = conv2d('conv2', net, weights['w2'], biases['bc2'])
    net = maxpool2d('pool2', net, k=2)       # 14x14 -> 7x7
    # Flatten to (batch, 7*7*50) for the fully connected stage.
    flat = tf.reshape(net, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc = tf.add(tf.matmul(flat, weights['wd1']), biases['bd1'])
    fc = tf.nn.dropout(tf.nn.relu(fc), dropout)
    return tf.add(tf.matmul(fc, weights['out']), biases['out'])
pred = cs_lenet5(x, weights, biases, keep_prob)
# Softmax cross-entropy loss averaged over the batch.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))  # accuracy over the fed batch
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
with tf.Session() as sess:
sess.run(init_op)
mse_loss1 = []
mse_loss2 = []
accuracy1 = collections.defaultdict(list)
for epoch in range(50):
print epoch
m = np.random.normal(0, 0.1, (image.shape[1], int(rt*image.shape[1]))) #采样矩阵**
#image sampling
sm = np.dot(image, m)
rec_m = np.dot(sm, m.T)
rec = rec_m.reshape((-1, 28, 28, 1))
m1 = np.random.normal(0, 0.5, (image.shape[1], int(rt*image.shape[1]))) #重构过程**
tesm = np.dot(tesimage, m1)
rec_tesm = np.dot(tesm, m1.T)
rec_tes = rec_tesm.reshape((-1, 28, 28, 1))
for i in range(1000):
batch_x, batch_y = rec[i*batch_size:(i+1)*batch_size], trY[i*batch_size:(i+1)*batch_size]
_, loss, acc = sess.run([optimizer, cost, accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: 1})
if i % 100 == 0:
print 'After %d training step(s),'%(i+1),'loss on training is {:.6f}'.format(loss), 'Training accuracy is {:.6f}'.format(acc)
tesloss, tesacc = sess.run([cost, accuracy], feed_dict={x: rec_tes, y: teY, keep_prob: 1})
print 'loss on test is {:.6f}'.format(tesloss), 'Test accuracy is {:.6f}'.format(tesacc)
mse_loss1.append(acc)
mse_loss2.append(tesacc)
accuracy1['train'] = mse_loss1
accuracy1['test'] = mse_loss2
plt.plot(mse_loss1, color='red', label='train data')
plt.plot(mse_loss2, color='black', label='test data')
plt.xlabel('epoch')
plt.ylabel('Accuracy')
2.训练随机矩阵
手写字符集的分类过程将上图中的Φ进行训练,形成端到端的训练过程。
#convolution
def conv2d_cs(name, x, W, strides):
    """Bias-free convolution + ReLU with VALID padding (sampling/reconstruction layers)."""
    measured = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='VALID')
    return tf.nn.relu(measured, name=name)
def conv2d(name, x, W, b, strides=1):
    """Standard convolution: conv, add bias, ReLU; SAME padding keeps spatial size."""
    out = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(out, b), name=name)
def maxpool2d(name, x, k=2):
    """Non-overlapping k x k max pooling (SAME padding)."""
    kspec = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=kspec, strides=kspec, padding='SAME', name=name)
def norm(name, linput, lsize=4):
    """Local response normalization; declared but not wired into the network below."""
    return tf.nn.lrn(linput, lsize, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name=name)
weights = {
# Learned sampling layer: one 28x28 filter per measurement; 78 measurements
# corresponds to a ~0.1 compression ratio of the 784 pixels.
'wc1': tf.Variable(tf.random_normal([28, 28, 1, 78], stddev=0.01)),
# Learned reconstruction layer: 1x1 conv mapping 78 measurements back to 784 values.
'wc2': tf.Variable(tf.random_normal([1, 1, 78, 784], stddev=0.01)),
'w1': tf.Variable(tf.random_normal([5, 5, 1, 20], stddev=0.01)),    # conv1: 1 -> 20 channels
'w2': tf.Variable(tf.random_normal([5, 5, 20, 50], stddev=0.01)),   # conv2: 20 -> 50 channels
'wd1': tf.Variable(tf.random_normal([7*7*50, 500], stddev=0.01)),   # FC: 7x7x50 -> 500
'out': tf.Variable(tf.random_normal([500, 10], stddev=0.01)),       # output logits
}
biases = {
'bc1': tf.Variable(tf.random_normal([20])),
'bc2': tf.Variable(tf.random_normal([50])),
'bd1': tf.Variable(tf.random_normal([500])),
'out': tf.Variable(tf.random_normal([10]))
}
def cs_lenet5(x, weights, biases, dropout):
    """Compressed-sensing LeNet: trainable sampling + reconstruction convs
    feeding a LeNet-5 classifier.

    Returns (logits, reconstructed_images).
    """
    imgs = tf.reshape(x, shape=[-1, 28, 28, 1])
    # Sampling layer: 28x28 filters at stride 28 collapse each image
    # to a 1x1x78 measurement vector.
    sampled = conv2d_cs('conv1_cs', imgs, weights['wc1'], strides=28)
    # Reconstruction layer: 1x1 conv expands 78 measurements to 784 values.
    expanded = conv2d_cs('conv2_cs', sampled, weights['wc2'], strides=1)
    rec = tf.reshape(expanded, [-1, 28, 28, 1])
    c1 = conv2d('conv1', rec, weights['w1'], biases['bc1'])
    p1 = maxpool2d('pool1', c1, k=2)
    c2 = conv2d('conv2', p1, weights['w2'], biases['bc2'])
    p2 = maxpool2d('pool2', c2, k=2)
    # Fully connected stage; dropout was disabled in the original and stays off.
    flat = tf.reshape(p2, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc = tf.nn.relu(tf.add(tf.matmul(flat, weights['wd1']), biases['bd1']))
    logits = tf.add(tf.matmul(fc, weights['out']), biases['out'])
    return logits, rec
pred, csimg = cs_lenet5(x, weights, biases, keep_prob)  # logits and reconstructed images
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
with tf.Session() as sess:
sess.run(init_op)
mse_loss1 = []
mse_loss2 = []
accuracy1 = collections.defaultdict(list)
for epoch in range(50):
print epoch
for i in range(1000):
batch_x, batch_y = trx[i*batch_size:(i+1)*batch_size], trY[i*batch_size:(i+1)*batch_size]
_, img1, loss, acc = sess.run([optimizer, csimg, cost, accuracy], feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})
if i % 100 == 0:
print 'After %d training step(s),'%(i+1),'loss on training is {:.6f}'.format(loss), 'Training accuracy is {:.6f}'.format(acc)
tesloss, tesacc = sess.run([cost, accuracy], feed_dict={x: tex, y: teY, keep_prob: 1.0})
print 'loss on test is {:.6f}'.format(tesloss), 'Test accuracy is {:.6f}'.format(tesacc)
mse_loss1.append(acc)
mse_loss2.append(tesacc)
accuracy1['train'] = mse_loss1
accuracy1['test'] = mse_loss2
plt.plot(mse_loss1, color='red', label='train data')
plt.plot(mse_loss2, color='black', label='test data')
plt.xlabel('epoch')
plt.ylabel('Accuracy')
通过对比训练和不训练两种情况下的手写字符集的准确率,结果如下图所示:
我们可以发现将采样矩阵进行训练,在压缩比为0.01的情况下,训练得到的结果依然很高。
参考论文:
[1].Lohit S , Kulkarni K , Turaga P . Direct inference on compressive measurements using convolutional neural networks[C]// IEEE International Conference on Image Processing. IEEE, 2016.
[2].Adler A, Elad M, Zibulevsky M. Compressed Learning: A Deep Neural Network Approach[J]. 2016.
我写的代码真的太啰嗦了,在学习的路上继续改进吧。