为了缩短调试时间，所有训练和测试都只截取了数据集的一部分。
灰度直方图与图像像素两种算法的区别仅在于训练和测试时输入的数据不同：
前者为每幅图像的灰度直方图（一维数组），后者为图像的像素数字矩阵。
- 以灰度直方图为特征,调用库函数实现逻辑回归功能
"""
以灰度直方图为特征,调用逻辑回归函数作为分类器对MNIST进行分类
"""
import pickle
import gzip
import numpy as np
import random
from sklearn.linear_model import LogisticRegression
def load_data(path='F:/jupyter/mnist.pkl.gz'):
    """Load the pickled, gzip-compressed MNIST splits from disk.

    Args:
        path: Location of the ``.pkl.gz`` archive. It must be gzip-compressed;
            ``gzip.open`` cannot read a ``.zip`` file. Defaults to the
            location the original script used.

    Returns:
        A ``(training_data, validation_data, test_data)`` tuple, exactly as
        stored in the pickle.
    """
    # The context manager closes the file even when unpickling fails.
    with gzip.open(path, 'rb') as f:
        # NOTE(security): pickle can execute arbitrary code while loading;
        # only open archives that come from a trusted source.
        training_data, validation_data, test_data = pickle.load(
            f, encoding='bytes')
    return (training_data, validation_data, test_data)
def convert_to_gray_hist(training_data):
    """Turn each image into a 256-bin gray-level histogram.

    Every pixel value ``v`` is mapped to bin ``int(v * 256)``, so the input
    is assumed to hold intensities scaled to the range 0..1 (TODO confirm
    against the dataset). Bin indices are clamped to ``0..255`` so that a
    pixel of exactly ``1.0`` lands in the last bin instead of indexing past
    the end of the histogram, and a negative value lands in bin 0 instead of
    silently wrapping around to the end.

    Args:
        training_data: Iterable of images; each image is anything
            ``np.asarray`` accepts and is flattened before counting.

    Returns:
        A list with one entry per image; each entry is a list of 256 float
        counts.
    """
    histograms = []
    for image in training_data:
        pixels = np.asarray(image, dtype=float).ravel()
        # astype(int) truncates toward zero, matching int(v * 256).
        bins = np.clip((pixels * 256).astype(int), 0, 255)
        # bincount does the per-pixel counting in native code.
        counts = np.bincount(bins, minlength=256)
        histograms.append(counts.astype(float).tolist())
    return histograms
def _random_batch(samples, labels, batch_size):
    """Return a random contiguous ``(samples, labels)`` slice of ``batch_size`` items."""
    # Bound the start offset by the real data size rather than a hard-coded
    # constant, so the slice is never cut short or left empty on a smaller split.
    start = random.randint(0, max(0, len(samples) - batch_size))
    stop = start + batch_size
    return samples[start:stop], labels[start:stop]


def _report_accuracy(model, samples, labels, message):
    """Predict on the histogram features of ``samples`` and print the accuracy line."""
    predictions = [int(p) for p in model.predict(convert_to_gray_hist(samples))]
    # Use the real number of evaluated samples as the denominator.
    total = len(labels)
    num_correct = sum(1 for p, y in zip(predictions, labels) if p == y)
    print(message % (total, num_correct, num_correct / total))


def logistic_mnist():
    """Train and evaluate logistic regression on gray-level histograms.

    Loads the three data splits with ``load_data`` (each split is indexed as
    ``split[0]`` = samples and ``split[1]`` = labels), fits a scikit-learn
    ``LogisticRegression`` on a random contiguous slice of the training
    split, then prints the accuracy on random slices of the validation and
    test splits. Only part of each split is used to keep debugging runs
    short.

    Returns:
        None. The results are printed to standard output.
    """
    training_data, validation_data, test_data = load_data()
    mini_batch_size = 1000

    # Train on a random slice five batches long.
    train_samples, train_labels = _random_batch(
        training_data[0], training_data[1], mini_batch_size * 5)

    # tol is the stopping tolerance of the solver; the remaining
    # hyper-parameters are left at the library defaults.
    model = LogisticRegression(penalty='l2', tol=0.001)
    model.fit(convert_to_gray_hist(train_samples), train_labels)

    # Evaluate on a random slice of the validation split.
    validation_samples, validation_labels = _random_batch(
        validation_data[0], validation_data[1], mini_batch_size)
    _report_accuracy(model, validation_samples, validation_labels,
                     '验证集总数:%d, 正确个数:%d, 正确率为:%f')

    # Evaluate on a random slice of the test split.
    test_samples, test_labels = _random_batch(
        test_data[0], test_data[1], mini_batch_size)
    _report_accuracy(model, test_samples, test_labels,
                     '测试集总数:%d, 正确个数:%d, 正确率为:%f')
# Run the experiment: fit on a random training slice, then print validation and test accuracy.
logistic_mnist()
- 以图像像素为特征,调用库函数实现逻辑回归
"""
以图像像素为特征,调用逻辑回归函数作为分类器对MNIST进行分类
"""
import pickle
import gzip
import numpy as np
import random
from sklearn.linear_model impo