Dataset: MNIST.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import time
import math
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
class Softmax(object):
    def __init__(self):
        self.learning_rate = 0.000001  # learning rate
        self.max_iteration = 100000    # maximum number of iterations
        self.weight_lambda = 0.01      # weight-decay (L2 regularization) coefficient
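    # The learning rate is tiny because the features are raw pixel values in
    # [0, 255]; with inputs scaled to [0, 1], a larger rate would be usable.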
    def cal_e(self, x, l):
        # e^{w_l . x}, the unnormalized score of class l
        theta_l = self.w[l]
        product = np.dot(theta_l, x)
        return math.exp(product)
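    # Note: math.exp(w_l . x) can overflow once the scores grow large. A
    # common remedy (not used in the original code) is to subtract the
    # maximum score over all classes before exponentiating; see the sketch
    # at the end of this file.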
    def cal_probability(self, x, j):
        # softmax probability P(y = j | x) = e^{w_j . x} / sum_i e^{w_i . x}
        numerator = self.cal_e(x, j)
        denominator = sum([self.cal_e(x, i) for i in range(self.k)])
        return numerator / denominator
    def cal_partial_derivative(self, x, y, j):
        first = int(y == j)                  # indicator function I(y == j)
        second = self.cal_probability(x, j)  # predicted probability P(y = j | x)
        # When j is the true class we subtract 1, otherwise 0
        # (cf. the gradient formula in the notes).
        return x * (second - first) + self.weight_lambda * self.w[j]  # partial derivative w.r.t. w[j]
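    # For reference, the expression above is the per-sample gradient of the
    # L2-regularized cross-entropy loss:
    #   dL/dw_j = (P(y = j | x) - I(y == j)) * x + lambda * w_j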
    def train(self, features, labels):
        self.k = len(set(labels))  # number of classes
        # Weight matrix of shape (k, n_features + 1); the extra column is
        # the bias. Drawing the W*x product helps to see the shapes.
        self.w = np.zeros((self.k, len(features[0]) + 1))
        iteration = 0
        while iteration < self.max_iteration:
            iteration += 1
            # Stochastic gradient descent: pick one random sample per step.
            index = random.randint(0, len(labels) - 1)
            x = features[index]
            y = labels[index]
            x = list(x)
            x.append(1.0)  # append 1 so the bias weight multiplies it
            x = np.array(x)
            derivatives = [self.cal_partial_derivative(x, y, j) for j in range(self.k)]
            for j in range(self.k):
                self.w[j] -= self.learning_rate * derivatives[j]
    def _predict(self, x):
        result = np.dot(self.w, x)  # class scores, shape (k,)
        # Return the index (row) of the class with the largest score.
        return int(np.argmax(result))

    def predict(self, features):
        labels = []
        for feature in features:
            x = list(feature)
            x.append(1.0)  # same bias term as in training
            x = np.array(x)
            labels.append(self._predict(x))
        return labels
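
# A minimal vectorized sketch of the same prediction step, assuming a trained
# Softmax instance as above. `batch_predict` is an illustrative helper, not
# part of the original code: it appends the bias column and takes the argmax
# over all class scores at once instead of looping per sample.
def batch_predict(model, features):
    X = np.hstack([np.asarray(features, dtype=float),
                   np.ones((len(features), 1))])  # add the bias column of 1s
    scores = X.dot(model.w.T)                     # shape (n_samples, k)
    return np.argmax(scores, axis=1).tolist()     # best-scoring class per row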
if __name__ == '__main__':
    print('Start reading data:')
    time1 = time.time()
    raw_data = pd.read_csv('data/train.csv')
    data = raw_data.values
    imgs = data[:, 1:]
    labels = data[:, 0]
    train_features, test_features, train_labels, test_labels = train_test_split(
        imgs, labels, test_size=0.33, random_state=11111)
    time2 = time.time()
    print('reading data cost %f seconds' % (time2 - time1))
    print('Start training:')
    softmax = Softmax()
    softmax.train(train_features, train_labels)
    time3 = time.time()
    print('training cost %f seconds' % (time3 - time2))
    print('Start predicting:')
    test_predict = softmax.predict(test_features)
    time4 = time.time()
    print('predicting cost %f seconds' % (time4 - time3))
    accuracy = sum([test_labels[i] == test_predict[i] for i in range(len(test_labels))]) / len(test_labels)
    print('The accuracy is %f!' % accuracy)
'''
output:
Start reading data:
reading data cost 5.960523 seconds
Start training:
training cost 39.005752 seconds
Start predicting:
predicting cost 1.706823 seconds
The accuracy is 0.871573!
'''
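For numerical robustness, the exponentials above are usually computed after subtracting the maximum score, which leaves the softmax output unchanged. A minimal sketch under the same assumptions as the class above (a (k, n_features + 1) weight matrix and a feature vector with the bias 1 appended); stable_probabilities is an illustrative helper, not part of the original code:

import numpy as np

def stable_probabilities(w, x):
    # w: (k, d + 1) weight matrix; x: feature vector with the bias 1 appended
    scores = np.dot(w, x)
    scores = scores - np.max(scores)  # shift so the largest exponent is 0
    exps = np.exp(scores)
    return exps / exps.sum()          # softmax probabilities, summing to 1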