import numpy as np
from sklearn import datasets
def softmax(x, paras):
    """Return class probabilities for a single sample.

    Computes softmax(x @ paras): logits are the dot product of the
    feature vector with the weight matrix; they are shifted by their
    maximum before exponentiation for numerical stability (softmax is
    shift-invariant, so the result is unchanged), then normalized.

    Args:
        x: 1-D feature vector (bias column included).
        paras: weight matrix, one column of weights per class.

    Returns:
        1-D numpy array of per-class probabilities summing to 1.
    """
    logits = np.dot(x, paras)
    # Subtract the max logit so np.exp cannot overflow; the original
    # exp(logits) produced inf/inf = nan for large logit values.
    # Also: do not shadow the builtin `sum`.
    exp_logits = np.exp(logits - np.max(logits))
    return exp_logits / np.sum(exp_logits)
# --- Load data and build the train/test split -------------------------------
iris = datasets.load_iris()
X = iris.data[:, 2:4]  # use only petal length & petal width
y = iris.target

# Prepend a column of ones so the intercept is learned as an ordinary weight.
X_bias = np.ones([len(X), 1])
X_train_test = np.hstack((X_bias, X))

num_classes = 3
# One-hot encode the integer labels: row i is the indicator of class y[i].
y_train_test = np.eye(num_classes)[y]

train_percent = 0.6
num_samples = len(X)  # 150 for iris; avoid hard-coding the count
# NOTE(review): no fixed RNG seed, so the split (and the final accuracy)
# varies from run to run — kept as in the original.
shuffle_index = np.random.permutation(num_samples)
X_train_test = X_train_test[shuffle_index]
y_train_test = y_train_test[shuffle_index]

split = int(num_samples * train_percent)
X_train = X_train_test[:split]
y_train = y_train_test[:split]
X_test = X_train_test[split:]
y_test = y_train_test[split:]
# Integer labels for the test set, used for accuracy computation.
y_test_1 = np.argmax(y_test, axis=1)
def train_test():
    """Train a softmax-regression classifier with batch gradient descent.

    Reads the module-level X_train / y_train / X_test / y_test_1 arrays.
    Every 2000 epochs it prints the current loss and parameters and the
    accuracy on the held-out test set. Returns None.
    """
    num_classes = 3
    # 3 weights per class: bias + the 2 petal features.
    para = np.random.rand(3, num_classes)
    epoch = 20000
    learning_rate = 0.01
    samples = len(X_train)
    for i in range(epoch):
        loss = 0.0
        grad = np.zeros([3, num_classes])
        for j in range(samples):
            yj = y_train[j]
            pj = softmax(X_train[j], para)
            # Cross-entropy for this sample: -sum_k y_k * log(p_k).
            loss += -np.dot(yj, np.log(pj).T)
            # Gradient of the cross-entropy w.r.t. the weights is the
            # outer product x (p - y); replaces the per-class column loop.
            grad += np.outer(X_train[j], pj - yj)
        loss = loss / samples
        grad = grad / samples
        para = para - learning_rate * grad
        if (i + 1) % 2000 == 0:
            print('\ni=', i + 1)
            print('loss=', loss)
            print('para=', para)
            # Predicted class = argmax of the (unnormalized) scores.
            y_pred = np.argmax(np.dot(X_test, para), axis=1)
            # Accuracy over the actual test-set size, not a magic number.
            print(np.sum((y_pred == y_test_1).astype(int)) / len(y_test_1))


train_test()
# Sample output:
# i= 2000
# loss= 0.5050712902631536
# para= [[ 2.06602856 -0.20443347 -1.32824731]
#  [-0.22687971 0.34568269 0.24767398]
#  [-0.41704432 0.65611776 1.96303604]]
# 0.6833333333333333
# i= 4000
# loss= 0.4003024132794443
# para= [[ 2.9541613 -0.23096979 -2.18984373]
#  [-0.40146026 0.48742211 0.28051512]
#  [-0.75027279 0.42649992 2.52588234]]
# 0.7833333333333333
# i= 6000
# loss= 0.3454108424592416
# para= [[ 3.57064547 -0.18903025 -2.84826744]
#  [-0.52227931 0.57949177 0.3092645 ]
#  [-0.97816569 0.24490013 2.93537503]]
# 0.8333333333333334
# i= 8000
# loss= 0.3091782305321345
# para= [[ 4.0591322 -0.12135313 -3.40443129]
#  [-0.61543022 0.64303481 0.33887237]
#  [-1.15760138 0.10203338 3.25767747]]
# 0.8666666666666667
# i= 10000
# loss= 0.2825937078139275
# para= [[ 4.47125948 -0.04349193 -3.89441976]
#  [-0.69203841 0.68870641 0.36980897]
#  [-1.30858544 -0.01173358 3.52242849]]
# 0.8666666666666667
# i= 12000
# loss= 0.2618906640587086
# para= [[ 4.83136685 0.03787938 -4.33589844]
#  [-0.75764403 0.72245435 0.40166665]
#  [-1.4403523 -0.10371202 3.7461738 ]]
# 0.9166666666666666
# i= 14000
# loss= 0.24514135659598718
# para= [[ 5.15302211 0.11959889 -4.73927321]
#  [-0.8153586 0.74787342 0.43396214]
#  [-1.55796442 -0.17916641 3.9392403 ]]
# 0.9166666666666666
# i= 16000
# loss= 0.2312253725585263
# para= [[ 5.44469222 0.20009712 -5.11144156]
#  [-0.86709636 0.7672674 0.46630593]
#  [-1.66454441 -0.24188349 4.10853737]]
# 0.9166666666666666
# i= 18000
# loss= 0.2194325974010466
# para= [[ 5.71209939 0.27859461 -5.45734622]
#  [-0.91412081 0.78218025 0.49841752]
#  [-1.76218754 -0.2946214 4.25891841]]
# 0.9166666666666666
# i= 20000
# loss= 0.20928408820673536
# para= [[ 5.95934225 0.35472721 -5.78072168]
#  [-0.95731318 0.79368403 0.53010612]
#  [-1.85238907 -0.3394213 4.39391984]]
# 0.9166666666666666