import numpy as np
class logistic:
    """Binary logistic-regression classifier trained by batch gradient descent.

    Samples are stored column-wise: ``X`` holds one column per sample and one
    row per feature, followed by a final row of ones. Because of that extra
    row, the last entry of ``W`` plays the role of the bias, so ``W.T @ X``
    is equivalent to ``weight * x + bias``.
    """

    def __init__(self, fileName, splitBy='\t', lr=1e-3, epochs=3000):
        """Load the training set and initialise all weights to zero.

        Args:
            fileName: Path to a delimited text file. Every non-blank line
                holds the feature values of one sample followed by its label.
            splitBy: Field delimiter used in the file.
            lr: Learning rate (step size of each gradient-descent update).
            epochs: Number of full-batch updates performed by ``train``.

        Raises:
            ValueError: If the file contains no data rows or cannot be parsed
                into a rectangular table of floats.
        """
        self.lr = lr
        self.epochs = epochs
        self.X, self.Y = self._load(fileName, splitBy)
        # One weight per row of X (features + bias row), as a column vector.
        self.W = np.zeros(shape=(self.X.shape[0], 1))

    @staticmethod
    def _load(fileName, splitBy='\t'):
        """Read a data file and return ``(X, Y)`` in column-per-sample layout.

        ``X`` has shape (n_features + 1, n_samples) with a trailing row of
        ones; ``Y`` has shape (1, n_samples) and holds the last file column.
        """
        # The context manager guarantees the handle is closed; blank lines
        # (e.g. a trailing newline at end of file) are skipped, not parsed.
        with open(fileName) as handle:
            rows = [line.rstrip('\r\n').split(splitBy)
                    for line in handle if line.strip()]
        data = np.array(rows, dtype='float')
        if data.ndim != 2:
            raise ValueError(
                'no data rows could be read from {!r}'.format(fileName))
        features = data[:, :-1].T
        labels = data[:, -1].reshape(1, -1)
        bias_row = np.ones(shape=(1, labels.shape[1]), dtype='float')
        # np.vstack replaces np.row_stack, which is deprecated in NumPy 2.0.
        return np.vstack((features, bias_row)), labels

    def _WX(self):
        """Return the linear scores ``W.T @ X`` with shape (1, n_samples)."""
        return np.matmul(self.W.T, self.X)

    def _sigmod(self, x):
        """Return the element-wise logistic sigmoid of ``x`` as a float array.

        Only ``exp(-|x|)`` is evaluated, which lies in (0, 1], so the
        computation cannot overflow for inputs of large magnitude.
        """
        x = np.asarray(x, dtype='float')
        decay = np.exp(-np.abs(x))
        return np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))

    def _Logistic(self):
        """Return the predicted probabilities for the training samples."""
        return self._sigmod(self._WX())

    def _CrossEntropy(self):
        """Return the mean cross-entropy on the training set as a 1x1 array."""
        # Keep probabilities strictly inside (0, 1) so that log() stays
        # finite even when the sigmoid saturates.
        eps = 1e-12
        prob = np.clip(self._Logistic(), eps, 1 - eps)
        positive_term = np.log(prob).dot(self.Y.T)
        negative_term = np.log(1 - prob).dot((1 - self.Y).T)
        total = -(positive_term + negative_term)
        return np.array(total / self.Y.shape[1], dtype='float')

    def _Gradient(self):
        """Return the gradient of the mean cross-entropy with respect to W."""
        residual = self._Logistic() - self.Y
        return self.X.dot(residual.T) / self.Y.shape[1]

    def train(self, verbose=True):
        """Run ``self.epochs`` full-batch gradient-descent updates on ``W``.

        Args:
            verbose: When true (the default), print the cross-entropy after
                every update.
        """
        for epoch in range(1, self.epochs + 1):
            self.W -= self.lr * self._Gradient()
            if verbose:
                # .item() extracts the scalar; calling float() on a 1x1
                # array is deprecated since NumPy 1.25.
                print('第{}轮训练,交叉熵为:{}'.format(
                    epoch, self._CrossEntropy().item()))

    def predict(self, x):
        """Return predicted probabilities for the samples in ``x``.

        ``x`` must use the same layout as the training matrix: one column per
        sample, with a final row of ones appended so that its row count
        matches the length of ``W``.
        """
        return self._sigmod(np.matmul(self.W.T, x))

    def test(self, fileName=None, splitBy='\t'):
        """Print and return the classification accuracy at threshold 0.5.

        Args:
            fileName: Data file to evaluate on. When ``None``, the training
                data already held by the model is used instead.
            splitBy: Field delimiter of ``fileName``.

        Returns:
            The fraction of samples whose thresholded prediction equals the
            label.
        """
        if fileName is None:
            x, expected = self.X, self.Y[0]
            report = '训练集上正确率:{}'
        else:
            x, labels = self._load(fileName, splitBy)
            expected = labels[0]
            report = '测试集上正确率:{}'
        # Threshold the probabilities to hard 0/1 predictions.
        predicted = (self.predict(x)[0] >= 0.5).astype('float')
        correct = int(np.count_nonzero(predicted == expected))
        accuracy = correct / expected.size
        print(report.format(accuracy))
        return accuracy
if __name__ == '__main__':
    # Fit on the training file, then report accuracy on the training data
    # and on the separate test file.
    train_path = './logistic/logisticTraining.txt'
    test_path = './logistic/logisticTest.txt'
    model = logistic(train_path)
    model.train()
    model.test()
    model.test(fileName=test_path)
# Dataset link: https://pan.baidu.com/s/10pI9Y7w3MCYLhz29RhdZEg
# Extraction code: 3ajp