# --*-- coding:utf-8 --*--
import numpy as np
class Logistic:
    """Binary logistic-regression classifier trained with batch gradient descent.

    Every sample is represented as ``[1.0, x1, x2, ...]``; the leading 1.0 is
    the bias input, so the weight vector is ``(b, w1, w2, ...)``.
    """

    def loadDataSet(self, fileName='testSet.txt'):
        """Load samples and labels from a whitespace-separated text file.

        Each non-blank line must contain two feature values followed by an
        integer class label in the last column.

        Args:
            fileName: Path of the data file to read.

        Returns:
            A ``(dataMat, labelMat)`` tuple: ``dataMat`` is a list of
            ``[1.0, x1, x2]`` rows and ``labelMat`` is the list of int labels.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a field cannot be parsed as a number.
            IndexError: If a non-blank line has fewer than two fields.
        """
        dataMat = []
        labelMat = []
        # The context manager closes the file even if a line fails to parse.
        with open(fileName) as fr:
            for line in fr:
                lineArr = line.split()
                if not lineArr:
                    continue  # tolerate blank lines (e.g. a trailing newline)
                # Prepend the constant bias input 1.0 to the two features.
                dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
                labelMat.append(int(lineArr[-1]))
        return dataMat, labelMat

    def sigmoid(self, inX):
        """Return the logistic function ``1 / (1 + exp(-inX))`` element-wise.

        Args:
            inX: A scalar, ndarray or matrix of real values.

        Returns:
            Value(s) in the range [0, 1] with the same shape as ``inX``.
        """
        # exp() overflows to inf for very negative inputs; the quotient is then
        # exactly 0.0, which is the correct limit, so only that warning is
        # silenced.
        with np.errstate(over='ignore'):
            return 1.0 / (1 + np.exp(-inX))

    def train(self, dataSet, labels, alpha=0.01, maxIter=500):
        """Fit the weights with batch gradient descent using ``np.matrix`` algebra.

        Args:
            dataSet: 2-D sequence of shape (m, n); each row is one sample
                including the leading bias input.
            labels: Sequence of m class labels (0 or 1).
            alpha: Learning rate (step size).
            maxIter: Number of gradient-descent iterations.

        Returns:
            The learned weights as an ``np.matrix`` of shape (n, 1).
        """
        # np.asmatrix replaces the np.mat alias that NumPy 2.0 removed.
        dataMat = np.asmatrix(dataSet)                # (m, n)
        labelMat = np.asmatrix(labels).transpose()    # (m, 1)
        _, n = np.shape(dataSet)
        weights = np.asmatrix(np.ones((n, 1)))        # (n, 1)
        for _ in range(maxIter):
            h = self.sigmoid(dataMat * weights)       # predictions, (m, 1)
            error = h - labelMat                      # prediction - label, (m, 1)
            # For matrices `*` is matrix multiplication: X^T (h - y).
            weights = weights - alpha * dataMat.transpose() * error
        return weights

    def nparraytrain(self, dataSet, labels, alpha=0.01, maxIter=500):
        """Fit the weights with batch gradient descent using plain ndarrays.

        Args:
            dataSet: 2-D sequence of shape (m, n); each row is one sample
                including the leading bias input.
            labels: Sequence of m class labels (0 or 1).
            alpha: Learning rate (step size).
            maxIter: Number of gradient-descent iterations.

        Returns:
            The learned weights as an ndarray of shape (n, 1).
        """
        dataSet = np.array(dataSet)                   # (m, n)
        labelSet = np.array(labels)[:, np.newaxis]    # (m,) -> (m, 1)
        _, n = np.shape(dataSet)
        weights = np.ones((n, 1))                     # (n, 1)
        for _ in range(maxIter):
            h = self.sigmoid(np.dot(dataSet, weights))    # (m, 1)
            error = h - labelSet                          # (m, 1)
            # np.dot is required here: for ndarrays `*` is element-wise, so
            # `dataSet.transpose() * error` would NOT compute X^T (h - y).
            weights = weights - alpha * np.dot(dataSet.transpose(), error)
        return weights

    def classify(self, X, weights):
        """Predict the class of a single sample.

        Args:
            X: One sample of n values, including the leading bias input.
            weights: Learned weights holding n values; a 1-D array, an (n, 1)
                ndarray or an (n, 1) matrix are all accepted.

        Returns:
            1.0 if the predicted probability exceeds 0.5, otherwise 0.0.
        """
        # Flatten both operands so the score is always the scalar dot product
        # w . x (the sum of element-wise products), whatever shape the weights
        # were returned in by the training routine.
        features = np.asarray(X, dtype=float).ravel()
        coeffs = np.asarray(weights, dtype=float).ravel()
        prob = self.sigmoid(np.dot(features, coeffs))
        return 1.0 if prob > 0.5 else 0.0
if __name__ == '__main__':
    # Demo run: read the default data file, fit the ndarray-based trainer
    # and print the learned weight vector.
    model = Logistic()
    features, targets = model.loadDataSet()
    print(model.nparraytrain(features, targets))