# Reference: https://blog.csdn.net/c406495762/article/details/77851973
import numpy as np
import random
def sigmoid(inX):
    """Compute the logistic sigmoid 1 / (1 + exp(-inX)) element-wise.

    The value is evaluated through the identity
    ``sigmoid(x) = 0.5 * (1 + tanh(x / 2))``. It is mathematically equal to
    the textbook form but never calls ``exp`` on a large positive number, so
    strongly negative inputs no longer trigger NumPy overflow warnings.

    Args:
        inX: A real scalar, ``ndarray`` or ``np.matrix``.

    Returns:
        The sigmoid of ``inX``, with values in [0, 1]. NumPy inputs keep
        their shape and array type.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * inX))
def gradAscent(dataSet, classLabels, alpha=0.01, maxCycle=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Starting from all-ones weights, the update
    ``w <- w + alpha * X^T (y - sigmoid(X w))`` is applied ``maxCycle``
    times using the whole data set on every step.

    Plain ``ndarray`` arithmetic is used instead of ``np.mat``: that alias
    was removed from the NumPy main namespace in NumPy 2.0.

    Args:
        dataSet: Feature values, one sample per row (2-D array-like; a 1-D
            input is treated as a single sample).
        classLabels: One numeric label per sample.
        alpha: Learning rate (step size) of each update.
        maxCycle: Number of update iterations to run.

    Returns:
        An ``ndarray`` of shape ``(n, 1)`` holding one weight per feature
        column.
    """
    features = np.atleast_2d(np.asarray(dataSet, dtype=float))
    # Column vector so that it lines up with the (m, 1) prediction vector.
    labels = np.asarray(classLabels, dtype=float).reshape(-1, 1)
    featureCount = features.shape[1]
    weights = np.ones((featureCount, 1))
    for _ in range(maxCycle):
        predictions = sigmoid(np.dot(features, weights))
        error = labels - predictions
        weights = weights + alpha * np.dot(features.T, error)
    return weights
def _loadColicFile(path):
    """Parse a tab-separated data file into (feature rows, labels).

    Every non-blank line is split on tabs; all fields but the last are
    converted to float features and the last field is the float label.
    """
    featureRows = []
    labels = []
    # The context manager guarantees the file is closed even if parsing fails.
    with open(path) as dataFile:
        for line in dataFile:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            fields = line.split('\t')
            featureRows.append([float(value) for value in fields[:-1]])
            labels.append(float(fields[-1]))
    return featureRows, labels


def colicTest(trainPath=r'G:\MLiA_SourceCode\machinelearninginaction\Ch05\horseColicTraining.txt',
              testPath=r'G:\MLiA_SourceCode\machinelearninginaction\Ch05\horseColicTest.txt'):
    """Train on the training file and print the error rate on the test file.

    Weights are fitted with ``gradAscent`` on the training rows; every test
    row is then classified with ``classifyVector`` and compared against its
    label. The misclassification percentage is printed to stdout.

    Args:
        trainPath: Path of the tab-separated training file (label in the
            last column). Defaults to the original hard-coded location.
        testPath: Path of the tab-separated test file in the same format.

    Raises:
        OSError: If either file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
        ZeroDivisionError: If the test file contains no data rows.
    """
    # Load both files up front so a missing test file fails before training.
    trainingSet, trainingLabels = _loadColicFile(trainPath)
    testSet, testLabels = _loadColicFile(testPath)

    trainingWeights = gradAscent(np.array(trainingSet), trainingLabels)
    flatWeights = trainingWeights[:, 0]

    errorCount = 0
    for featureRow, label in zip(testSet, testLabels):
        # Labels are parsed as floats, so both "1" and "1.000000" are accepted.
        if int(classifyVector(np.array(featureRow), flatWeights)) != int(label):
            errorCount += 1
    errorRate = (float(errorCount) / len(testSet)) * 100
    print("测试集错误率为:%.2f%%" % errorRate)
def classifyVector(inX, weights):
    """Classify one sample as 1.0 or 0.0.

    The element-wise products of ``inX`` and ``weights`` are summed and
    passed through ``sigmoid``; a result strictly above 0.5 yields 1.0,
    anything else yields 0.0.

    Args:
        inX: Feature values of the sample.
        weights: One weight per feature, aligned with ``inX``.

    Returns:
        1.0 or 0.0.
    """
    activation = sum(inX * weights)
    return 1.0 if sigmoid(activation) > 0.5 else 0.0
# Script entry point: run the evaluation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    colicTest()