"Logistics回归分类函数"defclassifyVector(inX, weights):
prob = sigmoid(sum(inX*weights))if prob >0.5:return1.0else:return0.0# %%defcoliTest():# 文件读取以及模型训练import os
path = os.path.realpath(os.curdir)# 获取当前目录的绝对路径
path1 = os.path.join(path,"horseColicTraining.txt")# 加上文件名
path2 = os.path.join(path,"horseColicTest.txt")
frTrain =open(path1)
frTest =open(path2)
trainingSet =[]; trainingLabels =[]for line in frTrain.readlines():
currLine = line.strip().split('\t')
lineArr =[]for i inrange(21):
lineArr.append(float(currLine[i]))
trainingSet.append(lineArr)
trainingLabels.append(float(currLine[i]))
trainWeights = stocGradAscent0(np.array(trainingSet), trainingLabels,500)
errorCount =0; numTestVec =0.0for line in frTest.readlines():
numTestVec +=1.0
currLine = line.strip().split('\t')
lineArr =[]for i inrange(21):
lineArr.append(float(currLine[i]))ifint(classifyVector(np.array(lineArr), trainWeights))!=int(currLine[21]):
errorCount +=1
errorRate =(float(errorCount)/numTestVec)print("the error rate of this test is: %f"% errorRate)return errorRate
# %%
def multiTest():
    """Call coliTest ten times and print the average of the returned error rates."""
    runCount = 10
    # Collect the per-run error rates first, then add them up in run order.
    rates = [coliTest() for _ in range(runCount)]
    total = 0.0
    for rate in rates:
        total += rate
    average = total / float(runCount)
    print("after %d iterations the average error rate is %f" % (runCount, average))
# %%
multiTest()
[43]multiTest()
multiTest()
the error rate of this test is: 0.298507
the error rate of this test is: 0.298507
the error rate of this test is: 0.298507
the error rate of this test is: 0.298507
the error rate of this test is: 0.298507
the error rate of this test is: 0.298507
the error rate of this test is: 0.298507
the error rate of this test is: 0.298507
the error rate of this test is: 0.298507
the error rate of this test is: 0.298507
after 10 iterations the average error rate is 0.298507
from math import *import numpy as np#%%"回归梯度上升优化算法"def loadDataSet(): # 文件读取 dataMat = [] labelMat = [] fr = open(r'C:\Users\xuning\PycharmProjects\machine learning\Logistics Regression\testSet.txt','r') for line in fr.readlines():