# -*- coding: utf-8 -*-
"""Logistic regression trained with batch and stochastic gradient ascent.

The module offers three trainers (``gradAscent``, ``stoGradAscent0``,
``stoGradAscent1``), a decision-boundary plot (``plotBestFit``) and an
evaluation on the horse-colic data files (``colicTest`` / ``multitext``).
"""
# The star import and the two imports below are kept unchanged because
# scripts that do ``from logRegres import *`` may rely on the re-exported names.
from numpy import *
from math import log
import operator

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: training and classification only need NumPy.
    plt = None

_TEST_SET_PATH = r'E:\file\python\test\test\logisticRession_data\testSet.txt'
_NUM_COLIC_FEATURES = 21  # feature columns per horse-colic row; the label follows


def loadDataSet(path=_TEST_SET_PATH):
    """Read a two-feature training file.

    Each non-blank line holds two whitespace-separated feature values followed
    by an integer class label.

    Args:
        path: file to read; defaults to the original hard-coded test set.

    Returns:
        ``(dataMat, labelMat)`` where ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows (the leading 1.0 is the intercept term) and
        ``labelMat`` is the list of integer labels.
    """
    dataMat = []
    labelMat = []
    with open(path) as fr:
        for line in fr:
            lineArr = line.split()  # split() also drops leading/trailing blanks
            if not lineArr:
                continue  # tolerate blank lines instead of raising IndexError
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat


def sigmoid(inX):
    """Return the logistic function ``1 / (1 + exp(-inX))``, element-wise.

    Accepts a scalar, ``ndarray`` or ``matrix``.
    """
    # For very negative inputs exp(-inX) overflows to inf, which still gives
    # the right limit (0.0); only the overflow warning is suppressed.
    with np.errstate(over='ignore'):
        return 1.0 / (1 + np.exp(-inX))


def _as_training_arrays(dataMatIn, classLabels):
    """Convert samples to a 2-D float array and labels to a 1-D float array."""
    data = np.asarray(dataMatIn, dtype=float)
    labels = np.asarray(classLabels, dtype=float).ravel()
    if data.ndim != 2:
        raise ValueError('dataMatIn must be a non-empty 2-D collection of samples')
    if labels.shape[0] < data.shape[0]:
        raise ValueError('classLabels must supply one label per sample')
    return data, labels


def gradAscent(dataMatIn, classLabels):
    """Train weights with batch gradient ascent (500 full passes, step 0.001).

    Args:
        dataMatIn: m x n samples (list of rows, array or matrix).
        classLabels: m class labels (0 or 1).

    Returns:
        An n x 1 ``numpy.matrix`` of weights.
    """
    # np.asmatrix replaces the ``mat`` alias, which NumPy 2.0 removed.
    dataMatrix = np.asmatrix(dataMatIn, dtype=float)
    labelMat = np.asmatrix(classLabels, dtype=float).transpose()  # column vector
    n = dataMatrix.shape[1]
    alpha = 0.001    # step size
    maxCycles = 500  # number of passes over the whole data set
    weights = np.ones((n, 1))
    for _ in range(maxCycles):
        h = sigmoid(dataMatrix * weights)  # matrix product: m x 1 predictions
        error = labelMat - h
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights


def plotBestFit(wei):
    """Scatter the default data set and draw the decision boundary for ``wei``.

    Args:
        wei: three weights, as a flat array or as the n x 1 matrix returned
            by ``gradAscent``.

    Raises:
        ImportError: if matplotlib is not installed.
    """
    if plt is None:
        raise ImportError('matplotlib is required for plotBestFit')
    # Flatten so both ndarray and column-matrix weights index as scalars.
    weight = np.asarray(wei, dtype=float).ravel()
    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat, dtype=float).reshape(-1, 3)
    positive = np.array(labelMat) == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[positive, 1], dataArr[positive, 2],
               s=30, c='red', marker='s')
    ax.scatter(dataArr[~positive, 1], dataArr[~positive, 2],
               s=30, c='green')
    x = np.arange(-3.0, 3.0, 0.1)
    # Boundary is where w0 + w1*x1 + w2*x2 == 0, solved for x2.
    y = (-weight[0] - weight[1] * x) / weight[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')  # was a second xlabel call, leaving the y axis unlabelled
    plt.show()


def stoGradAscent0(dataMatIn, classLabels):
    """Train weights with one in-order pass of stochastic gradient ascent.

    Args:
        dataMatIn: m x n samples (array or list of rows).
        classLabels: at least m class labels (0 or 1).

    Returns:
        A 1-D array of n weights.

    Raises:
        ValueError: if the samples are not 2-D or labels are missing.
    """
    data, labels = _as_training_arrays(dataMatIn, classLabels)
    alpha = 0.01  # fixed step size
    weights = np.ones(data.shape[1])
    for sample, label in zip(data, labels):
        h = sigmoid(np.dot(sample, weights))
        error = label - h
        weights = weights + alpha * error * sample
    return weights


def stoGradAscent1(dataMatIn, classLabels, numIter=150):
    """Train weights with randomised stochastic gradient ascent.

    Every pass visits each sample exactly once in a fresh random order, with
    a step size that decays as training proceeds.

    Args:
        dataMatIn: m x n samples (array or list of rows).
        classLabels: at least m class labels (0 or 1).
        numIter: number of passes over the data.

    Returns:
        A 1-D array of n weights.

    Raises:
        ValueError: if the samples are not 2-D or labels are missing.
    """
    data, labels = _as_training_arrays(dataMatIn, classLabels)
    m, n = data.shape
    weights = np.ones(n)
    for j in range(numIter):
        # A permutation gives sampling without replacement. The original drew
        # an index into a shrinking ``range`` (not deletable on Python 3) and
        # then used that position as the sample index directly.
        for i, sampleIndex in enumerate(np.random.permutation(m)):
            alpha = 4 / (1.0 + j + i) + 0.01  # decays but never reaches zero
            sample = data[sampleIndex]
            h = sigmoid(np.dot(sample, weights))
            error = labels[sampleIndex] - h
            weights = weights + alpha * error * sample
    return weights


def classisfyVector(inX, weights):
    """Classify one sample: 1.0 if ``sigmoid(inX . weights) > 0.5`` else 0.0."""
    features = np.asarray(inX, dtype=float).ravel()
    coefficients = np.asarray(weights, dtype=float).ravel()
    prob = sigmoid(np.dot(features, coefficients))
    return 1.0 if prob > 0.5 else 0.0


path1 = r'E:\file\python\test\test\logisticRession_data\horseColicTest.txt'
path2 = r'E:\file\python\test\test\logisticRession_data\horseColicTraining.txt'


def _loadColicFile(path):
    """Read a horse-colic file into (feature rows, float labels)."""
    features = []
    labels = []
    with open(path) as fr:
        for line in fr:
            fields = line.split()
            if not fields:
                continue
            features.append([float(v) for v in fields[:_NUM_COLIC_FEATURES]])
            # float() accepts both "1" and "1.000000" label spellings.
            labels.append(float(fields[_NUM_COLIC_FEATURES]))
    return features, labels


def colicTest(trainPath=None, testPath=None):
    """Train on the horse-colic training file and report the test error rate.

    Args:
        trainPath: training file; defaults to the module-level ``path2``.
        testPath: test file; defaults to the module-level ``path1``.

    Returns:
        The fraction of misclassified test samples (also printed).

    Raises:
        ValueError: if the test file holds no samples.
    """
    # Defaults are resolved at call time so reassigning path1/path2 is honoured.
    trainSet, labelSet = _loadColicFile(path2 if trainPath is None else trainPath)
    testSet, testLabels = _loadColicFile(path1 if testPath is None else testPath)
    if not testSet:
        raise ValueError('test file contains no samples')

    weights = stoGradAscent1(np.array(trainSet), labelSet, 500)
    errcount = 0.0
    for features, label in zip(testSet, testLabels):
        if int(classisfyVector(features, weights)) != int(label):
            errcount += 1.0
    errorrate = errcount / len(testSet)
    print('the error rate is :', errorrate)
    return errorrate


def multitext(numTests=10):
    """Run ``colicTest`` ``numTests`` times; print and return the mean error."""
    if numTests < 1:
        raise ValueError('numTests must be at least 1')
    errorsum = 0.0
    for _ in range(numTests):
        errorsum += colicTest()
    average = errorsum / float(numTests)
    print(average)
    return average
# -*- coding: utf-8 -*-
# Driver script: trains the logistic-regression weights with each of the three
# optimisers from logRegres and then runs the repeated horse-colic evaluation.
from numpy import *
from logRegres import *
import matplotlib.pyplot as plt

dataArr, labelMat = loadDataSet()

# Batch gradient ascent takes the plain list of rows.
W = gradAscent(dataArr, labelMat)
print(W)

# The stochastic trainers are handed an array rather than the list of rows.
we0 = stoGradAscent0(array(dataArr), labelMat)
we1 = stoGradAscent1(array(dataArr), labelMat)

# plotBestFit(we1) can be called here to draw the fitted decision boundary.

multitext()