# logRegres.py
"""Logistic regression trained with batch and stochastic gradient ascent.

The module reads a whitespace-separated data file (two features and an
integer class label per row), fits a weight vector with one of the
optimizers below, and can plot the samples with the fitted decision line.
"""
# NOTE: the star import is kept so that names re-exported by this module stay
# available to existing callers. It shadows some builtins (e.g. ``sum``) with
# their NumPy counterparts; the functions below rely on the NumPy versions.
from numpy import *


def loadDataSet(filename='testSet.txt'):
    """Read the training samples from a whitespace-separated text file.

    Each non-blank row must hold two feature values followed by an integer
    class label. A constant 1.0 is prepended to every feature row so that
    the first weight acts as the intercept.

    Args:
        filename: Path of the data file. Defaults to ``'testSet.txt'``,
            the path the original implementation always used.

    Returns:
        A ``(dataMat, labelMat)`` tuple: ``dataMat`` is a list of
        ``[1.0, x1, x2]`` lists and ``labelMat`` a list of ``int`` labels.
    """
    dataMat = []
    labelMat = []
    # The context manager guarantees the handle is closed even if a row
    # fails to parse.
    with open(filename) as fr:
        for line in fr:
            lineArr = line.strip().split()
            if not lineArr:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat


def sigmoid(inX):
    """Return the logistic function ``1 / (1 + exp(-inX))`` of ``inX``.

    Works element-wise on NumPy arrays and matrices as well as on scalars.
    """
    return 1.0 / (1 + exp(-inX))


def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMatIn: 2-D sequence of samples, one row per sample.
        classLabels: Sequence with one label per sample.
        alpha: Step size applied to every update.
        maxCycles: Number of full passes over the data.

    Returns:
        An ``(n, 1)`` NumPy matrix of weights, ``n`` being the number of
        columns of ``dataMatIn``.
    """
    # ``asmatrix`` replaces the ``mat`` alias, which newer NumPy releases no
    # longer export; both build the same matrix object.
    dataMatrix = asmatrix(dataMatIn)
    labelMat = asmatrix(classLabels).transpose()
    n = shape(dataMatrix)[1]
    # Start from a matrix so the return type does not depend on maxCycles.
    weights = asmatrix(ones((n, 1)))
    for _ in range(maxCycles):
        # ``*`` is matrix multiplication here because the operands are
        # NumPy matrices.
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights


def stocGradAscent0(dataMatrix, classLabels, alpha=0.01):
    """Fit weights with one sequential pass of stochastic gradient ascent.

    The weights are updated once per sample, visiting the rows in order.

    Args:
        dataMatrix: 2-D array-like of samples, one row per sample.
        classLabels: Sequence with one label per sample.
        alpha: Step size applied to every update.

    Returns:
        A 1-D NumPy array with one weight per column of ``dataMatrix``.
    """
    # Work on a plain ndarray so ``row * weights`` is element-wise.
    dataMatrix = asarray(dataMatrix)
    m, n = shape(dataMatrix)
    weights = ones(n)
    for i in range(m):
        h = sigmoid(sum(dataMatrix[i] * weights))
        error = classLabels[i] - h
        weights = weights + alpha * error * dataMatrix[i]
    return weights


def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """Fit weights with randomized stochastic gradient ascent.

    Every one of the ``numIter`` passes visits each sample exactly once in
    random order, using a step size that decays as training progresses.

    Args:
        dataMatrix: 2-D array-like of samples, one row per sample.
        classLabels: Sequence with one label per sample.
        numIter: Number of passes over the data.

    Returns:
        A 1-D NumPy array with one weight per column of ``dataMatrix``.
    """
    dataMatrix = asarray(dataMatrix)
    m, n = shape(dataMatrix)
    weights = ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))
        for i in range(m):
            # Decaying step size; the constant keeps it strictly above 0.01
            # so late samples still influence the weights.
            alpha = 4 / (1.0 + j + i) + 0.01
            randIndex = int(random.uniform(0, len(dataIndex)))
            # Map the random position to the row it stands for and drop it
            # from the pool. The original indexed the data with the position
            # itself, so the removal had no effect on which rows were used.
            sampleIndex = dataIndex.pop(randIndex)
            h = sigmoid(sum(dataMatrix[sampleIndex] * weights))
            error = classLabels[sampleIndex] - h
            weights = weights + alpha * error * dataMatrix[sampleIndex]
    return weights


def plotBestFit(weights):
    """Scatter-plot the default data set and draw the fitted decision line.

    The samples are re-read with ``loadDataSet()``. Label-1 samples are
    drawn as red squares, all others as green dots.

    Args:
        weights: Three weights ``(w0, w1, w2)``. Any array-like is accepted,
            including the ``(3, 1)`` matrix returned by ``gradAscent``.
    """
    import matplotlib.pyplot as plt

    dataMat, labelMat = loadDataSet()
    # Flatten so that arrays, column vectors and matrices all behave alike.
    w = asarray(weights, dtype=float).ravel()

    xcord1 = []
    ycord1 = []
    xcord2 = []
    ycord2 = []
    for row, label in zip(dataMat, labelMat):
        if int(label) == 1:
            xcord1.append(row[1])
            ycord1.append(row[2])
        else:
            xcord2.append(row[1])
            ycord2.append(row[2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = arange(-3.0, 3.0, 0.1)
    # Decision boundary: w0*1.0 + w1*x + w2*y = 0, solved for y.
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()


if __name__ == "__main__":
    # Guarded so that importing the module does not read the data file,
    # train a model or open a plot window.
    dataArr, labelMat = loadDataSet()
    # Batch alternative: plotBestFit(gradAscent(dataArr, labelMat))
    weights = stocGradAscent1(array(dataArr), labelMat, 500)
    print(weights)
    plotBestFit(weights)