# coding=gbk
from numpy import *
def loadDataSet():
    """Load the sample data set from 'testSet.txt'.

    Each line of the file holds two feature values and an integer class
    label, whitespace-separated.

    Returns:
        dataMat:  list of [1.0, x, y] rows — the leading 1.0 is the bias
                  (intercept) input.
        labelMat: list of int class labels, one per row.
    """
    dataMat = []
    labelMat = []
    # 'with' guarantees the file handle is closed; the original opened
    # the file and never closed it.
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()
            # x / y coordinates of the point, prefixed with the bias input
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            # class the point belongs to
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
# Sigmoid: a smooth approximation of the sign/step function.
def sigmoid(inX):
    """Return the logistic sigmoid 1 / (1 + e^-inX) (scalar or elementwise)."""
    denom = 1.0 + exp(-inX)
    return 1.0 / denom
# Batch gradient ascent:
# to reach the maximum as fast as possible, every cycle moves a fixed
# step alpha along the direction of steepest ascent, for a fixed number
# of cycles.
def gradAscent(dataMatIn, classLabels):
    """Fit logistic-regression weights by batch gradient ascent.

    Args:
        dataMatIn:   2-D list/array, one row per sample ([1.0, x, y]).
        classLabels: list of 0/1 labels, one per sample.

    Returns:
        (n, 1) numpy matrix of fitted weights.
    """
    # Convert the Python lists to NumPy matrices for matrix arithmetic;
    # transpose() turns the labels into a column vector.
    dataMatrix = mat(dataMatIn)
    labelMat = mat(classLabels).transpose()
    m, n = shape(dataMatrix)          # (removed stray debug print of m, n)
    alpha = 0.001                     # step size
    maxCycles = 500                   # fixed number of ascent steps
    weights = ones((n, 1))            # start with all weights equal to 1
    for k in range(maxCycles):
        # error is the gap between the true class and the predicted
        # probability; the weights are adjusted in that direction.
        h = sigmoid(dataMatrix * weights)            # (m,1) predictions
        error = (labelMat - h)                       # (m,1) residuals
        # dataMatrix.transpose() * error is the gradient of the
        # log-likelihood with respect to the weights.
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
def plotBestFit(weights):
    """Scatter-plot both classes and draw the fitted decision boundary.

    Args:
        weights: length-3 weight vector. Accepts either the 1-D array
                 returned by the stochastic trainers or the (3,1) matrix
                 returned by gradAscent — the matrix form previously
                 produced a (1,N) y and broke ax.plot, so the input is
                 flattened first.
    """
    import matplotlib.pyplot as plt
    # Normalize to a flat 1-D array so indexing/arithmetic below works
    # for both matrix and array inputs.
    weights = asarray(weights).ravel()
    dataMat, labelMat = loadDataSet()
    dataArr = array(dataMat)
    n = shape(dataArr)[0]
    xcord1 = []; ycord1 = []   # class-1 points
    xcord2 = []; ycord2 = []   # class-0 points
    for i in range(n):
        if int(labelMat[i]) == 1:
            xcord1.append(dataArr[i, 1]); ycord1.append(dataArr[i, 2])
        else:
            xcord2.append(dataArr[i, 1]); ycord2.append(dataArr[i, 2])
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = arange(-3.0, 3.0, 0.1)
    # Boundary: the set of points where w0 + w1*x + w2*y = 0,
    # i.e. where the sigmoid input crosses zero.
    y = (-weights[0] - weights[1] * x) / weights[2]
    ax.plot(x, y)
    plt.xlabel('X1'); plt.ylabel('X2')
    plt.show()
# Stochastic gradient ascent: one pass over the data, updating the
# weights after every single sample instead of after the whole batch.
def stocGradAscent0(dataMatrix, classLabels):
    """Fit logistic-regression weights with one pass of stochastic ascent."""
    m, n = shape(dataMatrix)
    alpha = 0.01                 # fixed step size
    weights = ones(n)            # every weight starts at 1.0
    for sampleIdx in range(m):
        # predicted probability for this one sample
        prob = sigmoid(sum(dataMatrix[sampleIdx] * weights))
        residual = classLabels[sampleIdx] - prob
        # nudge the weights along this sample's gradient contribution
        weights = weights + alpha * residual * dataMatrix[sampleIdx]
    return weights
# Improved stochastic gradient ascent.
def stocGradAscent1(dataMatrix, classLabels, numIter=500):
    """Stochastic gradient ascent with a decaying step size and random
    sample order.

    Improvements over stocGradAscent0:
      * alpha shrinks with every update but never reaches 0 thanks to
        the additive 0.0001 constant, damping late oscillation;
      * within each iteration, samples are visited in random order
        without replacement, reducing periodic fluctuation.

    Args:
        dataMatrix:  2-D numpy array, one row per sample ([1.0, x, y]).
        classLabels: list of 0/1 labels, one per sample.
        numIter:     number of full passes over the data.

    Returns:
        1-D numpy array of fitted weights.
    """
    m, n = shape(dataMatrix)
    weights = ones(n)   # initialize to all ones
    for j in range(numIter):
        dataIndex = list(range(m))
        for i in range(m):
            # alpha decreases with iteration but does not go to 0
            # because of the constant term.
            alpha = 4 / (1.0 + j + i) + 0.0001
            # Pick a position in the remaining-index list, then look up
            # the actual sample index. The original indexed dataMatrix
            # with the position itself, which skewed the sampling toward
            # a shrinking prefix of the data.
            randPos = int(random.uniform(0, len(dataIndex)))
            sampleIdx = dataIndex[randPos]
            h = sigmoid(sum(dataMatrix[sampleIdx] * weights))
            error = classLabels[sampleIdx] - h
            # Bug fix: the original multiplied by the literal 1 instead
            # of the computed error, so the prediction residual never
            # influenced the update at all.
            weights = weights + alpha * error * dataMatrix[sampleIdx]
            del(dataIndex[randPos])
    return weights
if __name__ == '__main__':
    # Demo: fit weights with 20 passes of improved stochastic gradient
    # ascent and plot the resulting decision boundary. Guarded so that
    # importing this module no longer triggers file I/O and plotting.
    dataArr, labelMat = loadDataSet()
    weights = stocGradAscent1(array(dataArr), labelMat, 20)
    plotBestFit(weights)
# NOTE(review): removed a page-feedback widget ("was this recommendation
# helpful?" rating text) accidentally captured when this code was copied
# from a web page — it was not Python and made the file unparseable.