数据的可视化和梯度上升法
import matplotlib. pyplot as plt
定义数据集加载函数
def loadDataSet():
    """Read the samples stored in ``testSet.txt``.

    Each non-blank line is expected to hold three whitespace-separated
    fields: two numeric features followed by an integer class label.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1, x1, x2]`` rows (the leading ``1`` is the constant term that
        pairs with the intercept weight) and ``labelMat`` is the list of
        integer labels, in file order.

    Raises:
        OSError: if ``testSet.txt`` cannot be opened.
        ValueError: if a field cannot be parsed as a number.
        IndexError: if a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager guarantees the handle is closed, even when a
    # malformed line raises part-way through the file.
    with open('testSet.txt') as f:
        for line in f:
            line_list = line.strip().split()
            if not line_list:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1, float(line_list[0]), float(line_list[1])])
            labelMat.append(int(line_list[2]))
    return dataMat, labelMat
定义激活函数
def sigmoid(inX):
    """Return the logistic function ``1 / (1 + exp(-inX))``.

    ``inX`` is passed straight to ``np.exp``, so the computation is
    applied element-wise when ``inX`` is a NumPy array or matrix.
    """
    denominator = 1 + np.exp(-inX)
    return 1.0 / denominator
import numpy as np
# Load the samples and split the two feature columns by class label so
# that each class can be drawn in its own colour.
dataMat, labelMat = loadDataSet()
# Bind the array as ``dataArr``: that is the name the statements below and
# the later training call read. ``dataMatArr`` is kept as an alias so any
# code still using the old name sees the same array.
dataArr = np.array(dataMat)
dataMatArr = dataArr
n = np.shape(dataArr)[0]
x_cord_1 = []
y_cord_1 = []
x_cord_2 = []
y_cord_2 = []
for sample, label in zip(dataArr, labelMat):
    # Column 0 is the constant term; columns 1 and 2 are the features.
    if int(label) == 1:
        x_cord_1.append(sample[1])
        y_cord_1.append(sample[2])
    else:
        x_cord_2.append(sample[1])
        y_cord_2.append(sample[2])
绘制图像
# Scatter the label-1 samples in red and the remaining samples in green
# on a single set of axes.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
for xs, ys, colour in (
    (x_cord_1, y_cord_1, 'red'),
    (x_cord_2, y_cord_2, 'green'),
):
    ax.scatter(xs, ys, s=30, c=colour)
<matplotlib.collections.PathCollection at 0x8db64a8>
实现梯度上升算法函数
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Args:
        dataMatIn: 2-D array-like of samples, one row per sample and one
            column per feature (including any constant column).
        classLabels: 1-D array-like of numeric labels, one per sample.
        alpha: Step size applied to every update.
        maxCycles: Number of full-batch update iterations.

    Returns:
        An ``np.matrix`` column vector with one weight per feature column.
    """
    # np.asmatrix replaces np.mat: the ``np.mat`` alias was removed in
    # NumPy 2.0, while ``np.asmatrix`` exists in both 1.x and 2.x.
    dataMatrix = np.asmatrix(dataMatIn)
    labelMatrix = np.asmatrix(classLabels).transpose()
    n = np.shape(dataMatrix)[1]
    # Start from a matrix so the return type is np.matrix even when
    # maxCycles is 0 and the loop body never runs.
    weights = np.asmatrix(np.ones((n, 1)))
    for _ in range(maxCycles):
        # ``*`` between matrices is a matrix product, so h holds one
        # prediction per sample.
        h = sigmoid(dataMatrix * weights)
        error = labelMatrix - h
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
# Train on the rows returned by loadDataSet. ``dataMat`` is the name that
# is actually assigned at module level; gradAscent converts it to a matrix
# itself, so the plain list of rows is sufficient.
weights = gradAscent(dataMat, labelMat)
# Bare expression kept from the notebook cell, where it echoes the result.
weights
matrix([[ 4.12414349],
[ 0.48007329],
[-0.6168482 ]])
分析数据,画出决策边界
def plotBestFit(weights):
    """Plot the samples from ``loadDataSet`` with the fitted decision line.

    Args:
        weights: The three model weights ``(w0, w1, w2)``. Any array-like
            that flattens to at least three numbers is accepted: a flat
            sequence, a ``(3, 1)`` ndarray, or the ``np.matrix`` column
            vector returned by ``gradAscent``.

    The line drawn is ``w0 + w1 * x1 + w2 * x2 = 0`` solved for ``x2``.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Flatten first: indexing an np.matrix yields 2-D results, which would
    # make ``y`` a (1, 60) matrix that ax.plot cannot pair with ``x``.
    w = np.asarray(weights, dtype=float).ravel()

    dataMat, labelMat = loadDataSet()
    x_cord_1 = []
    y_cord_1 = []
    x_cord_2 = []
    y_cord_2 = []
    for sample, label in zip(dataMat, labelMat):
        # sample[0] is the constant term; sample[1] and sample[2] are the
        # two plotted features.
        if int(label) == 1:
            x_cord_1.append(sample[1])
            y_cord_1.append(sample[2])
        else:
            x_cord_2.append(sample[1])
            y_cord_2.append(sample[2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(x_cord_1, y_cord_1, s=30, c='red')
    ax.scatter(x_cord_2, y_cord_2, s=30, c='green')

    x = np.arange(-3, 3, 0.1)
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()
# Hand the fitted weights to the plotting routine as a plain ndarray
# (``.A`` is the property form of ``getA()``).
boundary_weights = weights.A
plotBestFit(boundary_weights)