# 线性回归梯度下降 — linear regression via gradient descent
import numpy as np
import matplotlib. pyplot as plt
import matplotlib as mpl
from sklearn. linear_model import LinearRegression
# Configure matplotlib to render the Chinese plot labels with the SimHei font.
# FIX: removed the dead `hanjiahao = LinearRegression()` — the object was
# never used anywhere below (training is done by the hand-written gradDecs).
mpl.rcParams['font.sans-serif'] = ['SimHei']
def model(x, theta):
    """Linear hypothesis: the matrix product x @ theta."""
    return np.dot(x, theta)
def costfunc(h, y):
    """Half mean squared error, J = (1/2m) * e^T e, returned as a 1x1 array."""
    residual = h - y
    return residual.T.dot(residual) / (2.0 * len(h))
def gradDecs(x, y, alpha=0.01, max_iter=15000):
    """Batch gradient descent for linear regression.

    x: (m,n) design matrix (bias column included); y: (m,1) targets.
    Returns (jarr, theta): the per-iteration cost history and the
    learned (n,1) parameter vector.
    """
    samples, features = x.shape
    theta = np.zeros((features, 1))
    jarr = np.zeros(max_iter)
    for step in range(max_iter):
        h = model(x, theta)
        e = h - y
        jarr[step] = costfunc(h, y)
        theta -= alpha * (x.T.dot(e) / samples)
    return jarr, theta
def preprocess(x, y):
    """Prepend a bias column of ones to x; y is passed through unchanged."""
    bias = np.ones((len(x), 1))
    return np.hstack([bias, x]), y
# Train linear regression on the GDP training CSV, then plot predictions
# against ground truth on the held-out test CSV (last column = target).
data_train = np.loadtxt('train_GDP.txt', delimiter=',')
data_test = np.loadtxt('test_GDP.txt', delimiter=',')
x_train = data_train[:, :-1]
y_train = data_train[:, -1:]
x_train, y_train = preprocess(x_train, y_train)
jarr, theta = gradDecs(x_train, y_train)
x_test = data_test[:, :-1]
y_test = data_test[:, -1:]
x_test, y_test = preprocess(x_test, y_test)
h = model(x_test, theta)
# Truth lies on the diagonal (y vs y); predictions scatter around it.
plt.scatter(y_test, y_test, c='r', label='真实值')
plt.scatter(y_test, h, c='b', label='预测值')
plt.legend()
plt.show()
# 线性逻辑回归 — logistic regression (linear decision boundary)
import numpy as np
import matplotlib as mpl
import matplotlib. pyplot as plt
# Chinese glyphs and proper minus signs in plots.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
# BUG FIX: modern NumPy rejects threshold=np.nan with a ValueError;
# sys.maxsize is the documented way to disable array truncation.
import sys
np.set_printoptions(threshold=sys.maxsize)
def g(z):
    """Logistic sigmoid 1 / (1 + e^-z)."""
    return 1.0 / (np.exp(-z) + 1)
def model(x, theta):
    """Logistic hypothesis: sigmoid of the linear score x @ theta."""
    return g(x.dot(theta))
def costfunc(h, y):
    """Mean cross-entropy of predictions h against 0/1 labels y."""
    log_likelihood = y * np.log(h) + (1 - y) * np.log(1 - h)
    return -np.sum(log_likelihood) / len(h)
def gradDesc(x, y, alpha=0.01, max_iter=15000):
    """Batch gradient descent for logistic regression.

    Returns (jarr, theta): per-iteration cost history and the learned
    (n,1) parameter vector.
    """
    samples, features = x.shape
    theta = np.zeros((features, 1))
    jarr = np.zeros(max_iter)
    for step in range(max_iter):
        h = model(x, theta)
        jarr[step] = costfunc(h, y)
        gradient = x.T.dot(h - y) / samples
        theta -= alpha * gradient
    return jarr, theta
def accuracy(h, y):
    """Fraction of predictions matching y.

    h is binarized IN PLACE at threshold 0.5 (callers rely on this
    side effect downstream).
    """
    total = len(h)
    hits = 0
    for idx in range(total):
        h[idx] = np.where(h[idx] >= 0.5, 1, 0)
        if h[idx] == y[idx]:
            hits += 1
    return hits / total
def draw(x, y, theta):
    """Scatter the two classes (features in columns 1-2 of the design
    matrix) and overlay the decision boundary theta0 + theta1*x1 + theta2*x2 = 0.

    BUG FIX: the right endpoint of the boundary line used theta[2]*maxx1;
    the coefficient on x1 is theta[1] (the left endpoint already used it).
    """
    zeor = y[:, 0] == 0
    one = y[:, 0] == 1
    plt.scatter(x[zeor, 1], x[zeor, 2], c='b', label='负向类')
    plt.scatter(x[one, 1], x[one, 2], c='r', label='正向类')
    minx1 = x[:, 1].min()
    maxx1 = x[:, 1].max()
    # Solve the boundary equation for x2 at both ends of the x1 range.
    minx2 = -(theta[0] + theta[1] * minx1) / theta[2]
    maxx2 = -(theta[0] + theta[1] * maxx1) / theta[2]
    plt.plot([minx1, maxx1], [minx2, maxx2])
    plt.legend()
    plt.show()
def preprocess(x, y):
    """Z-score normalize x (sample std), prepend a bias column, column-ify y."""
    mu = np.mean(x, 0)
    sd = np.std(x, 0, ddof=1)
    scaled = (x - mu) / sd
    rows = len(scaled)
    return np.c_[np.ones((rows, 1)), scaled], np.c_[y]
# Logistic regression on ex2data1: normalize, split 70/30, train, evaluate.
data = np.loadtxt('ex2data1.txt', delimiter=',')
x, y = np.split(data, [-1], axis=1)
x, y = preprocess(x, y)
n = int(len(x) * 0.7)
x_train, x_test = np.split(x, [n])
y_train, y_test = np.split(y, [n])
jarr, theta = gradDesc(x_train, y_train)
h = model(x_test, theta)
# BUG FIX: test accuracy must be computed against y_test (the original
# passed y_train, whose length and rows do not match the test predictions).
print('测试集预测精度', accuracy(h, y_test))
draw(x_test, y_test, theta)
# Plot the sigmoid over a small range for reference.
a = np.arange(-5, 5)
b = g(a)
plt.plot(a, b)
plt.show()
# 正则逻辑回归 — regularized logistic regression
import numpy as np
import matplotlib. pyplot as plt
import matplotlib as mpl
# Render Chinese glyphs (SimHei) and proper minus signs in plots.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def g(z):
    """Logistic sigmoid."""
    return 1 / (1 + np.exp(-z))
def model(x, theta):
    """Logistic hypothesis: sigmoid of x @ theta."""
    score = x.dot(theta)
    return g(score)
def costfunc(h, y, R):
    """Regularized cross-entropy cost.

    h: predicted probabilities; y: 0/1 labels; R: precomputed
    regularization term added to the data loss.
    BUG FIX: the unregularized logistic loss is -(1/m)*sum(...); the
    original divided by 2m, which is the linear-regression factor.
    """
    m = len(h)
    J = -1.0 / m * np.sum(y * np.log(h) + (1 - y) * np.log(1 - h)) + R
    return J
def gradDesc(x, y, alpha=0.01, max_iter=15000, lamda=1.2):
    """Gradient descent for L2-regularized logistic regression.

    theta[0] (bias) is excluded from the penalty. Returns (jarr, theta):
    per-iteration costs and the learned parameters.
    Fixes vs the original:
      * R computed `lamda/2*m` which parses as lamda*m/2; the standard
        penalty is lamda/(2m) * theta^T theta.
      * the gradient's penalty term lacked the 1/m factor.
    """
    m, n = x.shape
    theta = np.zeros((n, 1))
    jarr = np.zeros(max_iter)
    for i in range(max_iter):
        h = model(x, theta)
        theta_r = np.copy(theta)
        theta_r[0] = 0  # never regularize the bias
        R = lamda / (2 * m) * theta_r.T.dot(theta_r)
        jarr[i] = costfunc(h, y, R)
        e = h - y
        theta -= alpha * (1.0 / m * x.T.dot(e) + lamda / m * theta_r)
    return jarr, theta
def jingdu(h, y):
    """Accuracy after thresholding h at 0.5 (h is binarized in place)."""
    total = len(h)
    correct = 0
    for idx in range(total):
        h[idx] = np.where(h[idx] >= 0.5, 1, 0)
        correct += 1 if h[idx] == y[idx] else 0
    return correct / total
def draw(x, y, theta):
    """Scatter the two classes and overlay a linear decision boundary.

    x: design matrix with bias in column 0; y: (m,1) array of 0/1 labels.
    NOTE(review): the scatter plots feature columns 2 and 3, but the
    boundary line uses theta[0], theta[1], theta[2] — which correspond to
    the bias and columns 1-2. The indices look inconsistent; confirm
    against the dataset's column layout before relying on the plot.
    """
    zero = y[:, 0] == 0
    one = y[:, 0] == 1
    plt.scatter(x[zero, 2], x[zero, 3], c='r', label='负向类')
    plt.scatter(x[one, 2], x[one, 3], c='b', label='正向类')
    min_x = x[:, 2].min()
    max_x = x[:, 2].max()
    # Solve theta0 + theta1*x1 + theta2*x2 = 0 for x2 at both endpoints.
    min_y = -(theta[0] + min_x * theta[1]) / theta[2]
    max_y = -(theta[0] + max_x * theta[1]) / theta[2]
    plt.plot([min_x, max_x], [min_y, max_y])
    plt.title('正/负向图')
    plt.legend()
    plt.show()
def preprocess(x, y):
    """Z-score normalize x, prepend a bias column, reshape y to a column.

    BUG FIX: the original divided x by np.std(y, ...) — the standard
    deviation of the LABELS — instead of the std of the features being
    scaled.
    """
    m = len(x)
    mean = np.mean(x, 0)
    std = np.std(x, 0, ddof=1)
    x = (x - mean) / std
    x = np.c_[np.ones((m, 1)), x]
    y = np.c_[y]
    return x, y
# Regularized logistic regression on separate train/test CSV files
# (last column = label).
data_train = np.loadtxt('train.txt', delimiter=',')
data_test = np.loadtxt('test.txt', delimiter=',')
x_train, y_train = np.split(data_train, [-1], axis=1)
x_test, y_test = np.split(data_test, [-1], axis=1)
# NOTE(review): train and test are normalized with their own mean/std;
# normally the training statistics should be reused for the test set.
x_train, y_train = preprocess(x_train, y_train)
x_test, y_test = preprocess(x_test, y_test)
jarr, theta = gradDesc(x_train, y_train)
h = model(x_test, theta)
# jingdu() binarizes h in place, so draw(x_test, h, ...) below plots the
# hard 0/1 predictions rather than probabilities.
print('精度为:', jingdu(h, y_test))
draw(x_test, y_test, theta)
draw(x_test, h, theta)
a = np.arange(-10, 10)
b = g(a)
plt.plot(a, b)
plt.title('sigmoid函数图')
plt.show()
from sklearn. linear_model import LinearRegression as LR
from sklearn. preprocessing import OneHotEncoder
from sklearn. metrics import confusion_matrix, classification_report
from sklearn. neural_network import MLPClassifier
from sklearn. svm import SVC
from sklearn. cluster import KMeans
from sklearn. decomposition import PCA
from sklearn. tree import DecisionTreeRegressor as xian
from sklearn. tree import DecisionTreeClassifier as fen
import sklearn. datasets as dis
import matplotlib. pyplot as plt
import numpy as np
import numpy as np
import matplotlib as mpl
import matplotlib. pyplot as plt
from sklearn. cluster import KMeans
# Elbow-method demo: run KMeans for k = 1..19 on a small 2-D point set,
# plot inertia per k, then cluster with k = 2 and plot the assignments.
mpl.rcParams['font.sans-serif'] = ['SimHei']
x = [[2, 5], [4, 6], [3, 1], [6, 4], [7, 2], [8, 4], [2, 3], [3, 1], [5, 7],
     [6, 9], [12, 16], [10, 11], [15, 19], [16, 12], [11, 15], [10, 14],
     [19, 11], [17, 14], [16, 11], [13, 19]]
mean = np.mean(x, 0)
std = np.std(x, 0, ddof=1)
x = (x - mean) / std  # list -> standardized ndarray
k = np.arange(1, 20)
jarr = []
for i in k:
    model = KMeans(i)
    model.fit(x)
    jarr.append(model.inertia_)
    plt.annotate(i, xy=(i, model.inertia_))
plt.plot(k, jarr)
plt.title('图')
plt.show()
k = 2
model = KMeans(2)
carr = model.fit_predict(x)
# BUG FIX: removed `model.fit(X)` (X was undefined -> NameError) and
# `model.predict()` (missing required argument -> TypeError); the
# fit_predict call above already fit the model.
cen = model.cluster_centers_
plt.scatter(x[:, 0], x[:, 1], c=carr, cmap=plt.cm.Paired)
plt.scatter(cen[:, 0], cen[:, 1], s=100, c=['r', 'g'], marker='^')
for i in range(k):
    plt.annotate('中心' + str(i + 1), xy=(cen[i, 0], cen[i, 1]), fontsize=12)
plt.title('聚类归属散点图')
plt.show()
import numpy as np
import sklearn. datasets as dis
import matplotlib. pyplot as plt
import matplotlib as mpl
from sklearn. metrics import confusion_matrix, classification_report
# Render Chinese glyphs (SimHei) and proper minus signs in plots.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def g(z):
    """Sigmoid activation."""
    return 1.0 / (np.exp(-z) + 1)
def model(x, theta):
    """Logistic hypothesis: sigmoid of the linear score."""
    return g(x.dot(theta))
def costfunc(y, h, R):
    """Mean cross-entropy loss plus the regularization term R."""
    data_loss = np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))
    return -data_loss / len(y) + R
def gradDesc(x, y, alpha=0.01, max_iter=1500, lamda=1.2):
    """L2-regularized logistic-regression training loop.

    The bias theta[0] is excluded from the penalty. Returns
    (jarr, theta): cost history and learned parameters.
    """
    samples, features = x.shape
    theta = np.zeros((features, 1))
    jarr = np.zeros(max_iter)
    for step in range(max_iter):
        h = model(x, theta)
        penalized = np.copy(theta)
        penalized[0] = 0
        R = lamda / (2 * samples) * penalized.T.dot(penalized)
        jarr[step] = costfunc(y, h, R)
        theta -= alpha * (1.0 / len(y) * x.T.dot(h - y)
                          + lamda / samples * penalized)
    return jarr, theta
def score(y, h):
    """Accuracy after thresholding h at 0.5; h is binarized in place."""
    correct = 0
    for idx, label in enumerate(y):
        h[idx] = np.where(h[idx] >= 0.5, 1, 0)
        if h[idx] == label:
            correct += 1
    return correct / len(y)
def preprocess(x, y):
    """Standardize x column-wise, prepend a bias column, column-ify y."""
    centered = (x - np.mean(x, 0)) / np.std(x, 0, ddof=1)
    design = np.c_[np.ones((len(centered), 1)), centered]
    return design, np.c_[y]
# Binary logistic regression on iris classes 0/1 using features 1-2.
data = dis.load_iris()
x = data.data
y = data.target
x = x[y != 2, 1:3]
y = y[y != 2]
# Shuffle with a fixed seed so the 65/35 split is reproducible.
np.random.seed(2)
oder = np.random.permutation(len(x))
x = x[oder]
y = y[oder]
x, y = preprocess(x, y)
trainx, testx = np.split(x, [int(len(x) * 0.65)])
trainy, testy = np.split(y, [int(len(y) * 0.65)])
jarr, theta = gradDesc(trainx, trainy)
plt.plot(jarr)
plt.title('代价函数')
plt.show()
h = model(testx, theta)
# score() binarizes h in place, so the confusion matrix and report
# below receive hard 0/1 predictions.
print('test score:', score(testy, h))
print(confusion_matrix(testy, h))
print(classification_report(testy, h))
import numpy as np
import matplotlib. pyplot as plt
import matplotlib as mpl
from sklearn. preprocessing import OneHotEncoder
import sklearn. datasets as dis
from sklearn. metrics import confusion_matrix, classification_report
# Render Chinese glyphs (SimHei) and proper minus signs in plots.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def g(z, driver=False):
    """Sigmoid activation; with driver=True, its derivative expressed in
    terms of an already-activated value z (i.e. z = sigmoid(pre-activation))."""
    if driver:
        return z * (1 - z)
    return 1.0 / (np.exp(-z) + 1)
def costfunc(y, h):
    """Mean cross-entropy between targets y and predictions h."""
    m = len(y)
    return -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h)) / m
def model(x, theta):
    """Forward pass: return activations [a0=x, a1, ..., aL], one per layer."""
    activations = [x]
    for weights in theta:
        activations.append(g(activations[-1].dot(weights)))
    return activations
def BP(a, y, theta, alpha):
    """One backpropagation step; mutates and returns the weight list theta.

    a: activations from model() (a[0] is the input, a[-1] the output);
    y: one-hot targets; alpha: learning rate.
    """
    len_a = len(a)
    len_theta = len(theta)
    delat = []
    # Output-layer error term (prediction minus target).
    delat.append(a[len_a - 1] - y)
    # Walk the layers backwards, propagating the error through each
    # weight matrix and the sigmoid derivative of that layer's activation.
    for i in range(len_theta - 1, -1, -1):
        if i == 0:
            # Placeholder for the input layer (no error term is needed there).
            delat.append(0)
            break
        delat_up = delat[len(delat) - 1]
        value = delat_up.dot(theta[i].T) * g(a[i], driver=True)
        delat.append(value)
    # delat was built output-to-input; flip it so delat[i+1] pairs with a[i].
    delat.reverse()
    # Gradient-descent update for every weight matrix.
    for i in range(len_theta):
        value = 1.0 / len(y) * a[i].T.dot(delat[i + 1])
        theta[i] -= alpha * value
    return theta
def gradDesc(x, y, hidden_layear_sizes=(400, 100, 50,), max_iter=1500, alpha=0.1):
    """Train a fully-connected sigmoid network with backprop.

    Weights are drawn uniformly from [-1, 1). Returns (jarr, theta):
    per-iteration costs and the trained weight-matrix list.
    """
    m, n = x.shape
    out_cols = y.shape[1]
    # Consecutive layer widths (input, hidden..., output) give the shape
    # of each weight matrix; the random draws happen in the same order as
    # a per-layer loop would make them.
    widths = [n] + list(hidden_layear_sizes) + [out_cols]
    theta = [2 * np.random.rand(widths[j], widths[j + 1]) - 1
             for j in range(len(widths) - 1)]
    jarr = np.zeros(max_iter)
    for i in range(max_iter):
        a = model(x, theta)
        jarr[i] = costfunc(y, a[-1])
        theta = BP(a, y, theta, alpha)
    return jarr, theta
def score(y, h):
    """Accuracy by comparing argmax over one-hot rows."""
    matches = sum(1 for truth, pred in zip(y, h)
                  if np.argmax(pred) == np.argmax(truth))
    return matches / len(y)
def preprocess(x, y):
    """Standardize x, prepend a bias column of ones, make y a column vector."""
    scaled = (x - np.mean(x, 0)) / np.std(x, 0, ddof=1)
    with_bias = np.c_[np.ones((len(scaled), 1)), scaled]
    return with_bias, np.c_[y]
# Multi-layer network (from-scratch backprop) on iris classes 0/1,
# features 1-3, with one-hot-encoded labels.
data = dis.load_iris()
x = data.data
y = data.target
x = x[y != 2, 1:4]
y = y[y != 2]
x, y = preprocess(x, y)
oder = OneHotEncoder(categories='auto')
y = oder.fit_transform(y).toarray()
# Fixed seed keeps the shuffle (and the weight init below) reproducible.
np.random.seed(2)
coder = np.random.permutation(len(x))
x = x[coder]
y = y[coder]
trainx, testx = np.split(x, [int(len(x) * 0.65)])
trainy, testy = np.split(y, [int(len(x) * 0.65)])
jarr, theta = gradDesc(trainx, trainy)
plt.plot(jarr)
plt.title('代价曲线')
plt.show()
a = model(trainx, theta)
print('train score:', score(trainy, a[len(a) - 1]))
a = model(testx, theta)
print('test score:', score(testy, a[len(a) - 1]))
# Collapse one-hot rows to class indices for sklearn's metrics.
a_last = np.argmax(a[len(a) - 1], axis=1)
testy = np.argmax(testy, axis=1)
print(confusion_matrix(testy, a_last))
print(classification_report(testy, a_last))
import numpy as np
import matplotlib. pyplot as plt
import matplotlib as mpl
from sklearn. preprocessing import OneHotEncoder
import sklearn. datasets as dis
from sklearn. metrics import confusion_matrix, classification_report
# Render Chinese glyphs (SimHei) and proper minus signs in plots.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def g(z, driver=False):
    """Sigmoid; with driver=True, its derivative given an activation z."""
    if driver:
        return z * (1 - z)
    return 1 / (1 + np.exp(-z))
def costfunc(y, h):
    """Average cross-entropy loss over all samples."""
    per_sample = y * np.log(h) + (1 - y) * np.log(1 - h)
    return -per_sample.sum() / len(y)
def model(x, theta1, theta2):
    """Two-layer forward pass; returns (hidden activation a2, output a3)."""
    a2 = g(x.dot(theta1))
    a3 = g(a2.dot(theta2))
    return a2, a3
def BP(x, a2, a3, y, theta1, theta2, alpha):
    """Single backprop update for the two-layer network.

    Mutates both weight matrices in place and returns them.
    """
    # Output error, then hidden error via the sigmoid derivative.
    delat3 = a3 - y
    delat2 = delat3.dot(theta2.T) * g(a2, driver=True)
    # Average gradients over the batch, then take a descent step.
    grad2 = 1.0 / len(x) * a2.T.dot(delat3)
    grad1 = 1.0 / len(x) * x.T.dot(delat2)
    theta2 -= alpha * grad2
    theta1 -= alpha * grad1
    return theta1, theta2
def gradDesc(x, y, hidden_layear_sizes=(100,), max_iter=1500, alpha=0.1):
    """Train the two-layer sigmoid network; returns (jarr, theta1, theta2)."""
    m, n = x.shape
    hidden = hidden_layear_sizes[0]
    out_cols = y.shape[1]
    # Uniform init in [-1, 1), same draw order as the original.
    theta1 = 2 * np.random.rand(n, hidden) - 1
    theta2 = 2 * np.random.rand(hidden, out_cols) - 1
    jarr = np.zeros(max_iter)
    for step in range(max_iter):
        a2, a3 = model(x, theta1, theta2)
        jarr[step] = costfunc(y, a3)
        theta1, theta2 = BP(x, a2, a3, y, theta1, theta2, alpha)
    return jarr, theta1, theta2
def score(y, h):
    """Fraction of rows whose argmax prediction matches the target."""
    matches = 0
    for truth, pred in zip(y, h):
        if np.argmax(pred) == np.argmax(truth):
            matches += 1
    return matches / len(y)
def preprocess(x, y):
    """Z-score normalize x, add a leading bias column, column-ify y."""
    normalized = (x - np.mean(x, 0)) / np.std(x, 0, ddof=1)
    rows = len(normalized)
    return np.c_[np.ones((rows, 1)), normalized], np.c_[y]
# Two-layer network (from-scratch backprop) on iris classes 0/1,
# features 1-2, with one-hot-encoded labels.
data = dis.load_iris()
x = data.data
y = data.target
x = x[y != 2, 1:3]
y = y[y != 2]
x, y = preprocess(x, y)
oder = OneHotEncoder(categories='auto')
y = oder.fit_transform(y).toarray()
# Fixed seed keeps the shuffle (and the weight init) reproducible.
np.random.seed(2)
coder = np.random.permutation(len(x))
x = x[coder]
y = y[coder]
trainx, testx = np.split(x, [int(len(x) * 0.65)])
trainy, testy = np.split(y, [int(len(x) * 0.65)])
jarr, theta1, theta2 = gradDesc(trainx, trainy)
plt.plot(jarr)
plt.title('代价曲线')
plt.show()
a2, a3 = model(trainx, theta1, theta2)
print('train score:', score(trainy, a3))
a2, a3 = model(testx, theta1, theta2)
print('test score:', score(testy, a3))
# Collapse one-hot rows to class indices for sklearn's metrics.
a3 = np.argmax(a3, axis=1)
testy = np.argmax(testy, axis=1)
print('混淆矩阵:', confusion_matrix(testy, a3))
print('分类报告:', classification_report(testy, a3))
import numpy as np
import sklearn. datasets as dis
from sklearn. neural_network import MLPClassifier
from sklearn. metrics import confusion_matrix, classification_report
from sklearn. preprocessing import OneHotEncoder
# sklearn MLPClassifier on iris classes 0/1 (features 1-3), min-max scaled.
data = dis.load_iris()
x = data.data
y = data.target
x = x[y != 2, 1:4]
y = y[y != 2]
# Global (not per-column) min-max scaling over the whole feature matrix.
minx = x.min()
maxx = x.max()
x = (x - minx) / (maxx - minx)
x = np.c_[np.ones((len(x), 1)), x]
y = np.c_[y]
oder = OneHotEncoder(categories='auto')
y = oder.fit_transform(y).toarray()
np.random.seed(2)
coser = np.random.permutation(len(x))
x = x[coser]
y = y[coser]
trainx, testx = np.split(x, [int(len(x) * 0.65)])
trainy, testy = np.split(y, [int(len(x) * 0.65)])
model = MLPClassifier(hidden_layer_sizes=(400, 100), max_iter=15000)
model.fit(trainx, trainy)
h = model.predict(testx)
print('train score', model.score(trainx, trainy))
print('test score', model.score(testx, testy))
# predict() returns one-hot rows here (multilabel fit); take argmax for
# sklearn's metrics.
h = np.argmax(h, axis=1)
testy = np.argmax(testy, axis=1)
print('混淆', confusion_matrix(testy, h))
print('报告', classification_report(testy, h))
import numpy as np
import matplotlib. pyplot as plt
import matplotlib as mpl
from sklearn. svm import SVC
import sklearn. datasets as dis
mpl.rcParams['axes.unicode_minus'] = False
# SVM demo on iris classes 0/1 (features 1-2) with margin visualization.
data = dis.load_iris()
x = data.data
y = data.target
x = x[y != 2, 1:3]
y = y[y != 2]
mean = np.mean(x, 0)
siga = np.std(x, 0, ddof=1)
x = (x - mean) / siga
# BUG FIX: iris rows are ordered by class, so the original unshuffled
# 70/30 split left the test set with a single class; shuffle with the
# same fixed seed used by the other experiments in this file.
np.random.seed(2)
order = np.random.permutation(len(x))
x = x[order]
y = y[order]
num = int(len(x) * 0.7)
trainx, testx = np.split(x, [num])
trainy, testy = np.split(y, [num])
model = SVC(C=1.0, gamma='auto')
model.fit(trainx, trainy)
testh = model.predict(testx)
print('向量的个数:', model.n_support_)
print('向量的索引:', model.support_)
sup_zu = model.support_vectors_
print(sup_zu)
# Dense grid over the feature range for the decision-function contours.
xx, yy = np.mgrid[x[:, 0].min():x[:, 0].max():200j,
                  x[:, 1].min():x[:, 1].max():200j]
z = model.decision_function(np.c_[xx.ravel(), yy.ravel()])
z.shape = xx.shape
plt.contourf(xx, yy, z > 0, cmap=plt.cm.Paired)
plt.contour(xx, yy, z, levels=[-1, 0, 1], linestyles=['--', '-', '--'])
plt.scatter(x[:, 0], x[:, 1], c=y, cmap=plt.cm.Paired)
plt.scatter(sup_zu[:, 0], sup_zu[:, 1], s=100, cmap=plt.cm.Paired)
plt.show()
from sklearn. linear_model import LinearRegression as LR
from sklearn. preprocessing import OneHotEncoder
from sklearn. metrics import confusion_matrix, classification_report
from sklearn. neural_network import MLPClassifier
from sklearn. svm import SVC
from sklearn. cluster import KMeans
from sklearn. decomposition import PCA
from sklearn. tree import DecisionTreeRegressor as xian
from sklearn. tree import DecisionTreeClassifier as fen
import sklearn. datasets as dis
import matplotlib. pyplot as plt
import numpy as np
# PCA visualization of the breast-cancer dataset, then a decision-tree fit.
data = dis.load_breast_cancer()
x = data.data
y = data.target
# FIX: removed the dead `model = LR()` — the object was immediately
# overwritten by the PCA instance on the next line.
model = PCA(3)
z = model.fit_transform(x, y)
print('特征方差', model.explained_variance_)
print('特征方差比率', model.explained_variance_ratio_)
plt.scatter(z[y == 0, 0], z[y == 0, 1], label='-1')
plt.scatter(z[y == 1, 0], z[y == 1, 1], label='1')
plt.legend()
plt.show()
model = fen(max_depth=5)
model.fit(x, y)
print('score:', model.score(x, y))
import numpy as np
import sklearn. datasets as dis
import matplotlib. pyplot as plt
import matplotlib as mpl
# Render Chinese glyphs (SimHei) and proper minus signs in plots.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def model(x, theta):
    """Linear hypothesis x @ theta."""
    return x @ theta
def costfunc(y, h):
    """Half mean squared error, (1/2m) * e^T e, as a 1x1 array."""
    diff = h - y
    return diff.T.dot(diff) / (2.0 * len(y))
def gradDesc(x, y, alpha=0.01, max_iter=15000):
    """Batch gradient descent for linear regression.

    Returns (jarr, theta): per-iteration cost history and the learned
    (n,1) parameter vector.
    FIX: dropped the leftover per-iteration `print(jarr[i])` — it emitted
    15000 lines of debug output; the cost history is already returned in
    jarr and plotted by the caller.
    """
    m, n = x.shape
    theta = np.zeros((n, 1))
    jarr = np.zeros(max_iter)
    for i in range(max_iter):
        h = model(x, theta)
        jarr[i] = costfunc(y, h)
        theta -= alpha * (1.0 / len(x) * x.T.dot(h - y))
    return jarr, theta
def preprocess(x, y):
    """Standardize x, prepend a bias column of ones, make y a column."""
    standardized = (x - np.mean(x, 0)) / np.std(x, 0, ddof=1)
    design = np.c_[np.ones((len(standardized), 1)), standardized]
    return design, np.c_[y]
def score(y, h):
    """Coefficient of determination R^2 of predictions h against y."""
    ss_res = np.sum((y - h) ** 2)
    ss_tot = np.sum((y - y.mean()) ** 2)
    return 1 - ss_res / ss_tot
# Linear regression on the Boston housing dataset.
# NOTE(review): sklearn.datasets.load_boston was removed in scikit-learn
# 1.2; running this script requires an older scikit-learn version.
data = dis.load_boston()
x = data.data
y = data.target
x, y = preprocess(x, y)
# Fixed seed keeps the 70/30 split reproducible.
np.random.seed(2)
oser = np.random.permutation(len(x))
x = x[oser]
y = y[oser]
trainx, testx = np.split(x, [int(len(x) * 0.7)])
trainy, testy = np.split(y, [int(len(x) * 0.7)])
jarr, theta = gradDesc(trainx, trainy)
plt.plot(jarr)
plt.show()
h = model(testx, theta)
print('test score', score(testy, h))
# Truth on the diagonal (y vs y); predictions scatter around it.
plt.scatter(testy, testy, c='r', label='真实值')
plt.scatter(testy, h, c='b', label='预测值')
plt.legend()
plt.show()