参考
python大战机器学习
01.线性模型
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,linear_model,discriminant_analysis,model_selection
def load_data():
    """Load the diabetes dataset and split it 75%/25% into train/test.

    :return: tuple (X_train, X_test, y_train, y_test)
    """
    dataset = datasets.load_diabetes()
    return model_selection.train_test_split(
        dataset.data, dataset.target, test_size=0.25, random_state=0)
def test_LinearRegression(*data):
    """Fit ordinary least squares and report coefficients, intercept,
    mean squared test error, and the R^2 score.

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    model = linear_model.LinearRegression()
    model.fit(X_train, y_train)
    # Learned parameters.
    print('权重: %s ,截距: %.2f ' % (model.coef_, model.intercept_))
    # Mean squared error on the held-out set.
    residuals = model.predict(X_test) - y_test
    print("残差平方和: %.2f " % np.mean(residuals ** 2))
    # R^2 on the held-out set.
    print('score: %.2f' % model.score(X_test, y_test))
# Demo driver: split the diabetes data, then fit and score OLS.
split = load_data()
X_train, X_test, y_train, y_test = split
test_LinearRegression(X_train, X_test, y_train, y_test)
02.岭回归
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,linear_model,discriminant_analysis,model_selection
def load_data():
    """Return the diabetes data as a (X_train, X_test, y_train, y_test)
    train/test split (25% held out, fixed seed for reproducibility)."""
    bunch = datasets.load_diabetes()
    return model_selection.train_test_split(
        bunch.data,
        bunch.target,
        test_size=0.25,
        random_state=0,
    )
def test_Ridge(*data):
    """Fit ridge regression with the default penalty and report coefficients,
    intercept, mean squared test error, and the R^2 score.

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    model = linear_model.Ridge()
    model.fit(X_train, y_train)
    # Learned parameters.
    print('权重: %s ,截距: %.2f ' % (model.coef_, model.intercept_))
    # Mean squared error on the held-out set.
    squared_errors = (model.predict(X_test) - y_test) ** 2
    print("残差平方和: %.2f " % np.mean(squared_errors))
    # R^2 on the held-out set.
    print('score: %.2f' % model.score(X_test, y_test))
def test_Ridge_alpha(*data):
    """Sweep the ridge penalty alpha over several orders of magnitude and
    plot the test R^2 score against alpha (log-scaled x axis).

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    alphas = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10,
              20, 50, 100, 200, 500, 1000]
    scores = []
    for alpha in alphas:  # enumerate index was unused, plain iteration suffices
        regr = linear_model.Ridge(alpha=alpha)
        regr.fit(X_train, y_train)
        scores.append(regr.score(X_test, y_test))
    ## plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, scores)
    # Fixed: label was r"$\alpha&" -- the unbalanced '$' makes matplotlib's
    # mathtext parser raise at draw time.
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"score")
    ax.set_xscale('log')
    ax.set_title("Ridge")
    plt.show()
# Demo driver: re-split the data before each ridge experiment.
for demo in (test_Ridge, test_Ridge_alpha):
    X_train, X_test, y_train, y_test = load_data()
    demo(X_train, X_test, y_train, y_test)
03.Lasso回归
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,linear_model,discriminant_analysis,model_selection
def load_data():
    """Split the diabetes dataset 75/25 into train and test portions.

    :return: (X_train, X_test, y_train, y_test)
    """
    diabetes_bunch = datasets.load_diabetes()
    features, targets = diabetes_bunch.data, diabetes_bunch.target
    return model_selection.train_test_split(
        features, targets, test_size=0.25, random_state=0)
def test_Lasso(*data):
    """Fit a Lasso model with the default penalty and report coefficients,
    intercept, mean squared test error, and the R^2 score.

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    model = linear_model.Lasso()
    model.fit(X_train, y_train)
    # Learned parameters.
    print('权重: %s ,截距: %.2f ' % (model.coef_, model.intercept_))
    # Mean squared error on the held-out set.
    residuals = model.predict(X_test) - y_test
    print("残差平方和: %.2f " % np.mean(residuals ** 2))
    # R^2 on the held-out set.
    print('score: %.2f' % model.score(X_test, y_test))
def test_Lasso_alpha(*data):
    """Sweep the Lasso penalty alpha over several orders of magnitude and
    plot the test R^2 score against alpha (log-scaled x axis).

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    alphas = [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10,
              20, 50, 100, 200, 500, 1000]
    scores = []
    for alpha in alphas:  # enumerate index was unused, plain iteration suffices
        regr = linear_model.Lasso(alpha=alpha)
        regr.fit(X_train, y_train)
        scores.append(regr.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(alphas, scores)
    # Fixed: label was r'&\alpha$' -- '&' instead of the opening '$' leaves the
    # mathtext unbalanced and matplotlib raises at draw time.
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"score")
    # Use a logarithmic x axis so each alpha decade is equally spaced.
    ax.set_xscale('log')
    ax.set_title("Lasso")
    plt.show()
# Demo driver: re-split the data before each Lasso experiment.
for demo in (test_Lasso, test_Lasso_alpha):
    X_train, X_test, y_train, y_test = load_data()
    demo(X_train, X_test, y_train, y_test)
04.ElasticNet回归
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,linear_model,discriminant_analysis,model_selection
def load_data():
    """Load diabetes and hand back a deterministic 75/25 train/test split
    as (X_train, X_test, y_train, y_test)."""
    ds = datasets.load_diabetes()
    split_kwargs = dict(test_size=0.25, random_state=0)
    return model_selection.train_test_split(ds.data, ds.target, **split_kwargs)
def test_ElasticNet(*data):
    """Fit an ElasticNet model with default hyper-parameters and report
    coefficients, intercept, mean squared test error, and the R^2 score.

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    model = linear_model.ElasticNet()
    model.fit(X_train, y_train)
    # Learned parameters.
    print('权重: %s ,截距: %.2f ' % (model.coef_, model.intercept_))
    # Mean squared error on the held-out set.
    residuals = model.predict(X_test) - y_test
    print("残差平方和: %.2f " % np.mean(residuals ** 2))
    # R^2 on the held-out set.
    print('score: %.2f' % model.score(X_test, y_test))
def test_ElasticNet_alpha_rho(*data):
    """Sweep ElasticNet's alpha and l1_ratio (rho) jointly and plot the test
    R^2 score as a 3-D surface over the (alpha, rho) grid.

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    alphas = np.logspace(-2, 2)
    rhos = np.linspace(0.01, 1)
    scores = []
    for alpha in alphas:
        for rho in rhos:
            regr = linear_model.ElasticNet(alpha=alpha, l1_ratio=rho)
            regr.fit(X_train, y_train)
            scores.append(regr.score(X_test, y_test))
    # scores was collected alpha-major (rho varies fastest), so reshape to
    # (n_alpha, n_rho) and transpose to match np.meshgrid's default 'xy'
    # output layout of (n_rho, n_alpha).  The original reshape to
    # alphas.shape only succeeded because both sweeps default to 50 points,
    # and it silently transposed the surface.
    scores = np.array(scores).reshape(len(alphas), len(rhos)).T
    alphas, rhos = np.meshgrid(alphas, rhos)
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the 3d projection
    from matplotlib import cm
    fig = plt.figure()
    # Axes3D(fig) no longer attaches the axes to the figure in
    # matplotlib >= 3.4; add_subplot with the 3d projection is the
    # supported way and works on older versions too.
    ax = fig.add_subplot(1, 1, 1, projection='3d')
    surf = ax.plot_surface(alphas, rhos, scores, rstride=1, cstride=1,
                           cmap=cm.jet, linewidth=0, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_xlabel(r"$\alpha$")
    ax.set_ylabel(r"$\rho$")
    ax.set_zlabel("score")
    ax.set_title("ElasticNet")
    plt.show()
# Demo driver: re-split the data before each ElasticNet experiment.
for demo in (test_ElasticNet, test_ElasticNet_alpha_rho):
    X_train, X_test, y_train, y_test = load_data()
    demo(X_train, X_test, y_train, y_test)
05.逻辑回归
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,linear_model,discriminant_analysis,model_selection
def load_data():
    """Load iris and return a stratified 75/25 train/test split.

    Stratifying on the labels keeps the three class proportions equal in
    both portions.

    :return: (X_train, X_test, y_train, y_test)
    """
    iris = datasets.load_iris()
    return model_selection.train_test_split(
        iris.data, iris.target,
        test_size=0.25, random_state=0, stratify=iris.target)
def test_LogisticRegression(*data):
    """Train a default LogisticRegression classifier and report its
    coefficients, intercepts, and test accuracy.

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    classifier = linear_model.LogisticRegression()
    classifier.fit(X_train, y_train)
    # One coefficient row / intercept per class.
    print('权重: %s ,截距: %s ' % (classifier.coef_, classifier.intercept_))
    # Mean accuracy on the held-out set.
    print('score: %.2f' % classifier.score(X_test, y_test))
def test_LogisticRegression_multinomial(*data):
    """Train logistic regression with a true multinomial (softmax) loss
    instead of one-vs-rest, then report parameters and test accuracy.

    NOTE(review): the multi_class parameter is deprecated in recent
    scikit-learn releases where multinomial is already the default --
    confirm against the installed version.

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    classifier = linear_model.LogisticRegression(multi_class='multinomial',
                                                 solver='lbfgs')
    classifier.fit(X_train, y_train)
    # One coefficient row / intercept per class.
    print('权重: %s ,截距: %s ' % (classifier.coef_, classifier.intercept_))
    # Mean accuracy on the held-out set.
    print('score: %.2f' % classifier.score(X_test, y_test))
def test_LogisticRegression_C(*data):
    """Sweep the inverse regularisation strength C across six decades and
    plot the test accuracy against C (log-scaled x axis).

    :param data: (X_train, X_test, y_train, y_test)
    """
    X_train, X_test, y_train, y_test = data
    Cs = np.logspace(-2, 4, num=100)
    scores = []
    for C in Cs:
        classifier = linear_model.LogisticRegression(C=C)
        classifier.fit(X_train, y_train)
        scores.append(classifier.score(X_test, y_test))
    figure = plt.figure()
    axes = figure.add_subplot(1, 1, 1)
    axes.plot(Cs, scores)
    axes.set_xlabel(r"C")
    axes.set_ylabel(r"score")
    axes.set_xscale('log')
    axes.set_title("LogisticRegression")
    plt.show()
# Demo driver: re-split the iris data before each logistic-regression experiment.
for demo in (test_LogisticRegression,
             test_LogisticRegression_multinomial,
             test_LogisticRegression_C):
    X_train, X_test, y_train, y_test = load_data()
    demo(X_train, X_test, y_train, y_test)
06.决策树回归
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn import model_selection
import matplotlib.pyplot as plt
#随机产生的数据集
def creat_data(n):
np.random.seed(0)
X=5*np.random.rand(n,1)
y=np.sin