[Machine Learning] Supervised Learning Algorithms: Binary Classification / Multiclass / Regression / K-Nearest Neighbors / Wisconsin Breast Cancer / Boston Housing

Linear regression, regularization, and linear models for multiclass classification

  • Full code:
import mglearn
import joblib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Linear regression
x,y = mglearn.datasets.make_wave(n_samples=60)
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state=42)
lr = LinearRegression().fit(x_train,y_train)

print("lr.coef_:{}".format(lr.coef_))
print("lr.intercept_:{}".format(lr.intercept_))
print("training set score:{:.5f}".format(lr.score(x_train,y_train)))
print("test set score:{:.2f}".format(lr.score(x_test,y_test)))
# Linear regression on the extended Boston housing dataset
x,y = mglearn.datasets.load_extended_boston()
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state=0)
lr = LinearRegression().fit(x_train,y_train)

print("training set score:{:.2f}".format(lr.score(x_train,y_train)))
print("test set score:{:.2f}".format(lr.score(x_test,y_test)))
# Ridge regression
from sklearn.linear_model import Ridge

ridge = Ridge().fit(x_train,y_train)
print("train set score:{:.2f}".format(ridge.score(x_train,y_train)))
print("test set score:{:.2f}".format(ridge.score(x_test,y_test)))

ridge10 = Ridge(alpha=10).fit(x_train,y_train)
print("train set score:{:.2f}".format(ridge10.score(x_train,y_train)))
print("test set score:{:.2f}".format(ridge10.score(x_test,y_test)))

ridge01 = Ridge(alpha=0.1).fit(x_train,y_train)
print("train set score:{:.2f}".format(ridge01.score(x_train,y_train)))
print("test set score:{:.2f}".format(ridge01.score(x_test,y_test)))

plt.plot(ridge.coef_,'s',label='Ridge alpha=1')
plt.plot(ridge10.coef_,'^',label='Ridge alpha=10')
plt.plot(ridge01.coef_,'v',label='Ridge alpha=0.1')
plt.plot(lr.coef_,'o',label='LinearRegression')

plt.xlabel('coefficient index')
plt.ylabel('coefficient magnitude')
plt.hlines(0,0,len(lr.coef_))
plt.ylim(-25,25)
plt.legend()
plt.show()

mglearn.plots.plot_ridge_n_samples()
plt.show()
# Lasso regression
from sklearn.linear_model import Lasso

lasso = Lasso().fit(x_train,y_train)
print("train set score:{:.2f}".format(lasso.score(x_train,y_train)))
print("test set score:{:.2f}".format(lasso.score(x_test,y_test)))
print('number of features used:{}'.format(np.sum(lasso.coef_ != 0)))

lasso001 = Lasso(alpha=0.01,max_iter=100000).fit(x_train,y_train)
print("train set score:{:.2f}".format(lasso001.score(x_train,y_train)))
print("test set score:{:.2f}".format(lasso001.score(x_test,y_test)))
print('number of features used:{}'.format(np.sum(lasso001.coef_ != 0)))

lasso00001 = Lasso(alpha=0.0001,max_iter=100000).fit(x_train,y_train)
print("train set score:{:.2f}".format(lasso00001.score(x_train,y_train)))
print("test set score:{:.2f}".format(lasso00001.score(x_test,y_test)))
print('number of features used:{}'.format(np.sum(lasso00001.coef_ != 0)))
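
# A similar sketch (assumption): LassoCV searches an alpha path by
# cross-validation instead of trying values by hand as above.
from sklearn.linear_model import LassoCV
lasso_cv = LassoCV(max_iter=100000,cv=5).fit(x_train,y_train)
print("cv-chosen alpha:{:.5f}".format(lasso_cv.alpha_))
print("number of features used:{}".format(np.sum(lasso_cv.coef_ != 0)))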

plt.plot(lasso.coef_,'s',label='lasso alpha=1')
plt.plot(lasso001.coef_,'^',label='lasso alpha=0.01')
plt.plot(lasso00001.coef_,'v',label='lasso alpha=0.0001')
plt.plot(ridge01.coef_,'o',label='Ridge alpha=0.1')

plt.xlabel('coefficient index')
plt.ylabel('coefficient magnitude')
plt.ylim(-25,25)
plt.legend(ncol=2,loc=(0,1.05))
plt.show()
# Linear models for classification
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC

x,y = mglearn.datasets.make_forge()
fig,axes = plt.subplots(1,2,figsize=(10,3))

for model,ax in zip([LinearSVC(),LogisticRegression()],axes):
    clf = model.fit(x,y)
    mglearn.plots.plot_2d_separator(clf,x,fill=False,eps=0.5,ax=ax,alpha=.7)
    mglearn.discrete_scatter(x[:,0],x[:,1],y,ax=ax)
    ax.set_title('{}'.format(clf.__class__.__name__))
    ax.set_xlabel('feature 0')
    ax.set_ylabel('feature 1')
axes[0].legend()
plt.show()

mglearn.plots.plot_linear_svc_regularization()
plt.show()
# Logistic regression on the breast cancer dataset (L2 regularization by default)
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
x_train,x_test,y_train,y_test = train_test_split(cancer.data,cancer.target,stratify=cancer.target,random_state=42)

# Note: since scikit-learn 0.22 the default solver is lbfgs, which may warn about
# convergence on this unscaled data; raise max_iter or scale the features if so.
logreg = LogisticRegression().fit(x_train,y_train)
print("training set score:{:.3f}".format(logreg.score(x_train,y_train)))
print("test set score:{:.3f}".format(logreg.score(x_test,y_test)))

logreg100 = LogisticRegression(C=100).fit(x_train,y_train)
print("training set score:{:.3f}".format(logreg100.score(x_train,y_train)))
print("test set score:{:.3f}".format(logreg100.score(x_test,y_test)))

logreg001 = LogisticRegression(C=0.01).fit(x_train,y_train)
print("training set score:{:.3f}".format(logreg001.score(x_train,y_train)))
print("test set score:{:.3f}".format(logreg001.score(x_test,y_test)))

plt.plot(logreg.coef_.T,'o',label='C=1')
plt.plot(logreg100.coef_.T,'^',label='C=100')
plt.plot(logreg001.coef_.T,'v',label='C=0.01')
plt.xticks(range(cancer.data.shape[1]),cancer.feature_names,rotation=90)
plt.hlines(0,0,cancer.data.shape[1])
plt.ylim(-5,5)
plt.xlabel("coefficient index")
plt.ylabel("coefficient magnitude")
plt.legend()
plt.show()

# L1 regularization
for C,marker in zip([0.001,1,100],['o','^','v']):
    lr_l1 = LogisticRegression(C=C,penalty='l1',solver='liblinear').fit(x_train,y_train)
    print('train accuracy of l1 logreg with C={:.3f}:{:.2f}'.format(C,lr_l1.score(x_train,y_train)))
    print('test accuracy of l1 logreg with C={:.3f}:{:.2f}'.format(C,lr_l1.score(x_test,y_test)))

    plt.plot(lr_l1.coef_.T,marker,label='C={:.3f}'.format(C))
plt.xticks(range(cancer.data.shape[1]),cancer.feature_names,rotation=90)
plt.hlines(0,0,cancer.data.shape[1])
plt.ylim(-5,5)
plt.xlabel("coefficient index")
plt.ylabel("coefficient magnitude")
plt.legend(loc=3)
plt.show()
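
# Sketch: L1 regularization drives many coefficients exactly to zero; count the
# surviving features of the last model fitted in the loop above (C=100).
print("nonzero coefficients at C=100:", np.sum(lr_l1.coef_ != 0))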

# Linear models for multiclass classification
from sklearn.datasets import make_blobs
from sklearn.svm import LinearSVC

x,y = make_blobs(random_state=42)
mglearn.discrete_scatter(x[:,0],x[:,1],y)
plt.xlabel('feature 0')
plt.ylabel('feature 1')
plt.legend(['class 0','class 1','class 2'])
plt.show()

linear_svm = LinearSVC().fit(x,y)
print('coefficient shape:',linear_svm.coef_.shape)
print('intercept shape:',linear_svm.intercept_.shape)
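
# Sketch: coef_ has one row per class because LinearSVC trains one binary
# one-vs-rest classifier per class; the class with the highest score wins.
# The test point below is an illustrative assumption.
point = np.array([[0.0,0.0]])
scores = point @ linear_svm.coef_.T + linear_svm.intercept_
print("decision scores:", scores, "-> predicted class:", np.argmax(scores))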

mglearn.discrete_scatter(x[:,0],x[:,1],y)
line = np.linspace(-15,15)
for coef,intercept,color in zip(linear_svm.coef_,linear_svm.intercept_,['b','r','g']):
    plt.plot(line,-(line*coef[0]+intercept)/coef[1],c=color)
plt.ylim(-10,15)
plt.xlim(-10,8)
plt.xlabel('feature 0')
plt.ylabel('feature 1')
plt.legend(['class 0','class 1','class 2','line class 0','line class 1','line class 2'],loc=(1.01,0.3))
plt.show()

mglearn.plots.plot_2d_classification(linear_svm,x,fill=True,alpha=.7)
mglearn.discrete_scatter(x[:,0],x[:,1],y)
line = np.linspace(-15,15)
for coef,intercept,color in zip(linear_svm.coef_,linear_svm.intercept_,['b','r','g']):
    plt.plot(line,-(line*coef[0]+intercept)/coef[1],c=color)
plt.legend(['class 0','class 1','class 2','line class 0','line class 1','line class 2'],loc=(1.01,0.3))
plt.xlabel('feature 0')
plt.ylabel('feature 1')
plt.show()
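
The joblib import at the top of the listing can also be put to use. As a minimal
sketch of model persistence (the file name here is an assumption), a fitted
estimator can be saved to disk and restored:

# Save the fitted multiclass model and check the restored copy behaves identically
joblib.dump(linear_svm,'linear_svm.joblib')
restored = joblib.load('linear_svm.joblib')
print("restored model matches:", np.array_equal(restored.predict(x),linear_svm.predict(x)))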




Binary classification, regression algorithms, the Wisconsin breast cancer dataset, and Boston housing prices

import mglearn 
import matplotlib.pyplot as plt

x,y = mglearn.datasets.make_forge()

mglearn.discrete_scatter(x[:,0],x[:,1],y)
plt.legend(['Class 0','Class 1'],loc = 4)
plt.xlabel('first feature')
plt.ylabel('second feature')
print('x.shape:{}'.format(x.shape))
plt.show()
 
x.shape:(26, 2)


x,y = mglearn.datasets.make_wave(n_samples=40)
plt.plot(x,y,'o')
plt.ylim(-3,3)
plt.xlabel('feature')
plt.ylabel('target')
plt.show()


  • Full program:
import mglearn 
import matplotlib.pyplot as plt
import numpy as np

x,y = mglearn.datasets.make_forge()

mglearn.discrete_scatter(x[:,0],x[:,1],y)
plt.legend(['Class 0','Class 1'],loc = 4)
plt.xlabel('first feature')
plt.ylabel('second feature')
print('x.shape:{}'.format(x.shape))
plt.show()
 
x,y = mglearn.datasets.make_wave(n_samples=40)
plt.plot(x,y,'o')
plt.ylim(-3,3)
plt.xlabel('feature')
plt.ylabel('target')
plt.show()

from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
print("cancer.keys():\n{}".format(cancer.keys()))
print('shape of cancer data:\n{}'.format(cancer.data.shape))
print('sample counts per class:\n{}'.format({n : v for n,v in zip(cancer.target_names,np.bincount(cancer.target))}))
print('feature names:\n{}'.format(cancer.feature_names))

from sklearn.datasets import load_boston
# Note: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# this call requires an older scikit-learn version.
boston = load_boston()
print('data shape:\n{}'.format(boston.data.shape))

x,y = mglearn.datasets.load_extended_boston()
# 104 features: the 13 originals plus their squares and pairwise interaction terms
print('x.shape:{}'.format(x.shape))

x.shape:(26, 2)
cancer.keys():
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])
shape of cancer data:
(569, 30)
sample counts per class:
{'malignant': 212, 'benign': 357}
feature names:
['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
data shape:
(506, 13)
x.shape:(506, 104)

K-nearest neighbors: binary classification and regression

  • Full code:
import matplotlib.pyplot as plt
import mglearn
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

x,y = mglearn.datasets.make_forge()
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state = 0)
clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(x_train,y_train)
print("test set predictions:{}".format(clf.predict(x_test)))
print('test set accuracy:{:.3f}'.format(clf.score(x_test,y_test)))
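
# Sketch: k-NN classifies by majority vote among the k nearest training points;
# kneighbors exposes those neighbors (shown for the first test sample).
distances,indices = clf.kneighbors(x_test[:1])
print("neighbor labels:",y_train[indices[0]],"-> prediction:",clf.predict(x_test[:1]))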

fig,axes = plt.subplots(1,3,figsize=(10,3))
for n_neighbors,ax in zip([1,3,9],axes):
    clf = KNeighborsClassifier(n_neighbors=n_neighbors).fit(x,y)
    mglearn.plots.plot_2d_separator(clf,x,fill=True,eps=0.5,ax=ax,alpha=.4)
    mglearn.discrete_scatter(x[:,0],x[:,1],y,ax=ax)
    ax.set_title('{} neighbor(s)'.format(n_neighbors))
    ax.set_xlabel('feature 0')
    ax.set_ylabel('feature 1')
axes[0].legend(loc=3)
plt.show()

from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
x_train,x_test,y_train,y_test = train_test_split(cancer.data,cancer.target,stratify=cancer.target,random_state=66)

training_accuracy = []
test_accuracy = []
neighbors_settings = range(1,11)

for n_neighbors in neighbors_settings:
    clf = KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(x_train,y_train)
    training_accuracy.append(clf.score(x_train,y_train))
    test_accuracy.append(clf.score(x_test,y_test))
plt.plot(neighbors_settings,training_accuracy,label='training accuracy')
plt.plot(neighbors_settings,test_accuracy,label='test accuracy')
plt.ylabel('accuracy')
plt.xlabel('n_neighbors')
plt.legend()
plt.show()
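
# Sketch: read off the best k from the sweep above; for honest model selection
# this should use a validation split or cross-validation, not the test set.
best_k = neighbors_settings[int(np.argmax(test_accuracy))]
print("best n_neighbors on this split:",best_k)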

from sklearn.neighbors import KNeighborsRegressor
x,y = mglearn.datasets.make_wave(n_samples=40)
x_train,x_test,y_train,y_test = train_test_split(x,y,random_state=0)
reg = KNeighborsRegressor(n_neighbors=3)
reg.fit(x_train,y_train)

print('test set predictions:{}'.format(reg.predict(x_test)))
print('test set r^2 :{:.2f}'.format(reg.score(x_test,y_test)))
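
# Sketch: with uniform weights, the k-NN regression prediction is simply the
# mean target of the k nearest training points; verify for the first test sample.
distances,indices = reg.kneighbors(x_test[:1])
print("mean of neighbor targets:{:.3f}".format(y_train[indices[0]].mean()))
print("reg.predict:{:.3f}".format(reg.predict(x_test[:1])[0]))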

fig,axes = plt.subplots(1,3,figsize=(15,4))
line = np.linspace(-3,3,1000).reshape(-1,1)
for n_neighbors,ax in zip([1,3,9],axes):
    reg = KNeighborsRegressor(n_neighbors=n_neighbors)
    reg.fit(x_train,y_train)
    ax.plot(line,reg.predict(line))
    ax.plot(x_train,y_train,'^',c=mglearn.cm2(0),markersize=8)
    ax.plot(x_test,y_test,'^',c=mglearn.cm2(1),markersize=8)
    ax.set_title('{} neighbor(s) train score:{:.2f} test score:{:.2f}'.format(n_neighbors,reg.score(x_train,y_train),reg.score(x_test,y_test)))
    ax.set_xlabel('feature')
    ax.set_ylabel('target')
axes[0].legend(['model predictions','training data/target','test data/target'],loc='best')
plt.show()

