线性回归求解的是线性参数,输出连续值;逻辑回归的Sigmoid函数呈S型,在自变量趋向坐标轴左右两端时急剧趋近于1或0,因此具有分类特性。
决策树天生容易过拟合,而线性回归天生容易欠拟合;
L1范数可以完成特征选择:它会把部分特征的系数压缩为0,从而筛掉这些特征。
损失函数只针对【需要求解参数的模型】,损失函数越小则模型拟合得越好。KMeans 的 total inertia(簇内平方和)虽然并不是为了求解参数而定义的,严格来说不算损失函数,但它确实对衡量模型拟合起到重要作用,可以不严谨地称它为 KMeans 的损失函数。
但决策树和KNN则完全没有损失函数。
from sklearn.linear_model import LogisticRegression as LR
from sklearn.datasets import load_breast_cancer
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load the breast-cancer dataset (binary classification).
data = load_breast_cancer()
X = data.data
y = data.target
print(X.shape)  # number of samples x number of features

# L1-penalized logistic regression: the L1 norm drives some coefficients
# exactly to 0, so it performs embedded feature selection.
lr1 = LR(penalty="l1", solver="liblinear", C=0.5, max_iter=100)
lr1 = lr1.fit(X, y)
# coef_ holds the weight vector W of the logistic-regression model.
print(lr1.coef_)
# Count how many features survive the L1 penalty (non-zero weights).
print((lr1.coef_ != 0).sum(axis=1))

# L2-penalized logistic regression: shrinks coefficients toward 0 but
# typically keeps all of them non-zero, so no feature selection happens.
lr2 = LR(penalty="l2", solver="liblinear", C=0.5, max_iter=100)
lr2 = lr2.fit(X, y)
print(lr2.coef_)
print((lr2.coef_ != 0).sum(axis=1))
# Sweep the regularization parameter C for both penalties and record
# train/test accuracy.  NOTE: C is the INVERSE of regularization strength
# (smaller C = stronger penalty), so the curves show accuracy as the
# penalty is relaxed from 0.05 up to 1.
l1 = []
l2 = []
l1test = []
l2test = []
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, y, test_size=0.3, random_state=420)
# The original loop body had lost its indentation (SyntaxError); restored here.
for c in np.linspace(0.05, 1, 19):
    lrl1 = LR(penalty="l1", solver="liblinear", C=c, max_iter=100)
    lrl2 = LR(penalty="l2", solver="liblinear", C=c, max_iter=100)
    lrl1 = lrl1.fit(Xtrain, Ytrain)
    # accuracy_score's documented signature is (y_true, y_pred); the value
    # is the same either way (accuracy is symmetric), but follow the API.
    l1.append(accuracy_score(Ytrain, lrl1.predict(Xtrain)))
    l1test.append(accuracy_score(Ytest, lrl1.predict(Xtest)))
    lrl2 = lrl2.fit(Xtrain, Ytrain)
    l2.append(accuracy_score(Ytrain, lrl2.predict(Xtrain)))
    l2test.append(accuracy_score(Ytest, lrl2.predict(Xtest)))
# Plot all four accuracy curves (L1/L2, train/test) against C on one figure.
graph = [l1, l2, l1test, l2test]
color = ['green', 'black', 'lightgreen', 'gray']
label = ['L1', 'L2', 'L1test', 'L2test']
cs = np.linspace(0.05, 1, 19)  # x-axis: must match the C grid used for training
plt.figure(figsize=(6, 6))
# The original loop body had lost its indentation (SyntaxError); restored here.
for curve, col, lab in zip(graph, color, label):
    # Pass the colour via the color= keyword instead of the positional
    # matplotlib format-string argument, which is meant for specs like 'g--'.
    plt.plot(cs, curve, color=col, label=lab)
plt.legend(loc=4)  # loc=4 places the legend in the lower-right corner
plt.show()