# Logistic Regression

from sklearn.linear_model import LogisticRegression as LR
from sklearn.datasets import load_breast_cancer
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Compare L1 vs L2 regularization on the breast-cancer dataset.
# C is the INVERSE of regularization strength: smaller C = stronger penalty.
# L1 drives coefficients of weakly-related features exactly to 0 (sparse
# solution); L2 only shrinks weak coefficients toward 0 without zeroing them.
data = load_breast_cancer()
X = data.data
y = data.target

# Train/test accuracy curves for each penalty, one entry per C value.
l1 = []
l2 = []
l1test = []
l2test = []

Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, y, test_size=0.3, random_state=420)

# 20 evenly spaced C values from 0.05 to 1 (inclusive).  Hoisted into a
# variable so the plot x-axis below uses exactly the same values instead
# of recomputing the linspace.
C_values = np.linspace(0.05, 1, 20)
for C in C_values:
    lrl1 = LR(penalty='l1', solver='liblinear', C=C, max_iter=1000)
    lrl2 = LR(penalty='l2', solver='liblinear', C=C, max_iter=1000)

    lrl1.fit(Xtrain, Ytrain)
    # accuracy_score(y_true, y_pred): true labels first, per the sklearn
    # signature (accuracy itself is symmetric, but follow the convention).
    l1.append(accuracy_score(Ytrain, lrl1.predict(Xtrain)))
    l1test.append(accuracy_score(Ytest, lrl1.predict(Xtest)))

    lrl2.fit(Xtrain, Ytrain)
    l2.append(accuracy_score(Ytrain, lrl2.predict(Xtrain)))
    l2test.append(accuracy_score(Ytest, lrl2.predict(Xtest)))

graph = [l1, l2, l1test, l2test]
color = ['green', 'blue', 'lightgreen', 'red']
label = ['L1', 'L2', 'L1test', 'L2test']

plt.figure(figsize=(6, 6))  # 6x6 inches
for curve, col, lab in zip(graph, color, label):
    # Pass the colour via keyword: a positional third argument to plot()
    # is parsed as a matplotlib format string and only happens to work
    # here because these strings are also valid colour names.
    plt.plot(C_values, curve, color=col, label=lab)

plt.legend(loc=4)  # show the legend in the lower-right corner
plt.show()

import numpy as np
from astropy.units import Ybarn
import math

def computeCorrelation(X, y):
    """Return the Pearson correlation coefficient between X and y.

    Computed in a single pass as
        r = sum(dx*dy) / sqrt(sum(dx^2) * sum(dy^2))
    where dx, dy are the deviations of each sample from its mean.
    """
    x_mean = np.mean(X)
    y_mean = np.mean(y)
    cross = 0      # sum of cross-products of deviations (SSR)
    sum_sq_x = 0   # sum of squared deviations of X
    sum_sq_y = 0   # sum of squared deviations of y
    for xi, yi in zip(X, y):
        dx = xi - x_mean
        dy = yi - y_mean
        cross += dx * dy
        sum_sq_x += dx ** 2
        sum_sq_y += dy ** 2

    return cross / math.sqrt(sum_sq_x * sum_sq_y)

#Polynomial Regression R平方值
def polyfit(X, y, degree):
    """Fit a least-squares polynomial to (X, y) and report its R^2 value.

    Parameters
    ----------
    X, y : sequences of numbers — sample points and observed values.
    degree : int — highest power of x in the fitted polynomial.

    Returns
    -------
    dict with keys:
      'polynomial:'    — fitted coefficients, highest power first.
      'determination:' — R^2, the coefficient of determination.
    """
    results = {}

    # np.polyfit returns the least-squares coefficients, highest degree first.
    coeffs = np.polyfit(X, y, degree)

    # Key was previously misspelled 'polynomail:'; fixed.
    results['polynomial:'] = coeffs.tolist()

    p = np.poly1d(coeffs)  # callable polynomial built from the coefficients

    yhat = p(X)                  # predicted values
    ybar = np.sum(y) / len(y)    # mean of the observed values

    ssreg = np.sum((yhat - ybar) ** 2)  # explained sum of squares
    sstot = np.sum((y - ybar) ** 2)     # total sum of squares

    results['determination:'] = ssreg / sstot

    return results

# Quick sanity check of both helpers on a small hand-made dataset.
testX = [1, 3, 8, 7, 9]
testY = [10, 12, 24, 21, 34]

correlation = computeCorrelation(testX, testY)
print(correlation)
print(polyfit(testX, testY, 1))