逻辑回归 breast-cancer 数据集处理

最新推荐文章于 2024-03-16 12:00:43 发布

黑色低级高中生

最新推荐文章于 2024-03-16 12:00:43 发布

阅读量1.8k

点赞数

分类专栏：ML　文章标签：机器学习、逻辑回归、python

本文链接：https://blog.csdn.net/qq_35587463/article/details/109000814

版权

ML 专栏收录该内容

2 篇文章 0 订阅

订阅专栏

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

def init():
    """Train and evaluate the logistic-regression model on the CSV data set.

    Reads ``./breast-cancer.csv``, drops the ``id`` and ``Unnamed: 32``
    columns, encodes ``diagnosis`` as 1 for ``'M'`` and 0 for ``'B'``,
    splits the rows 70/30 into train and test sets, fits the model by
    gradient descent, prints the accuracy on both splits and finally shows
    the recorded cost curve.

    Returns:
        None. The results are printed and plotted.
    """
    df = pd.read_csv("./breast-cancer.csv")

    # Drop the columns that are not used as features. The ``columns=``
    # keyword is used because recent pandas releases no longer accept
    # ``axis`` as a positional argument of ``DataFrame.drop``.
    df = df.drop(columns=["id", "Unnamed: 32"])

    # Encode the label column as integers.
    df['diagnosis'] = df['diagnosis'].map({
        'M': 1,
        'B': 0,
    })

    # Split into a training set (70%) and a test set (30%).
    train, test = train_test_split(df, test_size=0.3, random_state=1)

    # Select the feature columns and the label column of each split and
    # convert them to NumPy arrays (rows are samples at this point).
    train_x = np.asarray(train.loc[:, 'radius_mean': 'fractal_dimension_worst'])
    train_y = np.asarray(train.loc[:, ['diagnosis']])
    test_x = np.asarray(test.loc[:, 'radius_mean': 'fractal_dimension_worst'])
    test_y = np.asarray(test.loc[:, ['diagnosis']])

    # Fit the model; it expects samples as columns, hence the transposes.
    d = model(train_x.T, train_y.T, num_of_iterations=10000, alpha=0.000001)

    costs = d["costs"]
    w = d["w"]
    b = d["b"]

    # Plot the recorded cost values.
    plt.plot(costs)
    plt.title("损失-迭代次数")
    plt.xlabel("迭代次数(x100)")
    plt.ylabel("损失")

    # Accuracy: 100% minus the mean absolute difference between the 0/1
    # predictions and the 0/1 labels.
    Y_prediction_train = predict(train_x.T, w, b)
    Y_prediction_test = predict(test_x.T, w, b)

    print("\n训练数据测试精确度: {}%".format(100 - np.mean(np.abs(Y_prediction_train - train_y.T)) * 100))
    print("\n测试数据测试精确度: {}%".format(100 - np.mean(np.abs(Y_prediction_test - test_y.T)) * 100))

    plt.show()
    
    

# 初始化权值
def initialize(m):
    """Create zero-valued starting parameters for ``m`` features.

    Args:
        m: number of rows of the weight vector.

    Returns:
        A tuple ``(w, b)`` where ``w`` is an ``(m, 1)`` array of zeros and
        ``b`` is the integer ``0``.
    """
    weights = np.zeros(shape=(m, 1))
    bias = 0
    return weights, bias
    
# sigmoid函数    
def sigmoid(X):
    """Return the logistic function ``1 / (1 + exp(-X))`` of ``X``.

    ``np.exp`` is applied to ``-X``, so arrays are processed element-wise.
    """
    denominator = 1 + np.exp(-X)
    return 1 / denominator

# 正反向传播
def propogate(X, Y, w, b):
    """Run one forward and backward pass of logistic regression.

    Args:
        X: feature matrix whose columns are samples (``X.shape[1]`` is
            used as the sample count).
        Y: labels, combined element-wise with the activations.
        w: weight vector; ``w.T`` is multiplied with ``X``.
        b: bias added to every linear output.

    Returns:
        A tuple ``(grads, cost)`` where ``grads`` is a dict with the keys
        ``"dw"`` and ``"db"`` (gradients of the cost with respect to ``w``
        and ``b``) and ``cost`` is the mean cross-entropy over the samples.
    """
    # Number of samples.
    m = X.shape[1]

    # Forward pass: linear output, then activation.
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)

    # When the sigmoid saturates to exactly 0 or 1, log(0) would turn the
    # cost into NaN/inf. Clip the activations for the cost only; the
    # gradients below keep using the unclipped values.
    eps = 1e-15
    A_safe = np.clip(A, eps, 1 - eps)
    cost = -(1 / m) * np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe))

    # Backward pass: gradients of the cost.
    dw = (1 / m) * np.dot(X, (A - Y).T)
    db = (1 / m) * np.sum(A - Y)

    grads = {"dw": dw, "db": db}
    return grads, cost


# 执行梯度下降
def optimize(X, Y, w, b, num_of_iterations, alpha):
    """Fit ``w`` and ``b`` with plain gradient descent.

    Args:
        X: feature matrix passed through to ``propogate``.
        Y: labels passed through to ``propogate``.
        w: initial weights.
        b: initial bias.
        num_of_iterations: number of gradient-descent steps to run.
        alpha: learning rate (step size).

    Returns:
        A tuple ``(parameters, grads, costs)``:
        ``parameters`` is ``{"w": ..., "b": ...}`` with the final values,
        ``grads`` is ``{"dw": ..., "db": ...}`` from the last iteration
        (both ``None`` when no iteration was run), and ``costs`` is the
        list of cost values recorded every 100 iterations.
    """
    costs = []

    # Pre-bind the gradients so that running zero iterations returns
    # cleanly instead of failing on unbound names below.
    dw = None
    db = None

    for i in range(num_of_iterations):
        grads, cost = propogate(X, Y, w, b)

        dw = grads["dw"]
        db = grads["db"]

        # Step against the gradient.
        w = w - alpha * dw
        b = b - alpha * db

        # Record and report the cost once every 100 iterations.
        if i % 100 == 0:
            costs.append(cost)
            print("<%i>次迭代后的损失度: %f" % (i, cost))

    parameters = {
        "w": w,
        "b": b,
    }
    grads = {
        "dw": dw,
        "db": db,
    }

    return parameters, grads, costs


# 对数据集进行预测
def predict(X, w, b):
    """Predict 0/1 labels for every column of ``X``.

    Args:
        X: feature matrix whose columns are samples.
        w: weights; reshaped to ``(X.shape[0], 1)`` before use.
        b: bias added to every linear output.

    Returns:
        A float array holding ``0.0`` where the activation is below 0.5
        and ``1.0`` otherwise, one column per sample.
    """
    w = w.reshape(X.shape[0], 1)

    A = sigmoid(np.dot(w.T, X) + b)

    # Vectorised threshold. The condition is written as ``A < 0.5`` so that
    # anything not strictly below 0.5 (including NaN) maps to 1, exactly as
    # the ``else`` branch of an element-by-element comparison would.
    return np.where(A < 0.5, 0.0, 1.0)


# 计算逻辑回归模型
def model(Xtrain, Ytrain, num_of_iterations, alpha):
    """Build a logistic-regression model from the training data.

    Args:
        Xtrain: training features; ``Xtrain.shape[0]`` is the feature count.
        Ytrain: training labels.
        num_of_iterations: number of gradient-descent steps.
        alpha: learning rate.

    Returns:
        A dict with the learned ``"w"`` and ``"b"`` and the recorded
        ``"costs"`` list.
    """
    # One weight per feature row, all starting from zero.
    n_features = Xtrain.shape[0]
    w0, b0 = initialize(n_features)

    learned, _, cost_history = optimize(
        Xtrain, Ytrain, w0, b0, num_of_iterations, alpha
    )

    return {
        "w": learned["w"],
        "b": learned["b"],
        "costs": cost_history,
    }
    
# Script entry point: run the full pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    init()

在这里插入图片描述

黑色低级高中生

关注

0
点赞
踩
9

收藏

觉得还不错? 一键收藏
1
评论
逻辑回归 breast-cancer 数据集处理

# -*- coding: utf-8 -*-import pandas as pdimport numpy as npfrom matplotlib import pyplot as pltfrom sklearn.model_selection import train_test_splitdef init(): df = pd.read_csv("../Data/data.csv") # 处理无用列 df = df.drop("id",1) df
复制链接

扫一扫