import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model.logistic import LogisticRegression
from sklearn.preprocessing import StandardScaler
# 模型评估
from sklearn.metrics import classification_report
# auc指标计算
from sklearn.metrics import roc_auc_score
# Column names for the breast-cancer table, listed in file order.
columns = [
    'Sample code number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class:',
]

# Load the data set.
# NOTE(review): the UCI distribution of this file has no header row, so the
# names are supplied here with header=None. Letting read_csv infer a header
# would consume the first sample as column labels and drop it from the data.
# If the local copy was saved WITH a header line, use header=0 instead.
data = pd.read_csv(
    'breast-cancer-wisconsin.data',
    encoding='gbk',
    header=None,
    names=columns,
)

# Missing values are written as '?' in the file; convert them to NaN so the
# affected rows can be removed with dropna afterwards.
data.replace('?', np.nan, inplace=True)
data.dropna(how='any', axis
线性逻辑回归以及稳健性测试
最新推荐文章于 2024-06-27 14:27:10 发布