"""Train and evaluate a logistic-regression classifier on the UCI
breast-cancer-wisconsin data set.

Steps: download the raw data, drop rows with missing values, split into
train/test sets, standardize the features, fit the model and print its
accuracy on the held-out test set.
"""
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Load the data. The remote file has no header row, so column names are
# supplied explicitly.
names = [
    'Sample code number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Hpithelial Cell Size',
    'Bare Nucle',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitomeos',
    'Class',
]
data = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data",
    names=names,
)

# Clean the data: missing values are encoded as "?" in the raw file.
# Turn them into NaN so that dropna() can remove the affected rows.
data = data.replace(to_replace="?", value=np.nan)
data = data.dropna()

# Separate features from the target: skip the first column (sample id)
# and the last column (the label); the label is the "Class" column.
x = data.iloc[:, 1:-1]
y = data["Class"]

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=20
)

# Standardize the features. The scaler is fitted on the training split
# only, and the same learned mean/variance is then applied to the test
# split. Re-fitting on the test split (fit_transform) would scale the two
# splits differently and leak test-set statistics into the evaluation.
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)

# Train the model.
estimator = LogisticRegression()
ret = estimator.fit(x_train, y_train)
print(ret)

# Evaluate the model: mean accuracy on the held-out test split.
print(estimator.score(x_test, y_test))
机器学习之逻辑回归
最新推荐文章于 2024-07-24 18:14:54 发布