# 1、创建模型
# --- Build and train the model ---
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Load the data set. Every column takes part in arithmetic below, so all of
# them must be numeric, and one of them must be named 'target'.
df = pd.read_csv('/2.csv')

# Feature matrix: every column except the label.
X = df.drop('target', axis=1)

# Label vector. Min-max scaling sends the smallest label to 0 and the largest
# to 1; this mapping is kept because the evaluation step asks the confusion
# matrix for labels [0, 1]. It is applied to the label column on its own so
# that it is clearly separate from the feature preprocessing.
target = df['target']
y = (target - target.min()) / (target.max() - target.min())

# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Min-max scale the features with statistics taken from the training rows
# only, then apply the same transform to the test rows. Computing min/max on
# the full data set before splitting would leak test information into the
# preprocessing step.
feature_min = X_train.min()
# A constant column has a range of 0; dividing by 1 instead maps it to 0
# rather than producing NaN from 0/0.
feature_range = (X_train.max() - feature_min).replace(0, 1)
X_train = (X_train - feature_min) / feature_range
X_test = (X_test - feature_min) / feature_range

# Fit a logistic-regression classifier with default settings and predict the
# labels of the held-out rows.
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# 2、模型评估
# --- Model evaluation ---
import pandas as pd
from sklearn.metrics import confusion_matrix

# Confusion matrix with rows = true class and columns = predicted class, both
# ordered [0, 1], i.e. [[TN, FP], [FN, TP]] when class 1 is the positive class.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = (int(count) for count in cm.ravel())

# Tabular copy of the matrix extended with marginal totals: a 'Row_sum'
# column (samples per true class) and a 'Col_sum' row (samples per predicted
# class); the bottom-right cell is the total number of samples.
df_cm = pd.DataFrame(cm)
df_cm['Row_sum'] = df_cm.sum(axis=1)
df_cm.loc['Col_sum'] = df_cm.sum()

# Accuracy: share of all samples that were classified correctly.
total = tn + fp + fn + tp
Acc = (tp + tn) / total if total else float('nan')

# Precision: of the samples predicted as class 1, the share that really are
# class 1. Reported as 0.0 when nothing was predicted as class 1.
predicted_positive = tp + fp
Precision = tp / predicted_positive if predicted_positive else 0.0

# Recall: of the samples that really are class 1, the share that was found.
# Reported as 0.0 when the test set holds no class-1 samples.
actual_positive = tp + fn
Recall = tp / actual_positive if actual_positive else 0.0

# F1: harmonic mean of precision and recall; 0.0 when both are zero.
F1 = (
    2 * Precision * Recall / (Precision + Recall)
    if (Precision + Recall)
    else 0.0
)
# 3、混淆矩阵可视化
# --- Confusion-matrix visualisation ---
import matplotlib.pyplot as plt
import seaborn as sn

# Draw df_cm as a heat map with the value written in every cell. The '.20g'
# format allows up to 20 significant digits, so counts are shown in full.
ax = sn.heatmap(df_cm, annot=True, fmt='.20g')

# Title and axis labels: columns are predictions, rows are true classes.
ax.set_title('confusion matrix')
ax.set_xlabel('predict')
ax.set_ylabel('true')

# Render the figure.
plt.show()