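# Precision and recall are two metrics widely used in information retrieval and
# statistical classification to evaluate the quality of results.
# Precision is the ratio of relevant documents retrieved to the total number of
# documents retrieved; it measures how accurate the retrieval is.
# Recall is the ratio of relevant documents retrieved to all relevant documents
# in the collection; it measures how complete the retrieval is.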
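# Precision: are the positive predictions correct?  Recall: are all positives found?
# Also consider a combined metric, and think about how the parameters affect each.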
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# StandardScaler is required for the standardization step below; it is imported
# here so this block is self-contained.
from sklearn.preprocessing import StandardScaler

# The data file has no header row, so column names are supplied explicitly.
# The first name keeps its original trailing space so that any lookup using
# that exact label keeps working.
names = [
    "Sample code number ",
    "Clump Thickness",
    "Uniformity of Cell Size",
    "Uniformity of Cell Shape",
    "Marginal Adhesion",
    "Single Epithelial Cell Size",
    "Bare Nuclei",
    "Bland Chromatin",
    "Mitoses",
    "Class",
]
ID_COLUMN = names[0]
TARGET_COLUMN = "Class"

# Missing values are written as "?" in the file. Declaring that at read time
# lets pandas parse the affected column as numeric instead of leaving strings
# behind (the cause of "ValueError: could not convert string to float: '?'").
breast = pd.read_csv("breast-cancer-wisconsin.data", names=names, na_values="?")
breast = breast.dropna()

# The sample code is an identifier, not a measurement, so it must not be fed
# to the model as a feature; the target column is removed as well.
features = breast.drop(columns=[ID_COLUMN, TARGET_COLUMN])
targets = breast[TARGET_COLUMN]

# A fixed seed makes the reported metrics reproducible between runs, and
# stratifying keeps the class ratio the same in the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.25,
    random_state=42,
    stratify=targets,
)

# Linear models are sensitive to feature scale, so standardize the features.
# The scaler is fitted on the training part only to avoid leaking test-set
# statistics into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

logist = LogisticRegression()
logist.fit(X_train_scaled, y_train)
y_predict = logist.predict(X_test_scaled)

# score() reports mean accuracy on the held-out test set.
print("准确率:", logist.score(X_test_scaled, y_test))

# Per-class precision / recall / F1. Class label 2 is reported as benign
# ("良性") and 4 as malignant ("恶性").
report = classification_report(
    y_test,
    y_predict,
    labels=[2, 4],
    target_names=["良性", "恶性"],
)
print(report)