# Evaluation metrics for classification algorithms
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# Load the dataset. Column names are supplied explicitly through `names`.
filename = 'pima_data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = read_csv(filename, names=names)

# Separate the raw values into the first eight columns (features, 'preg'
# through 'age') and the ninth column (the 'class' label).
array = data.values
X = array[:, 0:8]
Y = array[:, 8]

# Cross-validation setup: a logistic-regression estimator and a 10-fold
# splitter seeded for reproducibility.
num_folds = 10
seed = 7
model = LogisticRegression()
# shuffle=True is required for random_state to have any effect; recent
# scikit-learn releases raise ValueError when random_state is set on a
# KFold that does not shuffle.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
# The sections below each demonstrate one evaluation metric. Only the last
# one (classification report) is active; the others are kept disabled as
# alternatives that can be re-enabled one at a time.

# --- Classification accuracy ---
# result = cross_val_score(model, X, Y, cv=kfold)
# print("Accuracy: %.3f (%.3f)" % (result.mean(), result.std()))

# --- Logarithmic loss ---
# kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
# model = LogisticRegression()
# scoring = 'neg_log_loss'
# result = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
# print("Logloss: %.3f (%.3f)" % (result.mean(), result.std()))

# --- Area under the ROC curve (AUC) ---
# scoring = 'roc_auc'
# result = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
# print("AUC: %.3f (%.3f)" % (result.mean(), result.std()))

# --- Confusion matrix ---
# Used to compare the classification results against the actual measured
# values; the accuracy of the classification results can be displayed in a
# confusion matrix.
# test_size = 0.33
# seed = 4
# X_train, X_test, Y_train, Y_test = train_test_split(
#     X, Y, test_size=test_size, random_state=seed)
# model = LogisticRegression()
# model.fit(X_train, Y_train)
# predicted = model.predict(X_test)
# matrix = confusion_matrix(Y_test, predicted)  # confusion matrix
# classes = ['0', '1']
# dataframe = pd.DataFrame(data=matrix, index=classes, columns=classes)
# print(dataframe)

# --- Classification report ---
# Gives the precision, recall, F1 score and number of samples.
#   Precision P = TP / (TP + FP): of all items that were retrieved, the
#       proportion that should have been retrieved.
#   Recall    R = TP / (TP + FN): of all items that should have been
#       retrieved, the proportion that actually were retrieved.
#   The F1 score is the harmonic mean of precision and recall.
test_size = 0.33
seed = 4
# Hold out `test_size` of the samples for evaluation; the split is seeded so
# the report is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=test_size, random_state=seed)
model = LogisticRegression()
model.fit(X_train, Y_train)
predicted = model.predict(X_test)
report = classification_report(Y_test, predicted)
print(report)
# Machine learning with Python: algorithm evaluation metrics
# (Source page footer: latest recommended article published 2022-10-24 08:58:37)