import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.datasets import make_blobs
##----创建需要的功能---
def nlist(a, n):
    """Return a new list holding every element of ``a`` multiplied by ``n``.

    Args:
        a: Iterable of values that support ``value * n``.
        n: Multiplier applied to each element.

    Returns:
        A new list; ``a`` itself is not modified.
    """
    return [item * n for item in a]
#-----指标计算---
def index(a, b):
    """Compute accuracy, G-mean and F-measure for binary 0/1 labels.

    Args:
        a: Actual labels (list, NumPy array or pandas Series).
        b: Predicted labels, same length as ``a``.

    Returns:
        Tuple ``(Acc, Gmean, Fmeas)``, each rounded to 4 decimals:
        accuracy over all samples, the geometric mean of recall and
        true-negative rate, and the F-measure ``2*P*R / (P + R)``.
        Any ratio whose denominator is zero is reported as 0.0.

    Raises:
        ValueError: If ``a`` and ``b`` do not contain the same number of labels.
    """
    # Convert to plain arrays so elements are matched by position. Indexing
    # a pandas Series with ``a[i]`` looks up the *label* i, which fails or
    # misaligns once the index has been shuffled (e.g. by a train/test split).
    actual = np.asarray(a).ravel()
    predicted = np.asarray(b).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            "a and b must have the same length, got {} and {}".format(
                actual.size, predicted.size
            )
        )

    def _ratio(numerator, denominator):
        # Undefined ratios (empty denominator) are reported as 0.0.
        return numerator / denominator if denominator else 0.0

    actual_pos = actual == 1
    actual_neg = actual == 0
    pred_pos = predicted == 1
    pred_neg = predicted == 0

    # Confusion-matrix counts; labels other than 0/1 fall into no cell.
    TP = int(np.count_nonzero(actual_pos & pred_pos))
    FN = int(np.count_nonzero(actual_pos & pred_neg))
    TN = int(np.count_nonzero(actual_neg & pred_neg))
    FP = int(np.count_nonzero(actual_neg & pred_pos))

    pre = _ratio(TP, TP + FP)
    rec = _ratio(TP, TP + FN)
    TNR = _ratio(TN, TN + FP)
    Acc = _ratio(TP + TN, actual.size)  # divided by the total sample count
    Gmean = (TNR * rec) ** 0.5
    # Harmonic mean of precision and recall (previously the plain product).
    Fmeas = _ratio(2 * pre * rec, pre + rec)
    return np.around(Acc, 4), np.around(Gmean, 4), np.around(Fmeas, 4)
def makenoise0(a, b):
    """Overwrite the entries of ``a`` selected by ``b`` with 0, in place.

    Used to create noisy samples for the minority class by relabelling
    the chosen entries as 0.

    Args:
        a: Mutable label container supporting ``a[key] = 0``.
        b: Iterable of keys/positions in ``a`` to overwrite.

    Returns:
        The same object ``a`` that was passed in, after modification.
    """
    for position in b:
        a[position] = 0
    return a
def huatu(a, b, j):
    """Scatter-plot the two class groups of ``data`` on a new subplot of ``fig``.

    Reads the module-level names ``data`` and ``fig``. Rows whose 'class'
    column equals 1 are drawn with '+' markers, all other rows with '.'
    markers, using the first two columns as x and y.

    Args:
        a: Base subplot position; ``a + j`` is passed to ``fig.add_subplot``
            (presumably a three-digit code such as 230 - confirm with caller).
        b: Value shown in the subplot title as ``IR=<b>``.
        j: Offset added to ``a`` to select the subplot.

    Returns:
        None. The caller's ``j`` is not changed by this function.
    """
    is_positive = data['class'] == 1
    pos_data = data[is_positive]
    neg_data = data[~is_positive]

    ax = fig.add_subplot(a + j)
    ax.scatter(pos_data.iloc[:, 0], pos_data.iloc[:, 1], s=1, marker='+')
    ax.scatter(neg_data.iloc[:, 0], neg_data.iloc[:, 1], s=1, marker='.')
    # b is inserted as-is; no rounding is applied here.
    ax.set_title('IR={}'.format(b))
def classifiers(X_train,X_test,y_train,y_test):
#------------DT
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)
y_pred1 = dt.predict(X_test)
print(classification_report(y_test, y_pred1))
print(index(y_test,y_pred1))
a,b,c=index(y_test,y_pred1)
acc.append(a)
gme.append(b)
fme.append(c)
#-----RF-------
rfc = RandomForestClassifier()
rfc.fit(X_train, y_train)
y_pred2 = rfc.predict(X_test)
print(classification_report(y_test, y_pred2))
print(index(y_test,y_pred2))
a,b,c=index(y_test,y_pred2)
acc.append(a)
gme.append(b)
fme.append(c)
#--------NB---------------
gnb = GaussianNB()
# 基于DT,RF,NB,SVM,AD的类不平衡数据集的比较
# 最新推荐文章于 2024-01-25 00:26:12 发布