基于DT,RF,NB,SVM,AD的类不平衡数据集的比较

import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.datasets import make_blobs

##----创建需要的功能---
def nlist(a,n):
    """Return a new list with every element of ``a`` multiplied by ``n``.

    ``a`` is only iterated, never modified; an empty ``a`` yields ``[]``.
    """
    return [item * n for item in a]


#-----指标计算---
def index(a,b):
    """Compute accuracy, G-mean and F-measure for binary predictions.

    Label 1 is the positive class and label 0 the negative class. Entries
    carrying any other label fall into no confusion-matrix cell but still
    count towards the accuracy denominator.

    Args:
        a: Actual labels (list, NumPy array or pandas Series).
        b: Predicted labels, same length as ``a``.

    Returns:
        Tuple ``(Acc, Gmean, Fmeas)``, each rounded to 4 decimals.
        Any ratio whose denominator is zero is reported as 0.0.

    Raises:
        ValueError: If ``a`` and ``b`` do not have the same length.
    """
    # Convert to flat arrays so elements are paired by position. Indexing a
    # pandas Series with ``a[i]`` is label-based and breaks on the shuffled
    # index that train_test_split produces.
    actual = np.asarray(a).ravel()
    predicted = np.asarray(b).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            "a and b must have the same length, got {} and {}".format(
                actual.size, predicted.size))

    TP = int(np.sum((actual == 1) & (predicted == 1)))
    FN = int(np.sum((actual == 1) & (predicted == 0)))
    TN = int(np.sum((actual == 0) & (predicted == 0)))
    FP = int(np.sum((actual == 0) & (predicted == 1)))

    def _ratio(numerator, denominator):
        # An empty denominator (e.g. no positive predictions) gives 0.0
        # instead of raising ZeroDivisionError.
        return numerator / denominator if denominator else 0.0

    pre = _ratio(TP, TP + FP)
    rec = _ratio(TP, TP + FN)
    TNR = _ratio(TN, TN + FP)
    Acc = _ratio(TP + TN, actual.size)  # divided by the total sample count
    Gmean = (TNR * rec) ** 0.5
    # F-measure is the harmonic mean of precision and recall.
    Fmeas = _ratio(2 * pre * rec, pre + rec)

    return np.around(Acc,4),np.around(Gmean,4),np.around(Fmeas,4)

def makenoise0(a,b):
    """Set the entries of ``a`` selected by ``b`` to 0 and return ``a``.

    ``a`` is modified in place; the returned object is the same ``a``.
    Presumably used to relabel minority-class samples as noise; confirm
    against the caller.
    """
    for position in b:
        a[position] = 0
    return a

def huatu(a,b,j):
    """Scatter-plot the two classes of the global ``data`` on a new subplot.

    Reads the module-level names ``data`` and ``fig``. Rows of ``data`` whose
    'class' column equals 1 are drawn with '+' markers and all other rows
    with '.' markers, using the first two columns as x and y.

    Args:
        a: Base subplot position; ``a + j`` is passed to ``fig.add_subplot``.
        b: Value shown in the subplot title as ``IR=<b>``.
        j: Offset added to ``a``. It is not changed for the caller, so the
            caller must advance it between calls.
    """
    pos_data = data[data['class'] == 1]
    neg_data = data[data['class'] != 1]
    ax = fig.add_subplot(a + j)
    ax.scatter(pos_data.iloc[:, 0], pos_data.iloc[:, 1], s=1, marker='+')
    ax.scatter(neg_data.iloc[:, 0], neg_data.iloc[:, 1], s=1, marker='.')
    # ``b`` is inserted as-is; no rounding is applied by this format.
    ax.set_title('IR={}'.format(b))

def classifiers(X_train,X_test,y_train,y_test):
    #------------DT
    dt = DecisionTreeClassifier()
    dt.fit(X_train, y_train)
    y_pred1 = dt.predict(X_test)
    print(classification_report(y_test, y_pred1))
    print(index(y_test,y_pred1))
    a,b,c=index(y_test,y_pred1)
    acc.append(a)
    gme.append(b)
    fme.append(c)
    #-----RF-------
    rfc = RandomForestClassifier()
    rfc.fit(X_train, y_train)
    y_pred2 = rfc.predict(X_test)
    print(classification_report(y_test, y_pred2))
    print(index(y_test,y_pred2))
    a,b,c=index(y_test,y_pred2)
    acc.append(a)
    gme.append(b)
    fme.append(c)

    #--------NB---------------
    gnb = GaussianNB()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值