描述:本文分别用决策树分类和 SMOTE+决策树组合对同一组数据进行实验,比较两次分类结果的精度,从而探究 SMOTE 算法对决策树分类结果的影响。
数据:数据来源于 UCI 机器学习数据库中的气球(Balloons)数据集,共 5 列:color、chicun(尺寸)、act、age、inflated,共 20 行 × 5 列数据。
SMOTE 算法
import random
from sklearn.neighbors import NearestNeighbors
import numpy as np
import pandas as pd
from pandas import *
from numpy import *
import csv
from sklearn import tree
from sklearn.tree import export_graphviz
class Smote:
def __init__(self,samples,N,k=5):
self.n_samples,self.n_attrs = samples.shape
self.N = N
self.k = k
self.samples = samples
self.newindex = 0
@property
def over_sampling(self):
N = int(self.N/100)
self.synthetic = np.zeros((self.n_samples * N,self.n_attrs))
neighbors = NearestNeighbors(n_neighbors=self.k).fit(self.samples)
print 'neighbors',neighbors
for i