import math
from collections import Counter

import numpy as np
class Node:
    """A single node of the decision tree.

    The node is a plain data holder: it stores whatever the caller passes
    in and starts with an empty list of children.

    Args:
        feature_index: stored as ``self.feature_index`` (presumably the index
            of the feature this node splits on -- confirm against the tree
            builder).
        value: stored as ``self.value`` (presumably the feature value on the
            branch that leads to this node -- confirm).
        label: stored as ``self.label`` (presumably the predicted class when
            the node is a leaf -- confirm).
    """

    def __init__(self, feature_index=None, value=None, label=None):
        self.feature_index = feature_index
        self.value = value
        # A fresh list per instance, so nodes never share their children.
        self.child = []
        self.label = label

    def __repr__(self):
        return (
            f"{type(self).__name__}(feature_index={self.feature_index!r}, "
            f"value={self.value!r}, label={self.label!r}, "
            f"children={len(self.child)})"
        )
class C4_5:
    """Decision-tree learner whose split criterion is selected by ``way``.

    The constructor stores the training data and pre-computes the set of
    distinct labels and the set of distinct values of every feature.
    """

    def __init__(self, X, Y, c=0.1, way='ID3'):
        """Store the training set and pre-compute the value sets.

        Args:
            X: non-empty sequence of samples; each sample is indexable by
                feature position (``len(X[0])`` gives the feature count).
            Y: sequence of hashable labels, one per sample.
            c: stored as ``self.c``; not used by the methods in this block
                (presumably a gain threshold -- confirm against the tree
                builder).
            way: name of the split criterion, stored as ``self.way``
                (default ``'ID3'``).
        """
        self.c = c
        self.root = Node()
        self.X = X
        self.Y = Y
        self.feature_num = len(X[0])
        # Number of label entries (one per sample), not of distinct classes.
        self.label_num = len(Y)
        self.feature_set = list(range(self.feature_num))
        self.getac()
        self.way = way

    def getac(self):
        """Collect the distinct labels and the distinct values per feature.

        Sets ``self.dict_y`` to the set of labels in ``self.Y`` and
        ``self.dict_x`` to a dict mapping each feature index to the set of
        values found in that column of ``self.X``.
        """
        self.dict_y = set(self.Y)
        self.dict_x = {
            index: {sample[index] for sample in self.X}
            for index in range(self.feature_num)
        }

    @staticmethod
    def get_label(list_):
        """Return the most frequent element of ``list_``.

        Ties are resolved in favour of the element that occurs first.
        Counting is done in a single pass, so any iterable of hashable
        items is accepted.

        Raises:
            ValueError: if ``list_`` is empty.
        """
        counts = Counter(list_)
        if not counts:
            raise ValueError("get_label() requires at least one label")
        # Counter keeps first-occurrence order and max() returns the first
        # maximal key, so ties go to the earliest element.
        return max(counts, key=counts.__getitem__)

    @staticmethod
    def count_Y(Y):
        """Return a plain dict mapping each label in ``Y`` to its count.

        Keys appear in order of first occurrence.
        """
        # A plain dict keeps KeyError on unknown labels for callers.
        return dict(Counter(Y))
# experience_entropy(self, Y): empirical entropy of the label sequence Y,
# measured in bits (logarithm base 2):
#     H = -sum over distinct labels x of (count(x) / D) * log2(count(x) / D)
# where count(x) comes from self.count_Y(Y) and D = len(Y).
# NOTE(review): whitespace was lost upstream, so the final line of this method
# is fused with the header of get_feature; the code tokens are left untouched.
defexperience_entropy(self,Y):
dict_y = self.count_Y(Y)
# D is the total number of label entries in Y.
D =len(Y)
set_y =set(Y)return-sum([dict_y[x]/D*math.log(dict_y[x]/D,2)for x in set_y])defget_feature(self,X,Y,rest_x):
HD = self.experience_entropy(Y)
Y = np.array(Y)
X = np.array(X)
entropy_ =[]if self.way =='ID3':for i in rest_x:
sum_ =0
list_x = np.array([x[i]for x in X])for j in self.dict_x[i]:
sum__ =0
Di =sum(list_x == j)if Di !=0:for m in self.dict_y:
Dik =sum(Y[list_x == j]==m)if Dik !=0:
sum__ += Dik/Di*math.log(Dik/Di,2)
sum_ -= Di/len(list_x)*sum__
add_entropy = HD - sum_
entropy_.append(add_entropy)if self.way =='C45':for i in rest_x:
sum_ =0
list_x = np.array([x[i]for x in X])for j in self.dict_x[i]:
sum__ =0
HAD =0
Di =sum(list_x == j)if Di !=0:for m in self.dict_y:
Dik =sum(Y[list_x == j]==m)if Dik !=0