基于 ID3 的决策树算法。文中使用的是 sklearn 库,并可以借助 graphviz 将生成的决策树转换为 PDF 查看。
案例中用到的模拟数据如下:
############################################################################
RID  age          income  student  credit_rating  class_buys_computer
1    youth        high    no       fair           no
2    youth        high    no       excellent      no
3    middle_aged  high    no       fair           yes
4    senior       medium  no       fair           yes
5    senior       low     yes      fair           yes
6    senior       low     yes      excellent      no
7    middle_aged  low     yes      excellent      yes
8    youth        medium  no       fair           no
9    youth        low     yes      fair           yes
10   senior       medium  yes      fair           yes
11   youth        medium  yes      excellent      yes
12   middle_aged  medium  no       excellent      yes
13   middle_aged  high    yes      fair           yes
14   senior       medium  no       excellent      no
############################################################################
# Train an entropy-criterion decision tree on the AllElectronics sample data,
# export it in Graphviz .dot format, and predict the class of one modified row.
#
# Targets Python 3. The original Python 2 constructs (`print x`,
# `reader.next()`, binary-mode CSV reading) have been replaced.
import csv
# `sklearn.externals.six` no longer exists in current scikit-learn releases;
# the standard-library StringIO keeps the name available.
from io import StringIO  # noqa: F401

DATA_PATH = r'E:\myAI\AllElectronics.csv'
DOT_PATH = "allelectionicsData.dot"


def load_dataset(path):
    """Read the CSV at *path* and split it into headers, features and labels.

    The first column (record id) is ignored, the last column is the class
    label, and every column in between becomes a ``{header: value}`` entry.

    Args:
        path: Location of the CSV file; its first row must be the header.

    Returns:
        A ``(headers, feature_list, label_list)`` tuple where ``headers`` is
        the header row, ``feature_list`` holds one dict per data row and
        ``label_list`` holds the matching class labels.

    Raises:
        ValueError: If the file contains no header row.
    """
    # The csv module expects text mode with newline='' on Python 3.
    with open(path, newline='', encoding='utf-8') as handle:
        reader = csv.reader(handle)
        headers = next(reader, None)
        if headers is None:
            raise ValueError("CSV file has no header row: " + str(path))

        feature_list = []
        label_list = []
        for row in reader:
            if not row:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue
            label_list.append(row[-1])
            # Pair columns 1..n-2 with their headers; column 0 is the id.
            feature_list.append(dict(zip(headers[1:], row[1:-1])))
    return headers, feature_list, label_list


def main(data_path=DATA_PATH, dot_path=DOT_PATH):
    """Fit the tree on *data_path*, write it to *dot_path*, and run one prediction."""
    # Imported here so that load_dataset stays usable without scikit-learn.
    from sklearn import preprocessing
    from sklearn import tree
    from sklearn.feature_extraction import DictVectorizer

    headers, feature_list, label_list = load_dataset(data_path)
    print(headers)
    print(feature_list)

    # One-hot encode the categorical feature dicts.
    vec = DictVectorizer()
    dummy_x = vec.fit_transform(feature_list).toarray()
    print("dummyX: " + str(dummy_x))
    # `feature_names_` is available on both old and new scikit-learn
    # versions, unlike the removed `get_feature_names()` method.
    feature_names = vec.feature_names_
    print(feature_names)
    print("Lablelist: " + str(label_list))

    # Encode the class labels numerically.
    lb = preprocessing.LabelBinarizer()
    dummy_y = lb.fit_transform(label_list)
    print("dummyY: " + str(dummy_y))

    # criterion='entropy' selects splits by information gain.
    clf = tree.DecisionTreeClassifier(criterion='entropy')
    clf = clf.fit(dummy_x, dummy_y)
    print("clf: " + str(clf))

    with open(dot_path, 'w') as dot_file:
        tree.export_graphviz(clf, feature_names=feature_names, out_file=dot_file)

    one_row_x = dummy_x[0, :]
    print("oneRowX: " + str(one_row_x))

    # Copy first: a plain assignment would alias the training matrix and the
    # edits below would silently modify dummy_x.
    new_one_row = one_row_x.copy()
    # NOTE(review): with the sample data these two columns are expected to be
    # age=middle_aged and age=youth -- confirm against the printed feature names.
    new_one_row[0] = 1
    new_one_row[2] = 0
    print("newoneRow : " + str(new_one_row))

    # Estimators expect a 2-D (n_samples, n_features) array, so wrap the
    # single sample into one row.
    predicted_y = clf.predict(new_one_row.reshape(1, -1))
    print("predictedY: " + str(predicted_y))


if __name__ == "__main__":
    main()