from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from sklearn import tree
from sklearn.externals.six import StringIO
# Read the CSV file: collect one feature dict per row plus the class label.
# NOTE(review): column 0 is skipped (presumably a row-id column) and the
# last column is treated as the class label -- confirm against buy_computer.csv.
featureList = []
labelList = []
with open(r'C:\Users\Administrator\Desktop\buy_computer.csv', 'r') as allElectronicsData:
    reader = csv.reader(allElectronicsData)
    headers = next(reader)  # Python 3: next(reader); reader.next() was Python 2 only
    print(headers)
    for row in reader:
        labelList.append(row[-1])  # last column is the class label
        # Map header name -> categorical value for the feature columns.
        rowDict = {headers[i]: row[i] for i in range(1, len(row) - 1)}
        featureList.append(rowDict)
print(featureList)

# Vectorize the categorical features into one-hot numeric columns.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print("dummyX:" + str(dummyX))
# NOTE(review): on scikit-learn >= 1.0 this is get_feature_names_out()
print(vec.get_feature_names())
print("labellist: " + str(labelList))

# Vectorize the class labels into 0/1.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)
print("labelList: " + str(labelList))
print("dummyY: " + str(dummyY))

# Train a decision tree using the information-gain (entropy) criterion.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummyY)
print("clf: " + str(clf))

# Visualise the model: export the tree to a Graphviz .dot file.
with open("allElectronicInformationGainOri.dot", 'w') as f:
    # export_graphviz writes into f and returns None when out_file is given,
    # so do not rebind f to its result.
    tree.export_graphviz(clf, feature_names=vec.get_feature_names(), out_file=f)

# Predict on a modified copy of the first training row.
oneRowX = dummyX[0, :]
print("oneRowX: " + str(oneRowX))
# Copy before editing: plain assignment would alias row 0 of dummyX and
# mutate the training matrix in place.
newRowX = oneRowX.copy()
newRowX[0] = 1
newRowX[2] = 0
print("newRowX: " + str(newRowX))  # was mislabeled "predictedY" in the original
# predict() expects a 2-D array of shape (n_samples, n_features).
predictedY = clf.predict(newRowX.reshape(1, -1))
print("predictedY: " + str(predictedY))
# NOTE(review): the lines removed here were an exact byte-for-byte duplicate
# of the entire script above (imports, CSV parsing, vectorization, training,
# Graphviz export, and prediction). Executing it a second time only repeated
# the same work and rewrote the same output file, so the duplicate block was
# deleted. No other code referenced anything defined only in the copy.