数据:data.csv 密码:eorj
代码:
from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from sklearn import tree
import numpy
# Read the data.
# The path is a raw string because it contains backslash sequences ("\p", "\d")
# that are invalid escapes in a normal string literal; the resulting value is
# unchanged. newline='' is what the csv module asks for so that quoted fields
# containing line breaks are parsed correctly. The `with` block guarantees the
# file handle is closed once every row has been consumed.
with open(r"D:\python project\decision Tree(决策树)\data.csv", 'r',
          encoding='utf-8', newline='') as allElectronicsDate:
    reader = csv.reader(allElectronicsDate)
    headers = next(reader)  # the first CSV line supplies the column names
    # print(headers)

    # NOTE: the misspelled names (featrueList, lableList, allElectronicsDate)
    # are kept on purpose; code elsewhere in the file may refer to them.
    featrueList = []  # one {column name: raw string value} dict per data row
    lableList = []    # the last column of every data row
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; indexing one would raise
            # IndexError, so skip it.
            continue
        lableList.append(row[-1])
        # Columns 1 .. len(row)-2 become features; column 0 and the last
        # column are excluded (column 0 is presumably a record id -- confirm
        # against the data file).
        featrueList.append(dict(zip(headers[1:], row[1:-1])))

print(featrueList)

# One-hot encode the string-valued feature dicts into a dense numeric array.
vec = DictVectorizer()
dummyX = vec.fit_transform(featrueList).toarray()
print("dummyX:"+str(dummyX))

# get_feature_names() was removed in scikit-learn 1.2; prefer the replacement
# when it exists and fall back for older releases. list() keeps the printed
# form a plain Python list in both cases.
if hasattr(vec, "get_feature_names_out"):
    featureNames = list(vec.get_feature_names_out())
else:
    featureNames = list(vec.get_feature_names())
print(featureNames)
print("labList:"+str(lableList))