# [Figure: screenshot of the data set]
from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import preprocessing
from numpy import *
import numpy as np
from sklearn import tree
from sklearn.externals.six import StringIO
# Feature dicts (one per data row) and the matching class labels.
featureList = []
labelList = []

# Read the training data. newline='' is what the csv module expects for
# files handed to csv.reader, and the with-block guarantees the handle is
# closed even if parsing fails part-way through.
with open(r'F:\机器学习\数据\01-ML-Decision Tree.csv', 'rt', newline='') as DecistionTreeData:
    reader = csv.reader(DecistionTreeData)
    # The first row holds the column names.
    headers = next(reader)
    # Turn every data row into a {column name: value} dict of features.
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to extract.
            continue
        # The last column is the class label.
        labelList.append(row[-1])
        # The first column is the record id and the last one is the label,
        # so only the columns in between are stored as key: value features.
        rowDict = {headers[i]: row[i] for i in range(1, len(row) - 1)}
        featureList.append(rowDict)

print(featureList)
# Each instance has to be stored in matrix form,
# that is, as a row under columns such as:
# youth middle_age senor high medium low yes no fa