# This program assumes feature values lie in the range [-1, 1]; it can be used directly once the data has been preprocessed.
import math
import operator
import threading
import random
def loadDataSet(filename, boundry):
    """Load a comma-separated file and split its rows into train and test sets.

    The first column of every row is skipped; the remaining columns are
    converted to ``float``.  The number of value columns is taken from the
    first line of the file.

    Args:
        filename: Path of the comma-separated text file to read.
        boundry: Indexable pair ``(first, last)`` of zero-based row indices.
            Rows whose index lies in the inclusive range ``first..last`` go
            to the test set; all other rows go to the training set.

    Returns:
        A tuple ``(dataMat, testMat)`` of lists of rows, each row being a
        list of floats.

    Raises:
        IndexError: If a row has fewer columns than the first line.
        ValueError: If a value cannot be converted to ``float``.
    """
    # Read the file once inside a context manager so the handle is always
    # closed (the previous version opened the file twice and never closed it).
    with open(filename) as fr:
        lines = fr.readlines()

    # Column count comes from the first line; an empty file yields 0 columns.
    firstLine = lines[0] if lines else ''
    num = len(firstLine.split(',')) - 1

    dataMat = []
    testMat = []
    for rowIndex, line in enumerate(lines):
        curLine = line.strip().split(',')
        # Explicit indexing (not slicing) so a short row still raises
        # IndexError instead of being silently truncated.
        lineArr = [float(curLine[i + 1]) for i in range(num)]
        if boundry[0] <= rowIndex <= boundry[1]:
            testMat.append(lineArr)
        else:
            dataMat.append(lineArr)
    return dataMat, testMat
def calcShannonEnt(dataSet):
numEntries = len(dataSet)
labelCounts = {}
for featVec in dataSet:
currentLabel = featVec[-1]
if currentLabel not in labelCounts.keys(): labelCounts[currentLabel] = 0
lab