不知道是我能力有限,还是其他原因,第10章的代码一开始就运行不成功,调试了很久。
修改后的代码如下:
from numpy import *
def loadDataSet(fileName):
    """Parse a tab-delimited text file of floats into a list of rows.

    Args:
        fileName: Path of the text file to read. Each non-blank line is
            split on tab characters and every field is converted with
            ``float``.

    Returns:
        A list with one entry per non-blank line; each entry is a list of
        floats. (The original author's note assumes the last column is the
        target value; nothing in this function depends on that.)

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
    """
    dataMat = []
    # The context manager closes the file even if a conversion fails.
    with open(fileName) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # A blank line would otherwise become float('') and raise.
                continue
            # split() yields strings. Build a real list here: on Python 3
            # map() is a lazy iterator, so appending it directly would
            # store iterators instead of numeric rows.
            dataMat.append([float(field) for field in line.split('\t')])
    return dataMat
def distEclud(vecA, vecB):
    """Return the Euclidean distance between two NumPy vectors.

    Args:
        vecA: NumPy array or matrix.
        vecB: NumPy array or matrix that can be subtracted from ``vecA``.

    Returns:
        The square root of the sum of squared element-wise differences,
        as a NumPy scalar.
    """
    diff = vecA - vecB
    # Use the array's own .sum() rather than a bare sum(): the builtin sum
    # would iterate over the rows of a 1xn matrix and hand back a matrix
    # instead of a scalar if the star import ever stopped shadowing it.
    return sqrt(power(diff, 2).sum())
def randCent(dataSet, k):
    """Create ``k`` random centroids inside the bounding box of ``dataSet``.

    Args:
        dataSet: Two-dimensional NumPy matrix or array with one sample per
            row and one feature per column.
        k: Number of centroids to generate.

    Returns:
        A ``k`` x ``n`` NumPy matrix, where ``n`` is the number of columns
        of ``dataSet``. Column ``j`` holds values drawn uniformly between
        the minimum and maximum of column ``j`` of ``dataSet``.

    Raises:
        ValueError: If ``dataSet`` has no rows (min/max of an empty column).
    """
    n = shape(dataSet)[1]
    # asmatrix is what the removed ``mat`` alias pointed to.
    centroids = asmatrix(zeros((k, n)))
    for j in range(n):
        column = dataSet[:, j]
        # .min()/.max() return plain scalars for both matrix and ndarray
        # input, so no 1x1-matrix-to-float conversion is needed.
        minJ = float(column.min())
        rangeJ = float(column.max() - minJ)
        # Scalars broadcast against the (k, 1) random column, giving k
        # independent draws. Drawing a single value instead would give
        # every centroid the same coordinate in this dimension.
        centroids[:, j] = asmatrix(minJ + rangeJ * random.rand(k, 1))
    return centroids