算法学习记录
具体代码实现
import ast
import math

import numpy
class point_data_reader:
    """Load whitespace-separated point data from a text file.

    Every line of the file becomes one row and every whitespace-separated
    token on that line becomes one value of the row.
    """

    # Class-level default, kept so ``point_data_reader.file_name`` still
    # resolves; each instance stores its own path in ``__init__``.
    file_name = str()

    def __init__(self, file_name):
        """Remember which file to read.

        Args:
            file_name: Path handed to ``open`` when the data is loaded.
        """
        # Stored on the instance rather than on the class so that several
        # readers can coexist without overwriting each other's path.
        self.file_name = file_name

    @staticmethod
    def _parse_token(token, num_lost):
        """Return the literal value of ``token``, or ``num_lost`` if invalid."""
        # NOTE(security): this used to call eval() on every token, which lets
        # the data file run arbitrary code. ast.literal_eval accepts the same
        # literals (ints, floats, ...) but rejects names, calls and arithmetic
        # expressions, which are therefore treated as missing values.
        try:
            return ast.literal_eval(token)
        except (ValueError, TypeError, SyntaxError):
            return num_lost

    def get_data_list(self, num_lost):
        """Parse the file into a list of rows.

        Args:
            num_lost: Placeholder stored for every token that is not a valid
                Python literal (for example ``?`` or ``NA``).

        Returns:
            A list containing one list per line of the file, in file order.
            A blank line produces an empty inner list.

        Raises:
            OSError: If the file cannot be opened for reading.
        """
        db = []
        # Read-only mode is enough (the original 'r+' also demanded write
        # permission); the context manager closes the file even on error.
        with open(self.file_name, 'r') as file_:
            for line in file_:
                db.append(
                    [self._parse_token(token, num_lost) for token in line.split()]
                )
        return db
class k_means:
    """K-means clustering over a list of equal-length numeric points.

    Points and centroids are plain Python lists of numbers; NumPy is only
    used for random numbers and for averaging.
    """

    def __init__(self, point_list):
        """Store the data set.

        Args:
            point_list: Non-empty list of points. The length of the first
                point defines ``dimension``.

        Raises:
            IndexError: If ``point_list`` is empty.
        """
        # Kept on the instance (not the class) so that two k_means objects
        # built from different data sets do not overwrite each other.
        self.point_list = point_list
        self.dimension = len(point_list[0])

    def add_random_point(self):
        """Return a random point inside the bounding box of the data.

        Returns:
            A list of ``dimension`` floats; coordinate ``i`` is drawn
            uniformly between the smallest and largest value that the data
            has in column ``i``.
        """
        point = []
        for axis in range(self.dimension):
            column = [row[axis] for row in self.point_list]
            low = min(column)
            high = max(column)
            point.append(numpy.random.rand() * (high - low) + low)
        return point

    def seprate_data(self, gather_point_list):
        """Assign every data point to its nearest centroid.

        Args:
            gather_point_list: List of centroids.

        Returns:
            A list with one cluster (list of points) per centroid, in the
            same order as ``gather_point_list``. A cluster may be empty.
            When two centroids are equally close, the earlier one wins.
        """
        split_data = [[] for _ in gather_point_list]
        for data_reader in self.point_list:
            nearest_index = 0
            nearest_length = None
            # enumerate() yields the real position even when two centroids
            # are equal, where list.index() would always return the first.
            for index, point_reader in enumerate(gather_point_list):
                now_length = self.euclidean_distance(data_reader, point_reader)
                # K-means assigns a point to the *closest* centroid; the
                # previous comparison picked the farthest one.
                if nearest_length is None or now_length < nearest_length:
                    nearest_length = now_length
                    nearest_index = index
            split_data[nearest_index].append(data_reader)
        return split_data

    def euclidean_distance(self, point, gather_point):
        """Return the Euclidean distance between two points.

        Args:
            point: First point; its length decides how many coordinates
                are compared.
            gather_point: Second point, at least as long as ``point``.
        """
        return math.sqrt(
            sum((point[i] - gather_point[i]) ** 2 for i in range(len(point)))
        )

    def get_centeral_point(self, seprate_data):
        """Compute the centroid (coordinate-wise mean) of every cluster.

        Args:
            seprate_data: List of clusters, each a list of points.

        Returns:
            One centroid (list of floats) per non-empty cluster. Empty
            clusters are skipped, so the result can be shorter than
            ``seprate_data``.
        """
        gather_point = []
        for point_list_reader in seprate_data:
            if len(point_list_reader) == 0:
                continue
            # Plain ndarray mean; numpy.mat no longer exists in NumPy 2.x.
            centre = numpy.asarray(point_list_reader, dtype=float).mean(axis=0)
            gather_point.append(centre.tolist())
        return gather_point

    def get_seprate_point(self, k=2, error=0.000001):
        """Run k-means from random start centroids until it settles.

        Args:
            k: Number of random start centroids. Centroids whose cluster
                ends up empty are dropped, so fewer than ``k`` clusters may
                be returned.
            error: Iteration stops once the total absolute movement of the
                centroids in one round falls below this value.

        Returns:
            A tuple ``(gather_point, seprate_data)``: the final centroids
            and the clusters assigned to them, index-aligned.
        """
        gather_point = [self.add_random_point() for _ in range(k)]
        while True:
            last_gather_point = gather_point
            clusters = self.seprate_data(last_gather_point)
            gather_point = self.get_centeral_point(clusters)
            # A centroid was dropped because its cluster was empty: the two
            # lists cannot be compared yet, so run another round first.
            if len(gather_point) != len(last_gather_point):
                continue
            # Sum of absolute differences, so that shifts in opposite
            # directions cannot cancel out and fake convergence.
            shift = numpy.abs(
                numpy.asarray(gather_point) - numpy.asarray(last_gather_point)
            ).sum()
            if not shift >= error:
                return gather_point, clusters
if __name__ == '__main__':
    # Demo run: load the points from 'text.dat' (tokens that cannot be
    # parsed become 1.0), cluster them with a random number of start
    # centroids, and print the resulting centroids.
    db = point_data_reader('text.dat').get_data_list(num_lost=1.0)
    model = k_means(db)
    # randint excludes its upper bound, so (1, 11) draws from 1..10 inclusive,
    # the same range as the deprecated random_integers(1, 10).
    cluster_count = numpy.random.randint(1, 11)
    gather_point, seprate_data = model.get_seprate_point(k=cluster_count, error=0.001)
    print(gather_point)