# -*- coding:utf-8 -*-
import colorsys
import math
import random
import time

import numpy as np
import pandas as pd
import pylab as pl
def get_n_hls_colors(num):
    """Return ``num`` HLS colours whose hues are evenly spaced.

    Each colour is an ``[h, l, s]`` list of floats: the hue is ``k / num``
    of a full turn for ``k = 0 .. num - 1``, the lightness is drawn at random
    between 0.5 and 0.6 and the saturation between 0.9 and 1.0.

    Args:
        num: Number of colours wanted (an integer).

    Returns:
        A list of exactly ``num`` ``[h, l, s]`` lists; empty when ``num`` is
        less than 1.
    """
    if num < 1:
        return []
    step = 360.0 / num
    hls_colors = []
    # Index-based iteration: accumulating ``step`` in a float can stop one
    # short of 360 and would then emit one colour too many.
    for k in range(num):
        hue = k * step
        # Saturation is drawn before lightness, matching the order in which
        # this function has always consumed the ``random`` stream.
        s = 90 + random.random() * 10
        l = 50 + random.random() * 10
        hls_colors.append([hue / 360.0, l / 100.0, s / 100.0])
    return hls_colors
def ncolors(num):
    """Return ``num`` RGB colours built from get_n_hls_colors().

    Every HLS colour is converted with colorsys.hls_to_rgb(); each channel is
    then scaled by 255 and truncated to an int.

    Args:
        num: Number of colours wanted.

    Returns:
        A list of ``[r, g, b]`` integer lists, or an empty list when ``num``
        is less than 1.
    """
    if num < 1:
        return []
    rgb_colors = []
    for h, l, s in get_n_hls_colors(num):
        channels = colorsys.hls_to_rgb(h, l, s)
        rgb_colors.append([int(c * 255.0) for c in channels])
    return rgb_colors
def color(value):
    """Convert between an ``(r, g, b)`` tuple and a ``'#RRGGBB'`` string.

    Args:
        value: Either a tuple of integers in 0-255, or a string whose
            characters 1-6 are hexadecimal digits (upper or lower case)
            following a one-character prefix such as ``'#'``.

    Returns:
        An upper-case ``'#RRGGBB'`` string for a tuple, an ``(r, g, b)``
        tuple of ints for a string, and None for any other type.

    Raises:
        IndexError: If a tuple component is 256 or larger, or the string is
            shorter than seven characters.
        ValueError: If the string contains a non-hexadecimal character.
    """
    digit = list(map(str, range(10))) + list("ABCDEF")
    if isinstance(value, tuple):
        return '#' + ''.join(digit[i // 16] + digit[i % 16] for i in value)
    if isinstance(value, str):
        # Normalise case so lower-case input such as '#ff0080' parses too.
        text = value.upper()
        return tuple(
            digit.index(text[pos]) * 16 + digit.index(text[pos + 1])
            for pos in (1, 3, 5)
        )
    return None


def dist(a, b):
    """Return the Euclidean distance between two points.

    Only the first two coordinates of ``a`` and ``b`` are used.
    """
    return math.sqrt(math.pow(a[0] - b[0], 2) + math.pow(a[1] - b[1], 2))


def dist_min(Ci, Cj):
    """Return the smallest point-to-point distance between two clusters."""
    return min(dist(i, j) for i in Ci for j in Cj)


def dist_max(Ci, Cj):
    """Return the largest point-to-point distance between two clusters."""
    return max(dist(i, j) for i in Ci for j in Cj)


def dist_avg(Ci, Cj):
    """Return the mean point-to-point distance between two clusters."""
    return sum(dist(i, j) for i in Ci for j in Cj) / (len(Ci) * len(Cj))


def find_Min(M):
    """Locate the smallest off-diagonal entry of the distance matrix ``M``.

    Args:
        M: List of rows of pairwise cluster distances.

    Returns:
        ``(x, y, value)`` for the first entry (scanning row by row) that is
        strictly smaller than every entry before it, ignoring ``x == y``.
        ``(0, 0, inf)`` when there is no off-diagonal entry.
    """
    # Start from infinity rather than a fixed number so that data sets whose
    # distances are all large still yield a real pair.
    best = float('inf')
    x = 0
    y = 0
    for i, row in enumerate(M):
        for j, value in enumerate(row):
            if i != j and value < best:
                best = value
                x = i
                y = j
    return (x, y, best)


def _distance_matrix(clusters, dist):
    """Return the pairwise cluster distances as a list of rows."""
    return [[dist(ci, cj) for cj in clusters] for ci in clusters]


def AGNES(dataset, dist, k):
    """Agglomerative clustering: merge the closest clusters until ``k`` remain.

    Args:
        dataset: Iterable of points; each starts as its own cluster.
        dist: Callable taking two clusters (lists of points) and returning
            their distance, e.g. dist_min, dist_max or dist_avg.
        k: Target number of clusters.

    Returns:
        A list of clusters, each a list of points. Merging stops once ``k``
        clusters remain or only a single cluster is left.
    """
    C = [[point] for point in dataset]
    # A lone cluster has nothing to merge with, so never go below one.
    while len(C) > k and len(C) > 1:
        M = _distance_matrix(C, dist)
        x, y, _ = find_Min(M)
        if x == y:
            # No comparable pair was found (e.g. every distance is NaN).
            break
        C[x].extend(C[y])
        # Delete by position; list.remove() would drop the first *equal*
        # cluster, which is not necessarily the one at index y.
        del C[y]
    return C
def c11():
    """Return 6375 random ``(k, j, i)`` integer triples.

    Three samples without replacement are drawn from ``range(1, 255)``:
    15 values for the last component, 25 for the first and 17 for the middle
    one. The result is every combination of them, with the first component
    varying fastest.
    """
    import random

    # Draw order (15, then 25, then 17) is kept so a seeded ``random``
    # produces the same triples as before.
    L1 = random.sample(range(1, 255), 15)
    L2 = random.sample(range(1, 255), 25)
    L3 = random.sample(range(1, 255), 17)
    return [(k, j, i) for i in L1 for j in L3 for k in L2]
def color1(value):
    """Convert between an ``(r, g, b)`` tuple and a ``'#RRGGBB'`` string.

    Args:
        value: Either a tuple of integers in 0-255, or a string whose
            characters 1-6 are hexadecimal digits (upper or lower case)
            following a one-character prefix such as ``'#'``.

    Returns:
        An upper-case ``'#RRGGBB'`` string for a tuple, an ``(r, g, b)``
        tuple of ints for a string, and None for any other type.

    Raises:
        IndexError: If a tuple component is 256 or larger, or the string is
            shorter than seven characters.
        ValueError: If the string contains a non-hexadecimal character.
    """
    digit = list(map(str, range(10))) + list("ABCDEF")
    if isinstance(value, tuple):
        return '#' + ''.join(digit[i // 16] + digit[i % 16] for i in value)
    if isinstance(value, str):
        # Normalise case so lower-case input such as '#ff0080' parses too.
        text = value.upper()
        return tuple(
            digit.index(text[pos]) * 16 + digit.index(text[pos + 1])
            for pos in (1, 3, 5)
        )
    return None


def draw(C, c2):
    """Scatter-plot every cluster in its own colour and show the figure.

    Also prints whether the entries of ``c2`` are all distinct.

    Args:
        C: List of clusters; each cluster is a list of points whose first
            two items are the x and y coordinates.
        c2: List of hashable colour specs passed to ``pl.scatter``. Cluster
            ``i`` uses ``c2[i % len(c2)]``; when ``c2`` is empty a built-in
            list of single-letter colours is used instead.
    """
    colValue = ['r', 'y', 'g', 'b', 'c', 'k', 'm']
    palette = c2 if len(c2) > 0 else colValue

    # Font settings for the Chinese title below.
    pl.rcParams['font.sans-serif'] = ['SimHei']
    pl.rcParams['axes.unicode_minus'] = False

    # A set drops duplicates, so equal sizes mean every colour is unique.
    # The check does not depend on the cluster, so it runs once.
    if len(set(c2)) == len(c2):
        print('列表里的元素互不重复!')
    else:
        print('列表里有重复的元素!')

    for i, cluster in enumerate(C):
        coo_X = [point[0] for point in cluster]  # x coordinates
        coo_Y = [point[1] for point in cluster]  # y coordinates
        # Cycle through the palette so more clusters than colours still plot.
        pl.scatter(coo_X, coo_Y, marker='x',
                   color=palette[i % len(palette)], label=i)
    pl.title("迭代次数:")
    pl.show()


def ColourDistance(rgb_1, rgb_2):
    """Return a red-mean weighted distance between two RGB colours.

    The squared channel differences are weighted by ``2 + rmean / 256`` for
    red, ``4`` for green and ``2 + (255 - rmean) / 256`` for blue, where
    ``rmean`` is the mean of the two red values; the square root of the sum
    is returned.

    Source of the formula:
    https://blog.csdn.net/qq_16564093/article/details/80698479

    Args:
        rgb_1: First colour as a 3-item sequence ``(R, G, B)``.
        rgb_2: Second colour as a 3-item sequence ``(R, G, B)``.

    Returns:
        The distance as a float; 0.0 for identical colours.
    """
    R_1, G_1, B_1 = rgb_1
    R_2, G_2, B_2 = rgb_2
    rmean = (R_1 + R_2) / 2
    R = R_1 - R_2
    G = G_1 - G_2
    B = B_1 - B_2
    return math.sqrt(
        (2 + rmean / 256) * (R ** 2)
        + 4 * (G ** 2)
        + (2 + (255 - rmean) / 256) * (B ** 2)
    )
# https://blog.51cto.com/alun51cto/2424785
import math
def colorSimilarity(rgb1, rgb2):
    """Return the Euclidean distance between two RGB colours, scaled by 256.

    Each channel difference is divided by 256 before the distance is taken,
    so identical colours give 0.0 and larger values mean less similar colours.

    Args:
        rgb1: First colour as a 3-item sequence ``(r, g, b)``.
        rgb2: Second colour as a 3-item sequence ``(r, g, b)``.

    Returns:
        The scaled distance as a float.
    """
    r1, g1, b1 = rgb1
    r2, g2, b2 = rgb2
    r3 = (r1 - r2) / 256
    g3 = (g1 - g2) / 256
    b3 = (b1 - b2) / 256
    return math.sqrt(r3 * r3 + g3 * g3 + b3 * b3)
import time
def createRGB():
    """Return one random colour as a list of three ints in 0-255.

    Values are drawn from NumPy's global random state. The state is not
    reseeded here: reseeding from the wall clock on every call made all calls
    within the same second return the same colour and overrode any seed the
    caller had set with ``np.random.seed``.

    Returns:
        A list of three Python ints, each uniformly distributed over 0-255.
    """
    # One uniform draw over 0..255 per channel is equivalent to combining two
    # independent uniform hex digits as ``high * 16 + low``.
    return [int(channel) for channel in np.random.randint(0, 256, size=3)]
def rgbOctToHex(rgbOct):
    """Format a sequence of integer channel values as a hex colour string.

    Args:
        rgbOct: Iterable of integers, e.g. ``[r, g, b]`` with values 0-255.

    Returns:
        ``'#'`` followed by the lower-case hexadecimal form of every value,
        each padded with a leading zero to at least two digits
        (``[255, 0, 16]`` gives ``'#ff0010'``).
    """
    return "#" + "".join(hex(i)[2:].zfill(2) for i in rgbOct)
def createColors(num, min_distance=80, max_attempts=None):
    """Generate ``num`` random colours that are mutually far apart.

    Candidates come from createRGB(); one is accepted only when its
    ColourDistance() to every colour accepted so far is at least
    ``min_distance``. Progress is printed to stdout.

    Args:
        num: Number of colours wanted.
        min_distance: Smallest allowed ColourDistance() between any two
            returned colours.
        max_attempts: Optional cap on the number of candidates drawn. With
            the default None the search is unbounded and will not terminate
            if ``num`` such colours cannot be found.

    Returns:
        A list of ``num`` hex colour strings as produced by rgbOctToHex().

    Raises:
        RuntimeError: If ``max_attempts`` candidates were drawn without
            finding ``num`` colours.
    """
    d = []
    attempts = 0
    while len(d) < num:
        if max_attempts is not None and attempts >= max_attempts:
            raise RuntimeError(
                "createColors: gave up after %d attempts with %d of %s colours"
                % (attempts, len(d), num)
            )
        attempts += 1
        rgb_1 = createRGB()
        diff = None
        for rgb in d:
            diff = ColourDistance(rgb_1, rgb)
            if diff < min_distance:
                break  # too close to an accepted colour: reject it
        else:
            # The first colour has nothing to be compared with, so no
            # distance is reported for it.
            if d:
                print("diff=", diff)
            d.append(rgb_1)
    # https://blog.csdn.net/bolixi7800/article/details/100954774
    print("d=", d)
    return [str(rgbOctToHex(rgb)) for rgb in d]
import numpy as np
if __name__ == '__main__':
    # Seeding for reproducible results is left disabled here.
    # np.random.seed(5)
    data = pd.read_csv('watermelon4.0.csv', header=None)
    # Columns 1 and 2 hold the two features used for clustering.
    sample = data.iloc[:, 1:3].values
    # One (density, sugar content) tuple per sample, 30 samples in total
    # according to the original note.
    dataset = [tuple(i) for i in sample]
    C = AGNES(dataset, dist_min, 30)
    # One colour per requested cluster.
    d = createColors(30)
    print(d)
    draw(C, d)
# ---- AGNES.py ----
# -*- coding:utf-8 -*-
import math
import numpy as np
import pandas as pd
import pylab as pl
def dist(a, b):
    """Return the Euclidean distance between two points.

    Only the first two coordinates of ``a`` and ``b`` are used.
    """
    return math.sqrt(math.pow(a[0] - b[0], 2) + math.pow(a[1] - b[1], 2))


def dist_min(Ci, Cj):
    """Return the smallest point-to-point distance between two clusters."""
    return min(dist(i, j) for i in Ci for j in Cj)


def dist_max(Ci, Cj):
    """Return the largest point-to-point distance between two clusters."""
    return max(dist(i, j) for i in Ci for j in Cj)


def dist_avg(Ci, Cj):
    """Return the mean point-to-point distance between two clusters."""
    return sum(dist(i, j) for i in Ci for j in Cj) / (len(Ci) * len(Cj))


def find_Min(M):
    """Locate the smallest off-diagonal entry of the distance matrix ``M``.

    Args:
        M: List of rows of pairwise cluster distances.

    Returns:
        ``(x, y, value)`` for the first entry (scanning row by row) that is
        strictly smaller than every entry before it, ignoring ``x == y``.
        ``(0, 0, inf)`` when there is no off-diagonal entry.
    """
    # Start from infinity rather than a fixed number so that data sets whose
    # distances are all large still yield a real pair.
    best = float('inf')
    x = 0
    y = 0
    for i, row in enumerate(M):
        for j, value in enumerate(row):
            if i != j and value < best:
                best = value
                x = i
                y = j
    return (x, y, best)


def _distance_matrix(clusters, dist):
    """Return the pairwise cluster distances as a list of rows."""
    return [[dist(ci, cj) for cj in clusters] for ci in clusters]


def AGNES(dataset, dist, k):
    """Agglomerative clustering: merge the closest clusters until ``k`` remain.

    Args:
        dataset: Iterable of points; each starts as its own cluster.
        dist: Callable taking two clusters (lists of points) and returning
            their distance, e.g. dist_min, dist_max or dist_avg.
        k: Target number of clusters.

    Returns:
        A list of clusters, each a list of points. Merging stops once ``k``
        clusters remain or only a single cluster is left.
    """
    C = [[point] for point in dataset]
    # A lone cluster has nothing to merge with, so never go below one.
    while len(C) > k and len(C) > 1:
        M = _distance_matrix(C, dist)
        x, y, _ = find_Min(M)
        if x == y:
            # No comparable pair was found (e.g. every distance is NaN).
            break
        C[x].extend(C[y])
        # Delete by position; list.remove() would drop the first *equal*
        # cluster, which is not necessarily the one at index y.
        del C[y]
    return C
def draw(C):
    """Scatter-plot every cluster in its own colour and show the figure.

    Args:
        C: List of clusters; each cluster is a list of points whose first
            two items are the x and y coordinates. Colours repeat when there
            are more clusters than entries in the built-in palette.
    """
    colValue = ['r', 'y', 'g', 'b', 'c', 'k', 'm', 'peru']

    # Font settings for the Chinese title below.
    pl.rcParams['font.sans-serif'] = ['SimHei']
    pl.rcParams['axes.unicode_minus'] = False

    for i, cluster in enumerate(C):
        coo_X = [point[0] for point in cluster]  # x coordinates
        coo_Y = [point[1] for point in cluster]  # y coordinates
        pl.scatter(coo_X, coo_Y, marker='x',
                   color=colValue[i % len(colValue)], label=i)
    pl.title("fig4:聚类簇数k=5")
    pl.show()


if __name__ == '__main__':
    # Seed NumPy's random generator so results are reproducible.
    np.random.seed(5)
    data = pd.read_csv('watermelon4.0.csv', header=None)
    # Columns 1 and 2 hold the two features used for clustering.
    sample = data.iloc[:, 1:3].values
    # One (density, sugar content) tuple per sample, 30 samples in total
    # according to the original note.
    dataset = [tuple(i) for i in sample]
    C = AGNES(dataset, dist_min, 5)
    draw(C)