1.感知器算法
import numpy as np
class Data():
    """Shared mutable state for the perceptron script.

    All attributes are class-level and are read and written directly by the
    module's functions (no instances are created).
    """

    # Sample points; change_point() appends the constant C to the first
    # Polength of them in place.
    POINT = [[0, 2], [1, 1], [2, 0], [0, -2], [-1, -1], [-2, 0]]
    # Constant appended to each sample and also used as the update step size.
    C = 1
    # Weight vectors, one per group; filled by w_ready().
    W = []
    # Number of samples visited so far by iteration().
    times = 0
    # Polength is the number of data groups; the functions below use it both
    # as the number of samples visited and as the number of weight vectors.
    Polength = 2
    # Dimension of a raw (un-augmented) sample.
    Wlength = len(POINT[0])
def change_point():
    """Append the constant ``Data.C`` to each of the first ``Data.Polength`` samples.

    The samples in ``Data.POINT`` are modified in place, so calling this
    function twice appends the constant twice.
    """
    for point in Data.POINT[:Data.Polength]:
        point.append(Data.C)
def w_ready():
    """Append one zero weight vector per group to ``Data.W``.

    Each vector has ``Data.Wlength + 1`` components: the raw sample dimension
    plus the one constant that ``change_point`` appends. With the sample data
    this is the same ``[0, 0, 0]`` that used to be hard-coded.
    """
    for _ in range(Data.Polength):
        Data.W.append([0] * (Data.Wlength + 1))
def sub_list(a, b):
    """Subtract ``b`` from ``a`` element-wise, in place, and return ``a``.

    Args:
        a: Mutable sequence that is updated in place.
        b: Sequence with at least ``len(a)`` elements.

    Returns:
        The same object ``a`` after the update.

    Raises:
        IndexError: If ``b`` is shorter than ``a``.
    """
    for index in range(len(a)):
        a[index] = a[index] - b[index]
    return a
def add_list(a, b):
    """Add ``b`` to ``a`` element-wise, in place, and return ``a``.

    Args:
        a: Mutable sequence that is updated in place.
        b: Sequence with at least ``len(a)`` elements.

    Returns:
        The same object ``a`` after the update.

    Raises:
        IndexError: If ``b`` is shorter than ``a``.
    """
    for index in range(len(a)):
        a[index] = a[index] + b[index]
    return a
def iteration():
    """Run passes of the multi-class perceptron rule until a pass changes nothing.

    Sample ``Data.POINT[i]`` is treated as belonging to group ``i`` and is
    scored against every weight vector in ``Data.W``. Whenever another
    group's score is not strictly lower than the sample's own group (ties
    count as errors), the offending weight vectors are decreased by
    ``Data.C * sample`` and the sample's own weight vector is increased by
    the same amount.

    ``Data.times`` is incremented once per sample visited. The function
    prints its progress and recurses for another pass as long as any sample
    in the current pass triggered an update.
    """
    changed = False
    for i in range(Data.Polength):
        Data.times += 1
        sample = Data.POINT[i]
        d_result = [np.dot(sample, w) for w in Data.W]

        # Groups whose discriminant is >= the true group's; deciding from
        # this list (rather than from argsort) makes a tie count as an error
        # no matter which index the true group has.
        w_ready_update = [
            m for m in range(Data.Polength)
            if m != i and d_result[i] <= d_result[m]
        ]
        if not w_ready_update:
            print(Data.W)
            continue

        changed = True
        # Scale element-wise; ``Data.C * sample`` on a list would repeat the
        # list instead of multiplying its values.
        step = [Data.C * value for value in sample]
        for n in w_ready_update:
            Data.W[n] = sub_list(Data.W[n], step)
        Data.W[i] = add_list(Data.W[i], step)
        print('目前迭代' + str(Data.times) + '次')
        print('此次有变化')
        print(Data.W)

    # Converged only when the whole pass left every weight vector untouched.
    if not changed:
        print('一共迭代' + str(Data.times) + '次')
        print(Data.W)
    else:
        print('目前迭代' + str(Data.times) + '次')
        print(Data.W)
        iteration()
if __name__ == "__main__":
    # Append the constant to the samples, create the zero weight vectors,
    # then train.
    change_point()
    w_ready()
    iteration()
2.最近邻规则的简单实验法
import numpy as np
def distance(x, y):
    """Return the Euclidean distance between points ``x`` and ``y``.

    Args:
        x: Sequence of coordinates; its length decides how many are used.
        y: Sequence with at least ``len(x)`` coordinates.

    Returns:
        The distance as computed by ``np.sqrt``.

    Raises:
        IndexError: If ``y`` is shorter than ``x``.
    """
    squared = 0
    for index in range(len(x)):
        diff = x[index] - y[index]
        squared += diff * diff
    return np.sqrt(squared)
def julei1(x, T, list):
    """Place point ``x`` by the simple nearest-neighbour trial rule.

    The clusters are stored as one outer list of inner lists; the first
    element of every inner list is that cluster's centre.

    ``x`` is compared with every existing centre. It joins the cluster of
    the nearest centre when that distance is below ``T``; otherwise it
    becomes the centre of a new cluster. If there are no clusters yet, ``x``
    starts the first one.

    Args:
        x: The point to place.
        T: Distance threshold for joining an existing cluster.
        list: The outer list of clusters; modified in place. (The parameter
            name shadows the builtin inside this function only.)
    """
    if not list:
        list.append([x])
        return
    # Check all centres, not just the first, before deciding to open a new
    # cluster.
    nearest = min(list, key=lambda cluster: distance(x, cluster[0]))
    if distance(x, nearest[0]) < T:
        nearest.append(x)
    else:
        list.append([x])
# Sample points to cluster.
number = [(0, 0), (3, 8), (2, 2), (1, 1), (5, 3), (4, 18), (6, 3), (5, 4), (6, 4), (7, 5)]
# The first point seeds the first cluster. The container is deliberately not
# called ``list`` so the builtin stays usable by the rest of the file.
clusters = [[number[0]]]
for x in number[1:]:
    julei1(x, 2, clusters)
print(clusters)
3.最大最小距离算法
import numpy as np
def distance(x, y):
    """Return the Euclidean distance between points ``x`` and ``y``.

    Args:
        x: Sequence of coordinates; its length decides how many are used.
        y: Sequence with at least ``len(x)`` coordinates.

    Returns:
        The distance as computed by ``np.sqrt``.

    Raises:
        IndexError: If ``y`` is shorter than ``x``.
    """
    squared = 0
    for index in range(len(x)):
        diff = x[index] - y[index]
        squared += diff * diff
    return np.sqrt(squared)
# NOTE(review): the section heading names the max-min distance algorithm, but
# this routine is the nearest-neighbour threshold rule - confirm which one
# was intended here.
def julei1(x, T, list):
    """Place point ``x`` by the simple nearest-neighbour trial rule.

    The clusters are stored as one outer list of inner lists; the first
    element of every inner list is that cluster's centre.

    ``x`` is compared with every existing centre. It joins the cluster of
    the nearest centre when that distance is below ``T``; otherwise it
    becomes the centre of a new cluster. If there are no clusters yet, ``x``
    starts the first one.

    Args:
        x: The point to place.
        T: Distance threshold for joining an existing cluster.
        list: The outer list of clusters; modified in place. (The parameter
            name shadows the builtin inside this function only.)
    """
    if not list:
        list.append([x])
        return
    # Check all centres, not just the first, before deciding to open a new
    # cluster.
    nearest = min(list, key=lambda cluster: distance(x, cluster[0]))
    if distance(x, nearest[0]) < T:
        nearest.append(x)
    else:
        list.append([x])
# Sample points to cluster.
number = [(0, 0), (3, 8), (2, 2), (1, 1), (5, 3), (4, 18), (6, 3), (5, 4), (6, 4), (7, 5)]
# The first point seeds the first cluster. The container is deliberately not
# called ``list`` so the builtin stays usable by the rest of the file.
clusters = [[number[0]]]
for x in number[1:]:
    julei1(x, 2, clusters)
print(clusters)
4.K-means算法
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File : k-means clustering algorithm.py
@Contact : 17751019280@163.com
@Modify Time
------------
2020/11/7 2:22 下午
@Author @Version @Description
------- -------- -----------
SunBoyan 3.0 None
'''
import numpy as np
import random
# Keep the commonly used data together.
class data():
    """Shared mutable state for the k-means script (class-level attributes only)."""

    # Base data set.
    number = [(0, 0), (3, 8), (2, 2), (1, 1), (5, 3), (4, 8), (6, 3), (5, 4), (6, 4), (7, 5)]
    # Initial cluster centres: the first three points of the data set
    # (not a random choice, so runs are reproducible).
    clusterPoint = number[:3]
    # clusterResult holds the members of each cluster, one list per centre.
    clusterResult = [[] for _ in clusterPoint]
def distance(x, y):
    """Return the Euclidean distance between points ``x`` and ``y``.

    Args:
        x: Sequence of coordinates; its length decides how many are used.
        y: Sequence with at least ``len(x)`` coordinates.

    Returns:
        The distance as computed by ``np.sqrt``.

    Raises:
        IndexError: If ``y`` is shorter than ``x``.
    """
    squared = 0
    for index in range(len(x)):
        diff = x[index] - y[index]
        squared += diff * diff
    return np.sqrt(squared)
def getAverage():
    """Move every cluster centre to the mean of its members.

    For each cluster in ``data.clusterResult`` the coordinate-wise mean of
    its members is computed and written to the matching entry of
    ``data.clusterPoint`` when it differs from the current centre. A cluster
    with no members keeps its current centre (its mean is undefined) and is
    counted as unchanged.

    Returns:
        0 when every centre was already at its cluster mean, 1 when at
        least one centre moved.
    """
    unchanged = 0
    for index, members in enumerate(data.clusterResult):
        if not members:
            # Avoid dividing by zero for an empty cluster.
            unchanged += 1
            continue
        centre = tuple(sum(axis) / len(members) for axis in zip(*members))
        if data.clusterPoint[index] == centre:
            unchanged += 1
        else:
            data.clusterPoint[index] = centre
    # Compare against the actual number of centres instead of a fixed 3.
    return 0 if unchanged == len(data.clusterPoint) else 1
# Reset the membership lists before each iteration.
def clearlist():
    """Empty ``data.clusterResult`` in place and give it one empty list per centre."""
    data.clusterResult.clear()
    for _ in data.clusterPoint:
        data.clusterResult.append([])
def clusterByDis():
    """Assign points to their nearest centre and repeat until the centres settle.

    Each pass resets the membership lists, appends every point of
    ``data.number`` to the cluster whose centre is nearest (the
    lowest-indexed centre wins a tie), and then calls ``getAverage`` to
    move the centres. When ``getAverage`` reports that no centre moved, the
    final ``data.clusterResult`` is printed and the function returns.
    """
    while True:
        clearlist()
        centres = data.clusterPoint
        for point in data.number:
            nearest = min(
                range(len(centres)),
                key=lambda index: distance(centres[index], point),
            )
            data.clusterResult[nearest].append(point)
        if getAverage() == 0:
            print(data.clusterResult)
            return
# Run the clustering on the data held in the `data` class.
clusterByDis()
5.系统聚类法算法
import numpy as np
import numpy as np
def distance(x, y):
    """Return the Euclidean distance between points ``x`` and ``y``.

    Args:
        x: Sequence of coordinates; its length decides how many are used.
        y: Sequence with at least ``len(x)`` coordinates.

    Returns:
        The distance as computed by ``np.sqrt``.

    Raises:
        IndexError: If ``y`` is shorter than ``x``.
    """
    squared = 0
    for index in range(len(x)):
        diff = x[index] - y[index]
        squared += diff * diff
    return np.sqrt(squared)
class data():
    """Shared mutable state for the hierarchical clustering script."""

    # Alternative data sets kept for experimentation:
    # number = [(0,3,1,2,0),(1,3,0,1,0),(3,3,0,0,1),(1,1,0,2,0),(3,2,1,2,1),(4,1,1,1,0)]
    # number = [(0, 0), (3, 8), (2, 2), (1, 1), (5, 3), (4, 8)]
    number = [(0, 0), (3, 8), (2, 2), (1, 1), (5, 3), (4, 8), (6, 3), (5, 4), (6, 4), (7, 5)]
    # Number of points at start-up.
    length = len(number)
    # Square distance matrix; filled by getMatrix().
    X = np.zeros((length, length))
    # Number of merges performed so far.
    times = 0
    # Stores the clustered points: key -> list of member points.
    clusterResult = {}
    # Stores the points that have already been clustered.
    alredyCluster = []
# Get the position of the minimum value.
def getmin():
    """Locate the smallest non-zero entry of ``data.X``.

    Zero entries (which include the diagonal) are ignored. When several
    entries share the minimum, the first one in row-major order is returned;
    when every entry is zero the result is ``[0, 0]``.

    Returns:
        ``[row, column]`` of the selected entry, as plain ints.
    """
    # Replace zeros by +inf so they can never be selected; argmin returns a
    # flat integer index, which is what unravel_index needs.
    candidates = np.where(data.X != 0, data.X, np.inf)
    row, col = np.unravel_index(np.argmin(candidates), candidates.shape)
    minwebx, minweby = int(row), int(col)
    print('输出最小值位置:' + str(minwebx) + ' ' + str(minweby))
    return [minwebx, minweby]
def makeNumMatrix(temppos):
    """Shift both indices in ``temppos`` down by the already-clustered indices at or below them.

    For each of the two positions, the number of entries in
    ``data.alredyCluster`` that are less than or equal to it is subtracted
    from it.

    Args:
        temppos: Two-element mutable sequence of indices; modified in place.

    Returns:
        The same ``temppos`` object after adjustment.
    """
    for axis in (0, 1):
        shift = sum(1 for merged in data.alredyCluster if merged <= temppos[axis])
        temppos[axis] -= shift
    return temppos
def _merge_distance_matrix(matrix, lo, hi):
    """Return ``matrix`` with rows/columns ``lo`` and ``hi`` merged at index ``lo``."""
    # Distance from the merged cluster to every other one is the smaller of
    # the two original distances.
    merged = matrix[:, [lo, hi]].min(axis=1)
    merged = np.delete(merged, hi)
    reduced = np.delete(matrix, [lo, hi], axis=0)
    reduced = np.delete(reduced, [lo, hi], axis=1)
    # Re-open one row and one column at ``lo`` for the merged cluster.
    reduced = np.insert(reduced, lo, 0, axis=0)
    reduced = np.insert(reduced, lo, 0, axis=1)
    reduced[lo, :] = merged
    reduced[:, lo] = merged
    return reduced


def _find_cluster_key(representative):
    """Return the ``data.clusterResult`` key whose first member equals ``representative``, or None."""
    for key, members in data.clusterResult.items():
        if members[0] == representative:
            return key
    return None


# Take the element-wise minimum of the two selected columns and overwrite.
def firJudgeMinForTwo(pos):
    """Merge the two clusters at matrix positions ``pos`` and update the bookkeeping.

    The distance matrix ``data.X`` shrinks by one row and one column: the
    merged cluster takes the lower of the two indices. ``data.number`` holds
    one representative point per matrix row, so the representative at the
    higher index is removed and the one at the lower index stays in place,
    keeping ``data.number`` aligned with the rows of ``data.X``.
    ``data.clusterResult`` maps a key to the member list of every cluster
    with more than one point; the first member is the representative.

    Args:
        pos: Two-element sequence with the row and column index of the
            closest pair, as returned by ``getmin``. It is not modified.

    Raises:
        ValueError: If both positions are the same index.
    """
    lo, hi = sorted(pos)
    if lo == hi:
        raise ValueError("cannot merge a cluster with itself: %r" % (pos,))

    data.times += 1
    data.X = _merge_distance_matrix(data.X, lo, hi)

    rep_lo = data.number[lo]
    rep_hi = data.number[hi]
    print('输出即将删除的数据:')
    print(rep_lo, rep_hi)

    # Collect the points together. Clusters are found through their
    # representative, never through a stored index, because indices shift
    # every time a point is removed from data.number.
    key_lo = _find_cluster_key(rep_lo)
    key_hi = _find_cluster_key(rep_hi)
    absorbed = [rep_hi] if key_hi is None else data.clusterResult.pop(key_hi)
    if key_lo is None:
        # New cluster: use the lower index as key, stepping past keys that
        # are still in use so an existing cluster is never overwritten.
        key_lo = lo
        while key_lo in data.clusterResult:
            key_lo += 1
        data.clusterResult[key_lo] = [rep_lo]
    data.clusterResult[key_lo].extend(absorbed)
    data.number.pop(hi)

    print('输出已有的字典:')
    print(data.clusterResult)
    print('输出删除后的number数据')
    print(data.number)
# Build the initial matrix.
def getMatrix():
    """Fill ``data.X`` with the pairwise distances between the points of ``data.number``.

    Column ``k`` receives the distances from point ``k`` to every point, in
    the order of ``data.number``.
    """
    for column, point in enumerate(data.number):
        data.X[:, column] = [distance(point, other) for other in data.number]
def hierarchical_cluster():
    """Merge the closest pair repeatedly until the stop condition is reached.

    Merging stops when ``data.times`` equals 999 or when no more than three
    entries remain in ``data.number``.

    Returns:
        Always 0.
    """
    while data.times != 999 and len(data.number) > 3:
        firJudgeMinForTwo(getmin())
    return 0
# Main function.
def Main():
    """Build the distance matrix, run the clustering and print ``data.clusterResult``."""
    getMatrix()
    hierarchical_cluster()
    print(data.clusterResult)
# Script entry point: build the matrix, cluster, and print the result.
Main()