1. K-means
PS: 分模块编写!
from collections import deque

import numpy as np
def kmeans(data, centers):
    """Iterate assignment and center updates until ``converge`` reports success.

    Each pass assigns every sample to a center with ``cluster_dataset``,
    recomputes the centers with ``evaluate_centers`` and then hands the
    previous and the new centers to ``converge``.

    :param data: the samples to cluster.
    :param centers: the initial cluster centers.
    :return: ``(centers, clusters)`` -- the centers produced by the last
        update and the clusters that update was computed from.
    """
    converged = False
    while not converged:
        # Assign every sample to a cluster using the current centers.
        clusters = cluster_dataset(data, centers)
        # Keep the centers used for this assignment and compute the new ones.
        previous_centers, centers = centers, evaluate_centers(clusters)
        # Stop as soon as the convergence check passes.
        converged = converge(previous_centers, centers)
    return centers, clusters
def evaluate_centers(clusters):
    """Compute the mean of every cluster.

    :param clusters: mapping of cluster key -> list of samples. The samples
        only need to support ``sum(...)`` and division by an ``int``.
    :return: list with one mean per cluster, in the mapping's iteration order.
    :raises ZeroDivisionError: if a cluster has no members.
    """
    # Only the members are needed, so iterate the values directly.
    return [sum(members) / len(members) for members in clusters.values()]
def converge(cur_centers, centers, tol=1):
    """Report whether the centers moved by at most ``tol`` between two passes.

    The movement is the Euclidean norm of the element-wise difference between
    the previous and the new centers (all coordinates flattened).

    :param cur_centers: centers before the update.
    :param centers: centers after the update.
    :param tol: largest total movement still treated as converged.
    :return: ``True`` if the movement is ``<= tol``, else ``False``.
    """
    previous = np.asarray(cur_centers, dtype=float).ravel()
    current = np.asarray(centers, dtype=float).ravel()
    # The difference must be taken between the OLD and the NEW centers;
    # subtracting a value from itself would always yield 0 and end the
    # iteration after a single pass.
    shift = np.linalg.norm(previous - current)
    return bool(shift <= tol)
def cluster_dataset(data, centers) -> dict:
    """Assign every sample to its nearest center (Euclidean distance).

    :param data: iterable of samples; each must support ``sample - center``.
    :param centers: sized iterable of centers.
    :return: dict mapping each center index (``0 .. len(centers) - 1``) to the
        list of samples assigned to it. Ties go to the lowest index.
    """
    clusters = {index: [] for index in range(len(centers))}
    for element in data:
        distances = [np.linalg.norm(element - center) for center in centers]
        # ``min`` returns the first minimal index, so ties favour the
        # earlier center.
        nearest = min(range(len(distances)), key=distances.__getitem__)
        clusters[nearest].append(element)
    return clusters
# Demo: cluster five collinear points starting from two initial centers.
data = np.array([[0, 0], [1, 0], [1.5, 0], [2, 0], [3, 0]])
# Use a plain ndarray rather than ``np.mat``: ``np.mat`` is a legacy alias
# that was removed from the main NumPy namespace in NumPy 2.0.
centers = np.array([[1, 0], [2, 0]])
centers, clusters = kmeans(data, centers)
print(centers)
print(clusters)
2. 二叉树的序列化和反序列化
(1)基于前序遍历
前序遍历的话需要不断修改data里面的数据,因此需要引用传递参数,用到list进行传递,而且可以避免遇到负数时需要判断的问题。
# Definition for a binary tree node.
class TreeNode:
    """Binary tree node holding a value and left/right child links."""

    def __init__(self, x):
        self.val = x
        # Children start out unset; callers attach them afterwards.
        self.left = None
        self.right = None
class Codec:
    """Pre-order (root, left, right) codec that uses a list as its format.

    A missing child is written as the marker ``'#'``.
    """

    def serialize(self, root):
        """Encode a tree as a pre-order list.

        :type root: TreeNode
        :rtype: list
        """
        encoded = []

        def visit(node):
            # Append into one shared list instead of concatenating a new
            # list at every level of the recursion.
            if not node:
                encoded.append('#')
                return
            encoded.append(node.val)
            visit(node.left)
            visit(node.right)

        visit(root)
        return encoded

    def deserialize(self, data):
        """Decode a pre-order list back into a tree.

        The input list is left untouched. If the list ends early, the
        remaining children are treated as missing.

        :type data: list
        :rtype: TreeNode
        """
        if not data:
            return None
        # A shared iterator plays the role of the "consumed so far" cursor
        # that every recursive call must advance, without popping from (and
        # thereby emptying) the caller's list.
        tokens = iter(data)

        def build():
            x = next(tokens, '#')
            if x == '#':
                return None
            root = TreeNode(x)
            root.left = build()
            root.right = build()
            return root

        return build()
如果要序列化成str类型,反序列化时同样需要先把字符串转换成列表;序列化时最好加上分隔符",",这样既方便按引用传参,也能正确处理负数。
class Codec:
    """Pre-order (root, left, right) codec that uses a comma-separated string.

    A missing child is written as ``'#'``; node values are written with
    ``str`` and read back with ``int``.
    """

    def serialize(self, root):
        """Encode a tree as a single comma-separated string.

        :type root: TreeNode
        :rtype: str
        """
        parts = []

        def visit(node):
            if not node:
                parts.append('#')
                return
            parts.append(str(node.val))
            visit(node.left)
            visit(node.right)

        visit(root)
        # Join once at the end instead of concatenating strings recursively.
        return ','.join(parts)

    def deserialize(self, data):
        """Decode a comma-separated string back into a tree.

        An empty string decodes to ``None``. If the tokens end early, the
        remaining children are treated as missing.

        :type data: str
        :rtype: TreeNode
        """
        if not data:
            # ''.split(',') would yield [''] and int('') would raise.
            return None
        # Strings are immutable, so the recursion shares one iterator over
        # the split tokens as its moving cursor.
        tokens = iter(data.split(','))

        def dfs():
            x = next(tokens, '#')
            if x == '#':
                return None
            root = TreeNode(int(x))
            root.left = dfs()
            root.right = dfs()
            return root

        return dfs()
(2)基于层序遍历
class TreeNode:
    """Node of a binary tree; both child links start out as ``None``."""

    def __init__(self, x):
        self.left = self.right = None
        self.val = x
class Codec_bfs:
    """Level-order (BFS) codec that uses a list as its format.

    A missing child is written as ``'#'``. Children of a missing node are not
    written at all, so positions in the list do not follow heap-style index
    arithmetic.
    """

    def serialize(self, root):
        """Encode a tree as a level-order list.

        :type root: TreeNode
        :rtype: list
        """
        if not root:
            return []
        pending = deque([root])
        encoded = []
        while pending:
            node = pending.popleft()
            if node:
                encoded.append(node.val)
                # Enqueue both children, even when missing, so that their
                # '#' markers are emitted in order.
                pending.append(node.left)
                pending.append(node.right)
            else:
                encoded.append('#')
        return encoded

    def deserialize(self, data):
        """Decode a level-order list back into a tree.

        The input list is left untouched. If the list ends early, the
        remaining children are treated as missing.

        :type data: list
        :rtype: TreeNode
        """
        if not data:
            return None
        values = iter(data)
        first = next(values)
        if first == '#':
            return None
        root = TreeNode(first)
        # Nodes still waiting for their two children, in level order.
        pending = deque([root])
        while pending:
            node = pending.popleft()
            left = next(values, '#')
            if left != '#':
                node.left = TreeNode(left)
                pending.append(node.left)
            right = next(values, '#')
            if right != '#':
                node.right = TreeNode(right)
                pending.append(node.right)
        return root
# Round-trip demo: rebuild a tree from a level-order list, then encode it
# again and print the result.
string = [0, 1, 3, '#', 2, 4] + ['#'] * 5
s = Codec_bfs()
root = s.deserialize(string)
print(s.serialize(root))
注意:通过下标反序列化是不可以的!!!
比如 [1, 2, 3, #, #, 4, 5, 6, 7]:层序序列化时空结点不会再记录它的孩子,所以 6 和 7 实际上是 4 的孩子;但按下标公式 (i-1)//2 计算,6(下标为7)的双亲下标是 3,对应的却是 '#',于是结点被挂错位置或直接丢失。
# 根据结点下标计算的代码,但是错误的!!!
def deserialize(self, data):
"""Decodes your encoded data to tree.
:type data: list
:rtype: TreeNode
"""
if not data:
return None
if data[0] == '#':
return None
root = TreeNode(data[0])
nodelist = [root]
i = 1
while i < len(data):
fnode, pos = divmod(i-1, 2)
# fnode表示双亲节点的下标,pos == 0表示data[i]为左子树的值,pos == 1 表示右子树的值
if nodelist[fnode]:
x = TreeNode(data[i]) if not data[i] == '#' else None
if pos == 0: # 左节点
nodelist[fnode].left = x
else: # 右节点
nodelist[fnode].right = x
nodelist.append(x)
i += 1