社区发现是一类基于图结构的经典聚类算法。与 K-Means、DBSCAN 等传统聚类算法不同,社区发现能够对离散数据进行团伙聚类,从而弥补传统聚类方式依赖连续值距离度量的缺点。
社区发现综述:
代码如下:
from IPython.display import SVG
import numpy as np
from scipy import sparse
import pandas as pd
from sknetwork.utils import edgelist2adjacency, edgelist2biadjacency
from sknetwork.data import convert_edge_list, load_edge_list, load_graphml
from sknetwork.visualization import svg_graph, svg_digraph, svg_bigraph
# Pipeline: DataFrame rows -> edge list -> sknetwork graph -> Louvain
# community labels -> SVG rendering of the graph.
#
# Convert the DataFrame rows into the edge-list format used by sknetwork:
# edge_list = [("Alice", "Bob", 3), ("Bob", "Carey", 2), ("Alice", "David", 1), ("Carey", "David", 2), ("Bob", "David", 3)]
# For example, ("Alice", "Bob", 3) means "Alice" visited "Bob" 3 times.
# NOTE(review): `df` is not defined in the visible code; presumably a pandas
# DataFrame whose columns are (source, target, weight) in that order --
# confirm against wherever it is loaded.
# index=False keeps the DataFrame index out of the tuples, so each tuple holds
# only the column values.
edge_list = list(df.itertuples(index=False))
# Convert the edge list into sknetwork's graph data format.
graph = convert_edge_list(edge_list)
# Pull the adjacency and the node names off the converted graph.
adjacency = graph.adjacency
names = graph.names
# Community detection: fit Louvain on the adjacency and read the resulting labels.
from sknetwork.clustering import Louvain
algo = Louvain()
algo.fit(adjacency)
labels = algo.labels_
# NOTE(review): `labels` is not passed to svg_graph below, so the rendering
# presumably does not reflect the detected communities; consider
# labels=labels if that is the intent -- confirm.
# Render the graph as SVG (2000x600) with node names and edge weights enabled.
image = svg_graph(adjacency, names=names,width=2000,height=600,display_edge_weight=True)
# Wrap the SVG markup for IPython display; as a bare expression it is shown
# when it is the last expression of a notebook cell.
SVG(image)