C#实现基于图论的聚类算法
ASCDT算法既能处理复杂数据,还能处理大量噪声,识别各种点簇形状,并且对脖颈问题有着较好的处理效果,实现ASCDT算法的基本思想如下:
ASCDT算法是基于Delaunay三角网的聚类算法,利用Delaunay三角网对于点群之间的空间邻近性的表达,识别出全局长边、局部长边、贴合边以及脖颈边,然后逐个删除,并不断寻找最小子图,从而达到点群聚类的效果,最后选取簇中心作为点群代表,实现综合。
基于图论的聚类算法流程
实现ASCDT算法的基本流程如下:
(1)构建elaunay三角网:交互绘制点,并构建相应的Delaunay三角网,加载图层并显示;
(2)删除全局长边:根据Delaunay三角网,利用整体和局部的关系,对所有边进行遍历,构建相应的全局目标函数:
G
l
o
b
a
l
_
L
o
n
g
_
E
d
g
e
s
(
P
i
)
=
{
∣
e
j
∣
,
∣
e
j
∣
≥
G
l
o
b
a
l
_
C
u
t
_
V
a
l
u
e
(
P
i
)
}
G
l
o
b
a
l
_
C
u
t
_
V
a
l
u
e
(
P
i
)
=
G
l
o
b
a
l
M
e
a
n
(
D
T
)
+
G
l
o
b
a
l
_
M
e
a
n
(
D
T
)
M
e
a
n
D
T
1
(
P
i
)
.
G
l
o
b
a
l
_
V
a
r
i
a
t
i
o
n
(
D
T
)
Global\_Long\_Edges(P_i)=\{|e_j|, |e_j|\geq Global\_Cut\_Value(P_i) \}\\ Global\_Cut\_Value(Pi)=Global_Mean(DT)+\frac{Global\_Mean(DT)}{Mean_{DT}^{1}(P_i)}.Global\_Variation(DT)
Global_Long_Edges(Pi)={∣ej∣,∣ej∣≥Global_Cut_Value(Pi)}Global_Cut_Value(Pi)=GlobalMean(DT)+MeanDT1(Pi)Global_Mean(DT).Global_Variation(DT)
边长高于全局目标函数者为全局长边,删除全局长边。
(3)删除局部长边:对于每一个簇依然存在整体和局部的关系,对簇类所有边进行遍历,构建相应的局部目标函数:
L
o
c
a
l
_
L
o
n
g
_
E
d
g
e
s
(
P
j
)
=
{
∣
e
j
∣
,
∣
e
j
∣
≥
L
o
c
a
l
_
C
u
t
_
V
a
l
u
e
(
P
j
)
}
L
o
c
a
l
_
C
u
t
_
E
d
g
e
s
(
P
j
)
=
M
e
a
n
G
i
2
+
β
.
M
e
a
n
_
V
a
r
i
a
t
i
o
n
(
G
i
)
Local\_Long\_Edges(P_j)=\{|e_j|, |e_j|\geq Local\_Cut\_Value(P_j) \}\\ Local\_Cut\_Edges(P_j)=Mean_{G_i}{2}+\beta . Mean\_Variation(G_i)
Local_Long_Edges(Pj)={∣ej∣,∣ej∣≥Local_Cut_Value(Pj)}Local_Cut_Edges(Pj)=MeanGi2+β.Mean_Variation(Gi)
边长高于局部目标函数者为局部长边,删除局部长边。
(4)删除脖颈和链:进一步考虑连接到集群之间链上各点的边。给定一个子图Gi,链上的两个点Pj和Pk(Pj∈Gi,Pk∈Gi)直接连接。 如果直接链接到Pj和Pk的点数都是两个,并且没有其他链接边Pj与Pk,那么Pj与Pk之间的边应该被删除。
(5)基于深度遍历的思想,不断寻找最小子图,相连接的为一个簇内的点,然后基于K-MEANS找代表的思想,进行聚类,获取中心点,即可实现点群的综合。
(6)适合多尺度空间:为了让算法能够适应多尺度的表达,我们设置了β因子,用于控制局部长边的删除阈值,提高了多尺度空间的适应性。
核心代码
引用核心库
添加引用:
using System.Windows.Forms;
using ESRI.ArcGIS.Carto;
using ESRI.ArcGIS.Controls;
using ESRI.ArcGIS.esriSystem;
using ESRI.ArcGIS.Geodatabase;
using ESRI.ArcGIS.Display;
using ESRI.ArcGIS.Geometry;
主函数调用
实现ASCDT算法代码如下:
public static List<TriCluster> executeASCDT(List<TriPoint> triPoint, List<TriCluster> clusters, double beta)
{
// 删除全局长边
List<TriPoint> stage1_point = CutGlobalLongEdge(triPoint);
// 对点集进行全局分类
List<TriCluster> Global_clusters = new List<TriCluster>();
Global_clusters = getTheSubGraph(stage1_point);
List<TriCluster> local_clusters = new List<TriCluster>();
for (int i = 0; i < Global_clusters.Count; i++)
{
TriCluster temp_cluster = Global_clusters[i];
// 删除局部长边
List<TriPoint> stage2_point = CutLocalLongEdge(temp_cluster.clusterPoints, beta);
// 新建簇类,保存分裂簇
List<TriCluster> temp_clusters = new List<TriCluster>();
temp_clusters = getTheSubGraph(stage2_point);
foreach (TriCluster local_cluster in temp_clusters){
local_clusters.Add(local_cluster);
}
}
for (int i = 0; i < local_clusters.Count; i++){
TriCluster local_cluster = local_clusters[i];
// 去除链
List<TriPoint> stage3_point = CutLocalLinkEdge(local_cluster.clusterPoints);
List<TriCluster> temp_clusters = new List<TriCluster>();
temp_clusters = getTheSubGraph(stage3_point);
foreach (TriCluster temp_cluster in temp_clusters) {
clusters.Add(temp_cluster);
}
}
return clusters;
}
删除全局长边
实现全局长边的删除代码:
#region CutGlobalLongEdge
/*说明:该函数传入的是切断全局长边后的聚类点集,每个点中的全局长边都删掉了
* 传出的东西是切掉的List
* */
private static List<TriPoint> CutGlobalLongEdge(List<TriPoint> tPoint)
{
TriPoint mpoint = new TriPoint();
// 获取全局方差
double Global_Variation = getGlobalVariataion(tPoint);
// 获取全局平均边长
double Global_Mean;
int count = 0;
double sum_edge = 0.0;
foreach (TriPoint point in tPoint){
List<TriEdge> temp_Edges = point.adjcentEdges;
foreach (TriEdge one_edge in temp_Edges){
sum_edge += one_edge.Length;
count++;
}
}
Global_Mean = sum_edge / count;
for (int index = 0; index < tPoint.Count; index++) {
// 遍历所有点,对每个点进行局部删边,并覆盖原有点
mpoint = CutGlobalPoint(tPoint[index], Global_Variation, Global_Mean);
tPoint[index] = mpoint;
}
return tPoint;
}
private static double getGlobalVariataion(List<TriPoint> tPoint)
{
// 用于存储全局平均标准差
double Global_mean_var = 0.0;
// 获取全局平均
double Global_Mean = 0.0;
int count = 0;
double sum_edge = 0.0;
foreach (TriPoint point in tPoint)
{
List<TriEdge> temp_Edges = point.adjcentEdges;
foreach (TriEdge one_edge in temp_Edges)
{
sum_edge += one_edge.Length;
}
count++;
}
Global_Mean = sum_edge / count;
foreach (TriPoint point in tPoint)
{
List<TriEdge> temp_Edges = point.adjcentEdges;
foreach (TriEdge one_edge in temp_Edges)
{
Global_mean_var += (one_edge.Length - Global_Mean) * (one_edge.Length - Global_Mean);
}
count++;
}
Global_mean_var = Global_mean_var / (count - 1);
Global_mean_var = Math.Sqrt(Global_mean_var);
return Global_mean_var;
}
/*说明:计算单个点的LocalCutValue,Mean(P)^2+β*Mean_Variation(G)
* 其中,β是弹性因子,应该设置在1-1.5中间,(最后放在FrmCluster的bar上,拖动改变β,β越小,聚类越多)
* MeanVariation是之前计算的在全局的平局局部标准差*/
private static TriPoint CutGlobalPoint(TriPoint point, double Global_Variation, double Global_Mean)
{
//计算一阶平均
double sum = 0, mean;
foreach (TriEdge edge in point.adjcentEdges)
{
sum += edge.Length;
}
mean = sum / point.adjcentEdges.Count;
List<TriEdge> good_edges = new List<TriEdge>();
for (int i = 0; i < point.adjcentEdges.Count; i++)
{
TriEdge edge = point.adjcentEdges[i];
if (edge.Length < (Global_Mean + Global_Mean / mean * Global_Variation) * 0.3)
{
good_edges.Add(edge);
}
}
point.adjcentEdges = good_edges;
return point;
}
#endregion
删除局部长边
实现局部长边的删除:
#region CutLocalLongEdge
/*说明:该函数传入的是切断全局长边后的聚类点集,每个点中的全局长边都删掉了
* 传出的东西是切掉的List
* */
private static List<TriPoint> CutLocalLongEdge(List<TriPoint> tPoint, double beta)
{
TriPoint mpoint = new TriPoint();
// 获取平均变量
double Mean_Variation = getMeanVariataion(tPoint);
List<TriPoint> tripoints = new List<TriPoint>();
// 遍历点集中的所有点,判断是否符合要求
for (int index = 0; index < tPoint.Count; index++){
// 遍历所有点,对每个点进行局部删边,并覆盖原有点
mpoint = CutLocalyPoint(tPoint[index], beta, Mean_Variation);
tripoints.Add(mpoint);
}
return tripoints;
}
/* 说明:计算在簇内的局部平均标准差,为局部CutVale准备
* 特别说明:这里调用的时候是使用Cluster.clusterPoints来使用!!!!!
*/
private static double getMeanVariataion(List<TriPoint> tPoint)
{
double sumVar = 0;
double sum = 0;
double mean = 0;
double sumdiff = 0;
foreach (TriPoint point in tPoint)
{
//计算单个
foreach (TriEdge edge in point.adjcentEdges)
{
sum += edge.Length;
}
mean = sum / point.adjcentEdges.Count;
foreach (TriEdge edge in point.adjcentEdges)
{
sumdiff += (edge.Length - mean) * (edge.Length - mean) / (point.adjcentEdges.Count - 1);
}
sumVar += Math.Sqrt(sumdiff);
}
return sumVar / tPoint.Count;
}
/*说明:计算单个点的LocalCutValue,Mean(P)^2+β*Mean_Variation(G)
* 其中,β是弹性因子,应该设置在1-1.5中间,(最后放在FrmCluster的bar上,拖动改变β,β越小,聚类越多)
* MeanVariation是之前计算的在全局的平局局部标准差*/
private static TriPoint CutLocalyPoint(TriPoint point, double beta, double Mean_Variation)
{
//计算局部二阶平均
double sum = 0, count = 0, mean;
List<TriPoint> p = new List<TriPoint>();//存二阶的点
p.Add(point);
foreach (TriEdge edge in point.adjcentEdges)
{
p.Add(edge.endPoint);
}
foreach (TriPoint pt in p)
{
foreach (TriEdge ed in pt.adjcentEdges)
{
sum += ed.Length;
count++;
}
}
mean = sum / count;
List<TriEdge> good_edges = new List<TriEdge>();
for (int i = 0; i < point.adjcentEdges.Count; i++)
{
TriEdge edge = point.adjcentEdges[i];
if (edge.Length < (mean + beta * Mean_Variation * 0.001))
{
good_edges.Add(edge);
}
}
point.adjcentEdges = good_edges;
return point;
}
#endregion
#region CutLocalLinkEdge
private static List<TriPoint> CutLocalLinkEdge(List<TriPoint> points)
{
points = Observation1(points);
points = Local_Aggregation_Criterion(points);
return points;
}
切除链和脖颈
切除链和脖颈:
/*说明:目的切掉链状的连接
* 条件(简化)相邻连通,且都只有两条边线
* */
private static List<TriPoint> Observation1(List<TriPoint> points)
{
List<TriPoint> Obs = points;
for (int i = 0; i < Obs.Count; i++)
{
List<TriEdge> good_edges = new List<TriEdge>();
if (Obs[i].adjcentEdges.Count == 2)
{
for (int index = 0; index < Obs[i].adjcentEdges.Count; index++)
{
TriEdge edge = Obs[i].adjcentEdges[index];
if (edge.endPoint.adjcentEdges.Count != 2)
{
good_edges.Add(edge);
}
}
Obs[i].adjcentEdges = good_edges;
}
}
return Obs;
}
private static List<TriPoint> Local_Aggregation_Criterion(List<TriPoint> points){
double[] vector = new double[2];
double k = 1;
TriPoint temp = new TriPoint();
foreach (TriPoint point in points)
{
vector=Vector(point, vector, k);
temp.point.X = vector[0];
temp.point.Y = vector[1];//莫混淆了,这里的point是Tri的一个属性
List<TriEdge> edges = new List<TriEdge>();
foreach (TriEdge edge in point.adjcentEdges)
{
if (edge.CosValue(temp) >= 0)
{
edges.Add(edge);
}
}
point.adjcentEdges = edges;
}
return points;
}
/*说明:计算关于点凝聚力的总方向
* k是一个参数,在这里选1
* vector出去之后要变成TriPoint,再直接用TriEdge的来算
* */
private static double[] Vector(TriPoint point, double[] vector, double k)//vec是否传得回去?
{
double temp=0;
foreach (TriEdge edge in point.adjcentEdges)
{
temp = 1 / (edge.Length * edge.Length) * k;
vector[0] += edge.endPoint.point.X;
vector[1] += edge.endPoint.point.Y;
}
vector[0] *= temp;
vector[1] *= temp;
return vector;
}
#endregion
最小生成子图
深度遍历寻找最小子图:
#region GetTheSubGrap
/*说明:在剪去全局长边之后,图上所有的点被初步聚类形成簇,每一个连通的簇被称作一个子图
* 该函数用于找到子图并且拆分归类
* */
private static List<TriCluster> getTheSubGraph(List<TriPoint> triPoint)
{
// 新建栈用于图的深度遍历
Stack<TriPoint> mystack = new Stack<TriPoint>();
List<TriCluster> temp_clusters = new List<TriCluster>();
// 循环查找子图
while (triPoint.Count > 0)
{
// 每次选定任意一个节点,开始进行深度遍历
mystack.Push(triPoint[0]);
triPoint.RemoveAt(0);
// 用于存放子图节点
List<TriPoint> temp_points = new List<TriPoint>();
// 对栈进行判断,若不为空,则删除一个节点,并加入他的子节点
while (mystack.Count > 0)
{
TriPoint triPoint1 = mystack.Pop();
//
temp_points.Add(triPoint1);
// 根据点的所有边,查找它的所有子节点,并将子节点入栈
for (int index = 0; index < triPoint1.adjcentEdges.Count; index++)
{
if (triPoint.Contains(triPoint1.adjcentEdges[index].endPoint))
{
mystack.Push(triPoint1.adjcentEdges[index].endPoint);//入栈
// 删除原有节点列表中的对应节点
triPoint.Remove(triPoint1.adjcentEdges[index].endPoint);
// 顺便删除该点对应所有子节点的对应边 triPoint1.adjcentEdges[index].endPoint.remove_edge(triPoint1.adjcentEdges[index]);
}
}
}
TriCluster temp_cluster = new TriCluster();
temp_cluster.clusterPoints = temp_points;
temp_clusters.Add(temp_cluster);
}
//@TODO:单个点的处理
return temp_clusters;
}
#endregion
运行结果
交互绘制点要素结果
构建Delaunay三角网结果
删除全局长边结果
删除局部长边结果
删除链和脖颈结果
最终实现结果
添加尺度控制因子
接下来我们趁热打铁,加入β因子,控制局部边的删除,以实现空间多尺度效应下的点群综合:
private void btnChange_Click(object sender, EventArgs e)
{
triPoint.Clear();
listTriangle.Clear();
clusters.Clear();
IFeatureLayer pFeatureLyrLine = this.axMapControl1.get_Layer(AEOperation.GetLayerByName(this.axMapControl1, "Delaunay Lines")) as IFeatureLayer;
IDataset pDataset = pFeatureLyrLine.FeatureClass as IDataset;
IWorkspace pWorkspace = pDataset.Workspace;
IWorkspaceEdit pWorkspaceEdit = pWorkspace as IWorkspaceEdit;//基本信息获取
double beta = Convert.ToDouble(textBoxBeta.Text.ToString());
DoASCDT(pFeatureLyrLine, pWorkspaceEdit, beta);
}
运行结果
(1)β=1.0,结果分成了20个簇
(2)β=1.5,结果分成了25个簇
(注:不同颜色的点代表聚类方法聚类结果,红色十字代表最后点综合结果。可见算法能够很好的校正由于空间尺度不同带来的影响,实现点群综合程度的控制)。
总结
在基于空间自适应聚类算法(ascdt算法)实现点群综合的实现过程中,解决了聚类过程中的抗噪声干扰、剪切脖颈和链、多尺度控制等问题,取得了不错的聚合综合效果。
代码下载地址:https://download.csdn.net/download/zhouxuechao/15630049