The Louvain method for community detection
The Louvain method is a non-overlapping community detection algorithm that runs quickly.
The corresponding paper: http://arxiv.org/abs/0803.0476
The authors' page introducing the algorithm:
http://perso.uclouvain.be/vincent.blondel/research/louvain.html
Download page for the algorithm code (C++ and MATLAB versions only, no Java version):
https://sites.google.com/site/findcommunities/
When I found the C++ version I wanted to port it to Java, but its data structures turned out to be fairly involved and I lost patience, so I decided to write a Java version from scratch. My own version not only took considerably more time, but its modularity values also differed noticeably from those reported in the paper.
So I searched and found an existing Java implementation:
http://wiki.cns.iu.edu/display/CISHELL/Louvain+Community+Detection
Besides the Louvain method, the author also implemented the SLM (smart local moving) algorithm; both implementations are packaged into a jar file, and the source code can be downloaded here:
http://www.ludowaltman.nl/slm/
The overall idea of the algorithm:
1. Initially, every node forms its own small cluster.
2. By optimizing the modularity function (see the note after this list), move each node into the 'best' cluster. This step repeats until no node changes its cluster anymore.
3. Reduce the network: collapse all nodes of a cluster into a single node, and check whether the reduced graph can still be improved. If it can, apply step 2 to the reduced graph again.
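For reference, the quantity optimized in step 2 is the standard weighted modularity. Writing m for the total edge weight, A_ij for the weight of the edge between nodes i and j, k_i for the weighted degree of node i, and c_i for the cluster of node i:

Q = \frac{1}{2m} \sum_{i,j} \left[ A_{ij} - \frac{k_i k_j}{2m} \right] \delta(c_i, c_j)

In the code below, calcQualityFunction evaluates exactly this: it sums the edge weights inside each cluster, subtracts clusterWeight[i] * clusterWeight[i] * resolution for every cluster, and divides by the (doubled) total edge weight, with resolution set to 1/(2m). The local moving step scores moving node j into a neighboring cluster l with edgeWeightPerCluster[l] - nodeWeight[j] * clusterWeight[l] * resolution, which is proportional to the corresponding modularity gain.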
After trimming down the downloaded implementation, the Louvain algorithm code is as follows:
ModularityOptimizer.java
/**
* ModularityOptimizer
*
* @author Ludo Waltman
* @author Nees Jan van Eck
* @version 1.2.0, 05/14/14
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.Random;
public class ModularityOptimizer
{
public static void main(String[] args) throws IOException
{
boolean update;
double modularity, maxModularity, resolution, resolution2;
int algorithm, i, j, modularityFunction, nClusters, nIterations, nRandomStarts;
int[] cluster;
long beginTime, endTime;
Network network;
Random random;
String inputFileName, outputFileName;
inputFileName = "fb.txt";
outputFileName = "communities.txt";
modularityFunction = 1;
resolution = 1.0;
algorithm = 1;
nRandomStarts = 10;
nIterations = 3;
System.out.println("Modularity Optimizer version 1.2.0 by Ludo Waltman and Nees Jan van Eck");
System.out.println();
System.out.println("Reading input file...");
System.out.println();
network = readInputFile(inputFileName, modularityFunction);
System.out.format("Number of nodes: %d%n", network.getNNodes());
System.out.format("Number of edges: %d%n", network.getNEdges() / 2);
System.out.println();
System.out.println("Running " + ((algorithm == 1) ? "Louvain algorithm" : ((algorithm == 2) ? "Louvain algorithm with multilevel refinement" : "smart local moving algorithm")) + "...");
System.out.println();
resolution2 = ((modularityFunction == 1) ? (resolution / network.getTotalEdgeWeight()) : resolution);
beginTime = System.currentTimeMillis();
cluster = null;
nClusters = -1;
maxModularity = Double.NEGATIVE_INFINITY;
random = new Random(100);
for (i = 0; i < nRandomStarts; i++)
{
if (nRandomStarts > 1)
System.out.format("Random start: %d%n", i + 1);
network.initSingletonClusters(); // initialize the network: one cluster per node
j = 0;
update = true;
do
{
if (nIterations > 1)
System.out.format("Iteration: %d%n", j + 1);
if (algorithm == 1)
update = network.runLouvainAlgorithm(resolution2, random);
j++;
modularity = network.calcQualityFunction(resolution2);
if (nIterations > 1)
System.out.format("Modularity: %.4f%n", modularity);
}
while ((j < nIterations) && update);
if (modularity > maxModularity)
{
cluster = network.getClusters();
nClusters = network.getNClusters();
maxModularity = modularity;
}
if (nRandomStarts > 1)
{
if (nIterations == 1)
System.out.format("Modularity: %.4f%n", modularity);
System.out.println();
}
}
endTime = System.currentTimeMillis();
if (nRandomStarts == 1)
{
if (nIterations > 1)
System.out.println();
System.out.format("Modularity: %.4f%n", maxModularity);
}
else
System.out.format("Maximum modularity in %d random starts: %.4f%n", nRandomStarts, maxModularity);
System.out.format("Number of communities: %d%n", nClusters);
System.out.format("Elapsed time: %d seconds%n", Math.round((endTime - beginTime) / 1000.0));
System.out.println();
System.out.println("Writing output file...");
System.out.println();
writeOutputFile(outputFileName, cluster);
}
private static Network readInputFile(String fileName, int modularityFunction) throws IOException
{
BufferedReader bufferedReader;
double[] edgeWeight1, edgeWeight2, nodeWeight;
int i, j, nEdges, nLines, nNodes;
int[] firstNeighborIndex, neighbor, nNeighbors, node1, node2;
Network network;
String[] splittedLine;
bufferedReader = new BufferedReader(new FileReader(fileName));
nLines = 0;
while (bufferedReader.readLine() != null)
nLines++;
bufferedReader.close();
bufferedReader = new BufferedReader(new FileReader(fileName));
node1 = new int[nLines];
node2 = new int[nLines];
edgeWeight1 = new double[nLines];
i = -1;
for (j = 0; j < nLines; j++)
{
splittedLine = bufferedReader.readLine().split("\t");
node1[j] = Integer.parseInt(splittedLine[0]);
if (node1[j] > i)
i = node1[j];
node2[j] = Integer.parseInt(splittedLine[1]);
if (node2[j] > i)
i = node2[j];
edgeWeight1[j] = (splittedLine.length > 2) ? Double.parseDouble(splittedLine[2]) : 1;
}
nNodes = i + 1;
bufferedReader.close();
nNeighbors = new int[nNodes];
for (i = 0; i < nLines; i++)
if (node1[i] < node2[i])
{
nNeighbors[node1[i]]++;
nNeighbors[node2[i]]++;
}
firstNeighborIndex = new int[nNodes + 1];
nEdges = 0;
for (i = 0; i < nNodes; i++)
{
firstNeighborIndex[i] = nEdges;
nEdges += nNeighbors[i];
}
firstNeighborIndex[nNodes] = nEdges;
neighbor = new int[nEdges];
edgeWeight2 = new double[nEdges];
Arrays.fill(nNeighbors, 0);
for (i = 0; i < nLines; i++)
if (node1[i] < node2[i])
{
j = firstNeighborIndex[node1[i]] + nNeighbors[node1[i]];
neighbor[j] = node2[i];
edgeWeight2[j] = edgeWeight1[i];
nNeighbors[node1[i]]++;
j = firstNeighborIndex[node2[i]] + nNeighbors[node2[i]];
neighbor[j] = node1[i];
edgeWeight2[j] = edgeWeight1[i];
nNeighbors[node2[i]]++;
}
{
nodeWeight = new double[nNodes];
for (i = 0; i < nEdges; i++)
nodeWeight[neighbor[i]] += edgeWeight2[i];
network = new Network(nNodes, firstNeighborIndex, neighbor, edgeWeight2, nodeWeight);
}
return network;
}
private static void writeOutputFile(String fileName, int[] cluster) throws IOException
{
BufferedWriter bufferedWriter;
int i;
bufferedWriter = new BufferedWriter(new FileWriter(fileName));
for (i = 0; i < cluster.length; i++)
{
bufferedWriter.write(Integer.toString(cluster[i]));
bufferedWriter.newLine();
}
bufferedWriter.close();
}
}
Network.java
/**
* Network
*
* @author Ludo Waltman
* @author Nees Jan van Eck
* @version 1.2.0, 05/14/14
*/
import java.io.Serializable;
import java.util.Random;
public class Network implements Cloneable, Serializable
{
private static final long serialVersionUID = 1;
private int nNodes;
private int[] firstNeighborIndex;
private int[] neighbor;
private double[] edgeWeight;
private double[] nodeWeight;
private int nClusters;
private int[] cluster;
private double[] clusterWeight;
private int[] nNodesPerCluster;
private int[][] nodePerCluster;
private boolean clusteringStatsAvailable;
public Network(int nNodes, int[] firstNeighborIndex, int[] neighbor, double[] edgeWeight, double[] nodeWeight)
{
this(nNodes, firstNeighborIndex, neighbor, edgeWeight, nodeWeight, null);
}
public Network(int nNodes, int[] firstNeighborIndex, int[] neighbor, double[] edgeWeight, double[] nodeWeight, int[] cluster)
{
int i, nEdges;
this.nNodes = nNodes;
this.firstNeighborIndex = firstNeighborIndex;
this.neighbor = neighbor;
if (edgeWeight == null)
{
nEdges = neighbor.length;
this.edgeWeight = new double[nEdges];
for (i = 0; i < nEdges; i++)
this.edgeWeight[i] = 1;
}
else
this.edgeWeight = edgeWeight;
if (nodeWeight == null)
{
this.nodeWeight = new double[nNodes];
for (i = 0; i < nNodes; i++)
this.nodeWeight[i] = 1;
}
else
this.nodeWeight = nodeWeight;
}
public int getNNodes()
{
return nNodes;
}
public int getNEdges()
{
return neighbor.length;
}
public double getTotalEdgeWeight()
{
double totalEdgeWeight;
int i;
totalEdgeWeight = 0;
for (i = 0; i < neighbor.length; i++)
totalEdgeWeight += edgeWeight[i];
return totalEdgeWeight;
}
public double[] getEdgeWeights()
{
return edgeWeight;
}
public double[] getNodeWeights()
{
return nodeWeight;
}
public int getNClusters()
{
return nClusters;
}
public int[] getClusters()
{
return cluster;
}
public void initSingletonClusters()
{
int i;
nClusters = nNodes;
cluster = new int[nNodes];
for (i = 0; i < nNodes; i++)
cluster[i] = i;
deleteClusteringStats();
}
public void mergeClusters(int[] newCluster)
{
int i, j, k;
if (cluster == null)
return;
i = 0;
for (j = 0; j < nNodes; j++)
{
k = newCluster[cluster[j]];
if (k > i)
i = k;
cluster[j] = k;
}
nClusters = i + 1;
deleteClusteringStats();
}
public Network getReducedNetwork()
{
double[] reducedNetworkEdgeWeight1, reducedNetworkEdgeWeight2;
int i, j, k, l, m, reducedNetworkNEdges1, reducedNetworkNEdges2;
int[] reducedNetworkNeighbor1, reducedNetworkNeighbor2;
Network reducedNetwork;
if (cluster == null)
return null;
if (!clusteringStatsAvailable)
calcClusteringStats();
reducedNetwork = new Network();
reducedNetwork.nNodes = nClusters;
reducedNetwork.firstNeighborIndex = new int[nClusters + 1];
reducedNetwork.nodeWeight = new double[nClusters];
reducedNetworkNeighbor1 = new int[neighbor.length];
reducedNetworkEdgeWeight1 = new double[edgeWeight.length];
reducedNetworkNeighbor2 = new int[nClusters - 1];
reducedNetworkEdgeWeight2 = new double[nClusters];
reducedNetworkNEdges1 = 0;
for (i = 0; i < nClusters; i++)
{
reducedNetworkNEdges2 = 0;
for (j = 0; j < nodePerCluster[i].length; j++)
{
k = nodePerCluster[i][j]; // k is the id of the j-th node in cluster i
for (l = firstNeighborIndex[k]; l < firstNeighborIndex[k + 1]; l++)
{
m = cluster[neighbor[l]]; // m is the cluster id of k's neighbor at position l
if (m != i)
{
if (reducedNetworkEdgeWeight2[m] == 0)
{
reducedNetworkNeighbor2[reducedNetworkNEdges2] = m;
reducedNetworkNEdges2++;
}
reducedNetworkEdgeWeight2[m] += edgeWeight[l];
}
}
reducedNetwork.nodeWeight[i] += nodeWeight[k];
}
for (j = 0; j < reducedNetworkNEdges2; j++)
{
reducedNetworkNeighbor1[reducedNetworkNEdges1 + j] = reducedNetworkNeighbor2[j];
reducedNetworkEdgeWeight1[reducedNetworkNEdges1 + j] = reducedNetworkEdgeWeight2[reducedNetworkNeighbor2[j]];
reducedNetworkEdgeWeight2[reducedNetworkNeighbor2[j]] = 0;
}
reducedNetworkNEdges1 += reducedNetworkNEdges2;
reducedNetwork.firstNeighborIndex[i + 1] = reducedNetworkNEdges1;
}
reducedNetwork.neighbor = new int[reducedNetworkNEdges1];
reducedNetwork.edgeWeight = new double[reducedNetworkNEdges1];
System.arraycopy(reducedNetworkNeighbor1, 0, reducedNetwork.neighbor, 0, reducedNetworkNEdges1);
System.arraycopy(reducedNetworkEdgeWeight1, 0, reducedNetwork.edgeWeight, 0, reducedNetworkNEdges1);
return reducedNetwork;
}
public double calcQualityFunction(double resolution)
{
double qualityFunction, totalEdgeWeight;
int i, j, k;
if (cluster == null)
return Double.NaN;
if (!clusteringStatsAvailable)
calcClusteringStats();
qualityFunction = 0;
totalEdgeWeight = 0;
for (i = 0; i < nNodes; i++)
{
j = cluster[i];
for (k = firstNeighborIndex[i]; k < firstNeighborIndex[i + 1]; k++)
{
if (cluster[neighbor[k]] == j)
qualityFunction += edgeWeight[k];
totalEdgeWeight += edgeWeight[k];
}
}
for (i = 0; i < nClusters; i++)
qualityFunction -= clusterWeight[i] * clusterWeight[i] * resolution;
qualityFunction /= totalEdgeWeight;
return qualityFunction;
}
public boolean runLocalMovingAlgorithm(double resolution)
{
return runLocalMovingAlgorithm(resolution, new Random());
}
public boolean runLocalMovingAlgorithm(double resolution, Random random)
{
boolean update;
double maxQualityFunction, qualityFunction;
double[] clusterWeight, edgeWeightPerCluster;
int bestCluster, i, j, k, l, nNeighboringClusters, nStableNodes, nUnusedClusters;
int[] neighboringCluster, newCluster, nNodesPerCluster, nodeOrder, unusedCluster;
if ((cluster == null) || (nNodes == 1))
return false;
update = false;
clusterWeight = new double[nNodes];
nNodesPerCluster = new int[nNodes];
for (i = 0; i < nNodes; i++)
{
clusterWeight[cluster[i]] += nodeWeight[i];
nNodesPerCluster[cluster[i]]++;
}
nUnusedClusters = 0;
unusedCluster = new int[nNodes];
for (i = 0; i < nNodes; i++)
if (nNodesPerCluster[i] == 0)
{
unusedCluster[nUnusedClusters] = i;
nUnusedClusters++;
}
nodeOrder = new int[nNodes];
for (i = 0; i < nNodes; i++)
nodeOrder[i] = i;
for (i = 0; i < nNodes; i++)
{
j = random.nextInt(nNodes);
k = nodeOrder[i];
nodeOrder[i] = nodeOrder[j];
nodeOrder[j] = k;
}
edgeWeightPerCluster = new double[nNodes];
neighboringCluster = new int[nNodes - 1];
nStableNodes = 0;
i = 0;
do
{
j = nodeOrder[i];
nNeighboringClusters = 0;
for (k = firstNeighborIndex[j]; k < firstNeighborIndex[j + 1]; k++)
{
l = cluster[neighbor[k]];
if (edgeWeightPerCluster[l] == 0)
{
neighboringCluster[nNeighboringClusters] = l;
nNeighboringClusters++;
}
edgeWeightPerCluster[l] += edgeWeight[k];
}
clusterWeight[cluster[j]] -= nodeWeight[j];
nNodesPerCluster[cluster[j]]--;
if (nNodesPerCluster[cluster[j]] == 0)
{
unusedCluster[nUnusedClusters] = cluster[j];
nUnusedClusters++;
}
bestCluster = -1;
maxQualityFunction = 0;
for (k = 0; k < nNeighboringClusters; k++)
{
l = neighboringCluster[k];
qualityFunction = edgeWeightPerCluster[l] - nodeWeight[j] * clusterWeight[l] * resolution;
if ((qualityFunction > maxQualityFunction) || ((qualityFunction == maxQualityFunction) && (l < bestCluster)))
{
bestCluster = l;
maxQualityFunction = qualityFunction;
}
edgeWeightPerCluster[l] = 0;
}
if (maxQualityFunction == 0)
{
bestCluster = unusedCluster[nUnusedClusters - 1];
nUnusedClusters--;
}
clusterWeight[bestCluster] += nodeWeight[j];
nNodesPerCluster[bestCluster]++;
if (bestCluster == cluster[j])
nStableNodes++;
else
{
cluster[j] = bestCluster;
nStableNodes = 1;
update = true;
}
i = (i < nNodes - 1) ? (i + 1) : 0;
}
while (nStableNodes < nNodes); // the local moving step ends only when every node is stable
newCluster = new int[nNodes];
nClusters = 0;
for (i = 0; i < nNodes; i++)
if (nNodesPerCluster[i] > 0)
{
newCluster[i] = nClusters;
nClusters++;
}
for (i = 0; i < nNodes; i++)
cluster[i] = newCluster[cluster[i]];
deleteClusteringStats();
return update;
}
public boolean runLouvainAlgorithm(double resolution)
{
return runLouvainAlgorithm(resolution, new Random());
}
public boolean runLouvainAlgorithm(double resolution, Random random)
{
boolean update, update2;
Network reducedNetwork;
if ((cluster == null) || (nNodes == 1))
return false;
update = runLocalMovingAlgorithm(resolution, random);
if (nClusters < nNodes)
{
reducedNetwork = getReducedNetwork();
reducedNetwork.initSingletonClusters();
update2 = reducedNetwork.runLouvainAlgorithm(resolution, random);
if (update2)
{
update = true;
mergeClusters(reducedNetwork.getClusters());
}
}
deleteClusteringStats();
return update;
}
private Network()
{
}
private void calcClusteringStats()
{
int i, j;
clusterWeight = new double[nClusters];
nNodesPerCluster = new int[nClusters];
nodePerCluster = new int[nClusters][];
for (i = 0; i < nNodes; i++)
{
clusterWeight[cluster[i]] += nodeWeight[i];
nNodesPerCluster[cluster[i]]++;
}
for (i = 0; i < nClusters; i++)
{
nodePerCluster[i] = new int[nNodesPerCluster[i]];
nNodesPerCluster[i] = 0;
}
for (i = 0; i < nNodes; i++)
{
j = cluster[i];
nodePerCluster[j][nNodesPerCluster[j]] = i;
nNodesPerCluster[j]++;
}
clusteringStatsAvailable = true;
}
private void deleteClusteringStats()
{
clusterWeight = null;
nNodesPerCluster = null;
nodePerCluster = null;
clusteringStatsAvailable = false;
}
}
Algorithm results
On the karate dataset
Clustering result
Result on the Facebook dataset (4039 nodes)
The downloaded Java code is not easy to follow, in particular what arrays such as firstNeighborIndex mean. So, using it as a reference, I implemented a new version whose code is easier to understand, and I added comments to the variables, functions, and key steps.
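For readers of the original code: the three arrays form a CSR-style (compressed sparse row) adjacency list. firstNeighborIndex[i] is the offset in neighbor at which node i's neighbors begin, and firstNeighborIndex[i + 1] marks where they end. A minimal sketch for a hypothetical triangle graph (edges 0-1, 1-2, 0-2, all with weight 1), stored the way readInputFile builds it:
int[] firstNeighborIndex = {0, 2, 4, 6}; // node i's neighbors occupy neighbor[firstNeighborIndex[i] .. firstNeighborIndex[i+1])
int[] neighbor = {1, 2, 0, 2, 0, 1}; // neighbors of node 0, then node 1, then node 2
double[] edgeWeight = {1, 1, 1, 1, 1, 1}; // one entry per half-edge, so every undirected edge appears twice
double[] nodeWeight = {2, 2, 2}; // weighted degree of each node (sum of its incident edge weights)
// Example: the neighbors of node 1 are neighbor[2] and neighbor[3], i.e. nodes 0 and 2.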
I also added a feature that writes the clustered network to a GML file: the nodes and edges of the input graph are preserved, and on top of that the clustering result is recorded by adding a type field to each node indicating which cluster it belongs to. The file can then be opened in Gephi and the detected communities viewed directly by partitioning on the type field.
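For illustration, the generated file looks roughly like the fragment below (the type values here are made up); generategml() in NetWork.java writes one node block per node and one edge block per undirected edge:
graph
[
 node
 [
 id 0
 type 1
 ]
 node
 [
 id 1
 type 0
 ]
 edge
 [
 source 0
 target 1
 ]
]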
Unlike the downloaded version, our input file must state the number of nodes in the network on its first line.
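As a concrete, made-up example, a weighted triangle graph would be described by a file like this, with columns separated by tabs; the third column is the edge weight and is optional (it defaults to 1):
3
0	1	1.0
1	2	2.0
0	2	1.5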
The program code follows.
DataSet.java
package communitydetection;
import java.io.*;
import java.util.*;
public classDataSet {
double weight[][];
LinkedList neighborlist[];
double totalEdgeWeights = 0;
int nodecount;
double nodeweight[];
DataSet(String filename) throws IOException
{
BufferedReader reader = new BufferedReader(new FileReader(filename));
String line = reader.readLine();
nodecount = Integer.parseInt(line); // read the file; the first line gives the number of nodes
weight = new double[nodecount][nodecount];
neighborlist = new LinkedList[nodecount];
for(int i=0 ;i < nodecount;i++)
neighborlist[i] = new LinkedList<Integer>();
nodeweight = new double[nodecount];
for(int i=0;i<nodecount;i++)
nodeweight[i] = 0;
while((line = reader.readLine())!=null)
{
String args[] = line.split("\t");
int node1 = Integer.parseInt(args[0]);
int node2 = Integer.parseInt(args[1]);
if(node1 != node2)
{
neighborlist[node1].add(node2);
neighborlist[node2].add(node1);
}
if(args.length > 2)
{
double we = Double.parseDouble(args[2]);
weight[node1][node2] = we;
weight[node2][node1] = we;
totalEdgeWeights += we;
nodeweight[node1] += we;
if(node2 != node1)
nodeweight[node2] += we;
}
else
{
weight[node1][node2] = 1;
weight[node2][node1] = 1;
totalEdgeWeights += 1;
nodeweight[node1] += 1;
if(node2 != node1)
nodeweight[node2] += 1;
}
}
reader.close();
}
}
NetWork.java
package communitydetection;
import java.util.*;
import java.io.*;
public class NetWork {
double weight[][]; // weight of the edge between two nodes
LinkedList neighborlist[]; // the neighbors of each node
double nodeweight[]; // the weight of each node
int nodecount; // total number of nodes in the graph
int cluster[]; // which cluster each node belongs to
int clustercount; // total number of clusters
double clusterweight[]; // the weight of each cluster
boolean clusteringStatsAvailable; // whether the clustering statistics are up to date
int nodecountsPercluster[]; // how many nodes each cluster has
int nodePercluster[][]; // nodePercluster[i][j] is the id of the j-th node in cluster i
NetWork(String filename) throws IOException
{
DataSet ds = new DataSet(filename); // ds reads the contents of the input file
weight = ds.weight;
neighborlist = ds.neighborlist;
nodecount = ds.nodecount;
nodeweight = ds.nodeweight;
initSingletonClusters();
}
NetWork()
{
}
public double getTotalEdgeWeight() // sum of all edge weights in the graph; the returned value is actually
{ // twice the true total, because every edge is counted twice
double totalEdgeWeight;
int i;
totalEdgeWeight = 0;
for (i = 0; i < nodecount; i++)
{
for (int j = 0; j < neighborlist[i].size(); j++)
{
int neighborid = (Integer) neighborlist[i].get(j);
totalEdgeWeight += weight[i][neighborid];
}
}
return totalEdgeWeight;
}
public void initSingletonClusters() // assign each node to its own cluster
{
int i;
clustercount = nodecount;
cluster = new int[nodecount];
for (i = 0; i < nodecount; i++)
cluster[i] = i;
deleteClusteringStats();
}
private void calcClusteringStats() // count how many nodes each cluster has and which nodes are in each cluster
{
int i, j;
clusterweight = new double[clustercount];
nodecountsPercluster = new int[clustercount];
nodePercluster = new int[clustercount][];
for (i = 0; i < nodecount; i++)
{
// a cluster's weight is the sum of the weights of the nodes in it
clusterweight[cluster[i]] += nodeweight[i];
nodecountsPercluster[cluster[i]]++;
}
for (i = 0; i < clustercount; i++)
{
nodePercluster[i] = new int[nodecountsPercluster[i]];
nodecountsPercluster[i] = 0;
}
for (i = 0; i < nodecount; i++)
{
j = cluster[i]; // j is the cluster id; record the ids of the nodes in each cluster
nodePercluster[j][nodecountsPercluster[j]] = i;
nodecountsPercluster[j]++;
}
clusteringStatsAvailable = true;
}
private void deleteClusteringStats()
{
clusterweight = null;
nodecountsPercluster = null;
nodePercluster = null;
clusteringStatsAvailable = false;
}
public int[] getClusters()
{
return cluster;
}
public double calcQualityFunction(double resolution) // compute the modularity value; if the sum of all edge weights is m,
{ // then resolution is 1/(2m)
double qualityFunction, totalEdgeWeight;
int i, j, k;
if (cluster == null)
return Double.NaN;
if (!clusteringStatsAvailable)
calcClusteringStats();
qualityFunction = 0;
totalEdgeWeight = 0;
for (i = 0; i < nodecount; i++)
{
j = cluster[i];
for (k = 0; k < neighborlist[i].size(); k++)
{
int neighborid = (Integer) neighborlist[i].get(k);
if (cluster[neighborid] == j)
qualityFunction += weight[i][neighborid];
totalEdgeWeight += weight[i][neighborid];
} // the final totalEdgeWeight is also
// twice the sum of all edge weights in the graph
}
for (i = 0; i < clustercount; i++)
qualityFunction -= clusterweight[i] * clusterweight[i] * resolution;
qualityFunction /= totalEdgeWeight;
return qualityFunction;
}
public int getNClusters()
{
return clustercount;
}
public void mergeClusters(int[] newCluster) // newCluster holds the cluster assignments of the reduced network
{
int i, j, k;
if (cluster == null)
return;
i = 0;
for (j = 0; j < nodecount; j++)
{
k = newCluster[cluster[j]]; // an index into newCluster in the reduced network corresponds to
if (k > i) // a cluster of the network before the reduction
i = k;
cluster[j] = k;
}
clustercount = i + 1;
deleteClusteringStats();
}
public NetWork getReducedNetwork() // reduce the whole network
{
double[] reducedNetworkEdgeWeight2;
int i, j, k, l, m, reducedNetworkNEdges2;
int[] reducedNetworkNeighbor2;
NetWork reducedNetwork;
if (cluster == null)
return null;
if (!clusteringStatsAvailable)
calcClusteringStats();
reducedNetwork = new NetWork();
reducedNetwork.nodecount = clustercount; // after the reduction, each old cluster becomes one node
reducedNetwork.neighborlist = new LinkedList[clustercount];
for (i = 0; i < clustercount; i++)
reducedNetwork.neighborlist[i] = new LinkedList();
reducedNetwork.nodeweight = new double[clustercount];
reducedNetwork.weight = new double[clustercount][clustercount];
reducedNetworkNeighbor2 = new int[clustercount - 1];
reducedNetworkEdgeWeight2 = new double[clustercount];
for (i = 0; i < clustercount; i++)
{
reducedNetworkNEdges2 = 0;
for (j = 0; j < nodePercluster[i].length; j++)
{
k = nodePercluster[i][j]; // k is the id of the j-th node in the old cluster i
for (l = 0; l < neighborlist[k].size(); l++)
{
int nodeid = (Integer) neighborlist[k].get(l);
m = cluster[nodeid];
if (m != i) // before the reduction, cluster i and cluster m are connected because the edge (k, nodeid) exists
{
if (reducedNetworkEdgeWeight2[m] == 0)
{
// a former neighboring cluster becomes a neighboring node
reducedNetworkNeighbor2[reducedNetworkNEdges2] = m;
reducedNetworkNEdges2++;
// reducedNetworkNEdges2 records how many neighbors the new node i (the old cluster i) has in the new graph
}
reducedNetworkEdgeWeight2[m] += weight[k][nodeid];
// update the weight of the edge between the new node i and its new neighbor m
}
}
reducedNetwork.nodeweight[i] += nodeweight[k];
// the new node i is the old cluster i; the old node k was in cluster i, so node i's weight includes k's weight
}
for (j = 0; j < reducedNetworkNEdges2; j++)
{
reducedNetwork.neighborlist[i].add(reducedNetworkNeighbor2[j]);
reducedNetwork.weight[i][reducedNetworkNeighbor2[j]] = reducedNetworkEdgeWeight2[reducedNetworkNeighbor2[j]];
reducedNetworkEdgeWeight2[reducedNetworkNeighbor2[j]] = 0;
// resetting to 0 lets the array be reused; otherwise a new array would be needed for each cluster's neighbor weights
}
}
return reducedNetwork;
}
public boolean runLocalMovingAlgorithm(double resolution)
{
return runLocalMovingAlgorithm(resolution, new Random());
}
// move each node into the 'best' cluster
public boolean runLocalMovingAlgorithm(double resolution, Random random)
{
boolean update;
double maxQualityFunction, qualityFunction;
double[] clusterWeight, edgeWeightPerCluster;
int bestCluster, i, j, k, l, nNeighboringClusters, nStableNodes, nUnusedClusters;
int[] neighboringCluster, newCluster, nNodesPerCluster, nodeOrder, unusedCluster;
if ((cluster == null) || (nodecount == 1))
return false;
update = false;
clusterWeight = new double[nodecount];
nNodesPerCluster = new int[nodecount]; // how many nodes each cluster has
for (i = 0; i < nodecount; i++)
{
clusterWeight[cluster[i]] += nodeweight[i];
nNodesPerCluster[cluster[i]]++;
}
nUnusedClusters = 0;
unusedCluster = new int[nodecount]; // record which clusters end up with no nodes at all during the process;
for (i = 0; i < nodecount; i++) // these clusters are removed afterwards
if (nNodesPerCluster[i] == 0)
{
unusedCluster[nUnusedClusters] = i;
nUnusedClusters++;
}
nodeOrder = new int[nodecount];
for (i = 0; i < nodecount; i++)
nodeOrder[i] = i;
for (i = 0; i < nodecount; i++) // nodeOrder shuffles the nodes so the visiting order does not bias the result
{
j = random.nextInt(nodecount);
k = nodeOrder[i];
nodeOrder[i] = nodeOrder[j];
nodeOrder[j] = k;
}
edgeWeightPerCluster = new double[nodecount];
neighboringCluster = new int[nodecount - 1];
nStableNodes = 0;
i = 0;
do
{
j = nodeOrder[i]; // j is the id of some node
nNeighboringClusters = 0;
for (k = 0; k < neighborlist[j].size(); k++)
{
int nodeid = (Integer) neighborlist[j].get(k); // nodeid is the id of one of j's neighbors
l = cluster[nodeid]; // l is the id of the cluster nodeid belongs to
if (edgeWeightPerCluster[l] == 0) // collect the clusters adjacent to node j
{
neighboringCluster[nNeighboringClusters] = l;
nNeighboringClusters++;
}
edgeWeightPerCluster[l] += weight[j][nodeid];
// edgeWeightPerCluster[l] records how much cluster l's internal edge weight would increase if node j joined it
}
// update the cluster that node j previously belonged to
clusterWeight[cluster[j]] -= nodeweight[j];
nNodesPerCluster[cluster[j]]--;
if (nNodesPerCluster[cluster[j]] == 0)
{
unusedCluster[nUnusedClusters] = cluster[j];
nUnusedClusters++;
}
bestCluster = -1; // index of the best cluster to join
maxQualityFunction = 0;
for (k = 0; k < nNeighboringClusters; k++)
{
l = neighboringCluster[k];
qualityFunction = edgeWeightPerCluster[l] - nodeweight[j] * clusterWeight[l] * resolution;
if ((qualityFunction > maxQualityFunction) || ((qualityFunction == maxQualityFunction) && (l < bestCluster)))
{
bestCluster = l;
maxQualityFunction = qualityFunction;
}
edgeWeightPerCluster[l] = 0;
// resetting to 0 lets the array be reused
}
if (maxQualityFunction == 0) // no neighboring cluster gives an improvement
{
bestCluster = unusedCluster[nUnusedClusters - 1];
nUnusedClusters--;
}
clusterWeight[bestCluster] += nodeweight[j];
nNodesPerCluster[bestCluster]++; // the best cluster gains one node
if (bestCluster == cluster[j])
nStableNodes++; // the node stays in its original cluster, so it is stable; increase the stable-node count
else
{
cluster[j] = bestCluster;
nStableNodes = 1;
update = true; // the node moved to a new cluster, so every node's stability must be reconsidered
}
i = (i < nodecount - 1) ? (i + 1) : 0;
}
while (nStableNodes < nodecount); // the local moving step ends only when every node is stable
newCluster = new int[nodecount];
clustercount = 0;
for (i = 0; i < nodecount; i++)
if (nNodesPerCluster[i] > 0)
{ // find which old cluster ids are still in use and renumber them starting from 0
newCluster[i] = clustercount;
clustercount++;
}
for (i = 0; i < nodecount; i++)
cluster[i] = newCluster[cluster[i]];
deleteClusteringStats();
return update;
}
public boolean runLouvainAlgorithm(double resolution)
{
return runLouvainAlgorithm(resolution, new Random());
}
public boolean runLouvainAlgorithm(double resolution, Random random)
{
boolean update, update2;
NetWork reducedNetwork;
if ((cluster == null) || (nodecount == 1))
return false;
update = runLocalMovingAlgorithm(resolution, random);
// update indicates whether any node changed, i.e. moved into a new cluster
if (clustercount < nodecount) // fewer clusters than nodes means a reduce step is possible; reducing does not
{ // change the modularity value
reducedNetwork = getReducedNetwork();
reducedNetwork.initSingletonClusters();
update2 = reducedNetwork.runLouvainAlgorithm(resolution, random);
// update2 indicates whether any node of the reduced network moved into a new cluster
if (update2)
{
update = true;
mergeClusters(reducedNetwork.getClusters());
// if anything changed, at least the cluster numbering changed or the number of clusters decreased
}
}
deleteClusteringStats();
return update;
}
public void generategml() throws IOException
{
BufferedWriter writer = new BufferedWriter(new FileWriter("generated.gml"));
writer.append("graph\n");
writer.append("[\n");
for(int i=0;i<nodecount;i++)
{
writer.append(" node\n");
writer.append(" [\n");
writer.append(" id "+i+"\n");
writer.append(" type "+cluster[i]+"\n");
writer.append(" ]\n");
}
for(int i=0;i<nodecount;i++)
for(int j=i+1;j<nodecount;j++)
{
if(weight[i][j] != 0)
{
writer.append(" edge\n");
writer.append(" [\n");
writer.append(" source "+i+"\n");
writer.append(" target "+j+"\n");
writer.append(" ]\n");
}
}
writer.append("]\n");
writer.close();
}
}
Main.java
package communitydetection;
import java.io.*;
import java.util.*;
public class Main {
static void detect() throws IOException
{
NetWork network;
String filename = "karate_club_network.txt";
double modularity, resolution, maxModularity;
double beginTime, endTime;
int[] cluster;
int nRandomStarts = 5;
int nIterations = 3;
network = new NetWork(filename);
resolution = 1.0 / network.getTotalEdgeWeight();
beginTime = System.currentTimeMillis();
cluster = null;
int nClusters = -1;
int i, j;
maxModularity = Double.NEGATIVE_INFINITY;
Random random = new Random(100);
for (i= 0; i < nRandomStarts; i++)
{
if (nRandomStarts > 1)
System.out.format("Random start: %d%n", i + 1);
network.initSingletonClusters(); // initialize the network: one cluster per node
j = 0;
boolean update = true; // update indicates whether any node in the network moved
do
{
if (nIterations > 1)
System.out.format("Iteration: %d%n", j + 1);
update = network.runLouvainAlgorithm(resolution, random);
j++;
modularity = network.calcQualityFunction(resolution);
if (nIterations > 1)
System.out.format("Modularity: %.4f%n", modularity);
}
while ((j < nIterations) && update);
if(modularity > maxModularity)
{
cluster = network.getClusters();
nClusters = network.getNClusters();
maxModularity = modularity;
}
if((nRandomStarts > 1))
{
if (nIterations == 1)
System.out.format("Modularity: %.4f%n", modularity);
System.out.println();
}
}
endTime = System.currentTimeMillis();
network.generategml();
if(nRandomStarts == 1)
{
if(nIterations > 1)
System.out.println();
System.out.format("Modularity: %.4f%n", maxModularity);
}
else
System.out.format("Maximum modularity in %d random starts:%.4f%n", nRandomStarts, maxModularity);
System.out.format("Number of communities: %d%n", nClusters);
System.out.format("Elapsed time: %f seconds%n", (endTime -beginTime) / 1000.0);
System.out.println();
System.out.println("Writingoutput file...");
System.out.println();
// writeOutputFile("communities.txt", cluster);
}
// write the cluster index of each node to a file
private static void writeOutputFile(String fileName, int[] cluster) throws IOException
{
BufferedWriter bufferedWriter;
int i;
bufferedWriter = new BufferedWriter(new FileWriter(fileName));
for (i= 0; i < cluster.length; i++)
{
bufferedWriter.write(Integer.toString(cluster[i]));
bufferedWriter.newLine();
}
bufferedWriter.close();
}
public static void main(String args[]) throws IOException
{
detect();
}
}
Program output
Karate results
Clustering result
The generated GML file
Imported into Gephi, the communities partitioned by the nodes' type field
Results on the Facebook dataset (4039 nodes)
The generated GML file imported into Gephi, partitioned by the nodes' type field