BA无边度网络模型构造算法
(1)增长:从一个具有 m_0 个节点的联通网络开始,每次引入一个新的节点, 并且连到 m 个已经存在的节点上,这里 m <= m_0。
(2)优先连接:一个新的节点与一个已经存在的节点 i 相连的概率 w 与节点 i 的度 k_i 之间的关系为 w = k_i / ( k_1 + k_2 + k_3 + ... + k_n ),其中n为网络中的节点的总个数。
特别的说明下代码中涉及到结构体Node的作用和思路:
三个数据域:
(1) degree:表示节点的度
(2) weight:表示被选择的概率,即 w_i = k_i / ( k_1 + k_2 + ... + k_n )
(3) probabilityDistribution: 给出每一个节点的概率分布,作用是通过产生0~1之间的随机数来做出决策。
为什么在有了weight的情况下还需要用probabilityDistribution?
example: 假设在一个网络中一共有5个节点,每个节点的度如下: d_1 = 4, d_2 = 1, d_3 = 2, d_4 = 2, d_5 = 1.
那么可以计算出每个节点的weight如下: w_1 = 0.4, w_2 = 0.1, w_3 = 0.2, w_4 = 0.2, w_5 = 0.1
也就是说, 当有一个新的节点出现时候,它连接到节点1的概率为0.4,连接到节点2的概率为0.1, ...
可以用下图来表示:
这个时候,这个新的节点要选择已有网络中的那个节点连接是随机的,但是和这些已有节点的度是成正比的,度愈大的节点越有可能被连接,此时,由系统产生一个 0~1 之间的随机数,比如0.6,那么则选择新的节点与节点 3 相连。
代码实现
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#include<string.h>
#include "for_plot.c"
int NETWORK_SIZE, M, M_0;
struct Node;
typedef struct Node* NodePtr;
typedef struct Node{
int degree;
double weight;
double probabilityDistribution;
}Node;
Node* decisionMaking;
int** adjacentMatrix;
int* initalNetwork;
void initial();
void initalNetwork_M0_connected();
void updateDecisionMakingData();
void generateFreeScaleNetwork();
void showAdjacentMatrix();
void writeDataToFile();
int main(int argc, char** argv)
{
if( 4 != argc )
{
printf("this algorithm requires 4 user-specify parameters\n");
printf("\t1.the size of network\n");
printf("\t2.the initial size of network\n");
printf("\t1.the size of \n");
printf("\texample: \"a.exe 100 3 3\"\n");
exit(0);
}
NETWORK_SIZE = atoi(argv[1]);
M_0 = atoi(argv[2]);
M = atoi(argv[3]);
srand((unsigned)time(NULL));
initial();
initalNetwork_M0_connected();
generateFreeScaleNetwork();
writeDataToFile();
showAdjacentMatrix();
write2file(adjacentMatrix, NETWORK_SIZE, "freeScale_edges.data");
return 0;
}
void initial()
{
if( !(decisionMaking = (NodePtr)malloc(sizeof(Node) * (NETWORK_SIZE + 1))) )
{
printf("decisionMaking* malloc error\n");
exit(0);
}
if( !(adjacentMatrix = (int**)malloc(sizeof(int*) * (NETWORK_SIZE + 1))) )
{
printf("adjacentMatrix** malloc error\n");
exit(0);
}
int i;
for( i = 1; i <= NETWORK_SIZE; i++ )
{
if( !(adjacentMatrix[i] = (int*)malloc(sizeof(int) * (NETWORK_SIZE + 1))) )
{
printf("adjacentMatrix[%d]* malloc error\n", i);
exit(0);
}
}
if( !(initalNetwork = (int*)malloc(sizeof(int) * (M_0 + 1))) )
{
printf("initalNetwork* malloc error\n");
exit(0);
}
}
/*
* 初始化:在NETWORK_SIZE中随机选择M_0个节点构成连通的网络。
* */
void initalNetwork_M0_connected(){
int i, j, randomFirst, randomSecond;
for( i = 1; i <= NETWORK_SIZE; i++ )
for( j = 1; j <= NETWORK_SIZE; j++ )
adjacentMatrix[i][j] = 0;
// 随机产生M_0个节点
for( i = 1; i <= M_0; i++ )
{
initalNetwork[i] = rand() % NETWORK_SIZE + 1;
for( j = 1; j < i; j++ )
if( initalNetwork[i] == initalNetwork[j] )
{
i--;
break;
}
}
for( i = 1; i < M_0; i++ )
adjacentMatrix[initalNetwork[i]][initalNetwork[i+1]] = adjacentMatrix[initalNetwork[i+1]][initalNetwork[i]] = 1;
adjacentMatrix[initalNetwork[M_0]][initalNetwork[1]] = adjacentMatrix[initalNetwork[1]][initalNetwork[M_0]] = 1;
//showAdjacentMatrix();
updateDecisionMakingData();
}
/*
* 通过adjacentMatrix更新decisionMaking数组
* */
void updateDecisionMakingData(){
int i, j, totalDegree = 0;
for( i = 1; i <= NETWORK_SIZE; i++ )
decisionMaking[i].degree = 0;
for( i = 1; i <= NETWORK_SIZE; i++ )
for( j = 1; j <= NETWORK_SIZE; j++ )
decisionMaking[i].degree += adjacentMatrix[i][j];
for( i = 1; i <= NETWORK_SIZE; i++ )
totalDegree += decisionMaking[i].degree;
for( i = 1; i <= NETWORK_SIZE; i++ )
decisionMaking[i].weight = decisionMaking[i].degree/(double)totalDegree;
decisionMaking[1].probabilityDistribution = decisionMaking[1].weight;
for( i = 2; i <= NETWORK_SIZE; i++ )
decisionMaking[i].probabilityDistribution = decisionMaking[i - 1].probabilityDistribution + decisionMaking[i].weight;
}
/*
* 构造BA无标度网络模型
* */
void generateFreeScaleNetwork(){
int i, k, j = 1, length = 0;
int random_auxiliary_old[NETWORK_SIZE + 1];
int random_auxiliary[NETWORK_SIZE + 1 - M_0];
/*
* 要保证每次引入一个<新的>的节点,所以要随机选择不重复的节点加入,并且把初始网络中的M_0个节点先删除
* */
for( i = 1; i <= NETWORK_SIZE; i++ )
random_auxiliary_old[i] = i;
for( i = 1; i <= M_0; i++ )
random_auxiliary_old[initalNetwork[i]] = 0;
for( i = 1; i <= NETWORK_SIZE; i++ )
if( random_auxiliary_old[i] != 0 )
random_auxiliary[j++] = random_auxiliary_old[i];
/*
* 添加新的节点构造无标度网络
* */
int new_node_index, new_node_value;
double random_decision = 0.0;
int targetNode; //表示找到的已经在网络中的将要连接的节点
length = NETWORK_SIZE - M_0;
int flag;
for( i = 1; i <= NETWORK_SIZE - M_0; i++ )
{
new_node_index = rand() % length + 1;
new_node_value = random_auxiliary[new_node_index];
random_auxiliary[new_node_index] = random_auxiliary[length--];
for( j = 1; j <= M; j++ ) //根据概率连接到已存在网络中的M个节点,不可以重边,不可以自连。
{
flag = 0;
random_decision = (rand()%1000)/(double)1000;
for( k = 1; k <= NETWORK_SIZE; k++ )
{
// 从第一个节点到最后一个节点比较probabilityDistribution和random_desction的大小,
// 由于probabilityDistribution是有序的,所以可以使用一些高级的算法来提高查找的效率.
if( decisionMaking[k].probabilityDistribution >= random_decision && decisionMaking[k].degree != 0 && adjacentMatrix[new_node_value][k] != 1 )
{
/*
*
* 如何按照可能性大小来选择要连哪一个点:
* 选择的已经在网络中的点是:随机产生的0-1之间的概率p,找这样的点:
* 它的累加概率(probabilityDistribution)是大于p的最小的值所对应的点。
*
*/
targetNode = k;
flag = 1;
break;
}
}
if( flag == 0 )
/*
* 之前少考虑了这种情况,因为总要选择一个网络中的点接入。但是当产生了比较大的随机概率p,可能
* 在他后面(按probabilityDistribution来说)没有可选的点(要么选择过了,要么不在网络中),则重新开始
*/
{
for( k = 1; k <= NETWORK_SIZE; k++ )
{
if( decisionMaking[k].degree != 0 && adjacentMatrix[new_node_value][k] != 1 )
{
targetNode = k;
break;
}
}
}
//printf(" target node is %d\n", targetNode);
adjacentMatrix[new_node_value][targetNode] = adjacentMatrix[targetNode][new_node_value] = 1;
}
updateDecisionMakingData(); //else新选的加入节点和已有网络中的M个边都链接后再更新
}
}
void showAdjacentMatrix(){
int i, j;
int numberOfEage = 0;
printf("\tshow adjacentMatrix\n");
for( i = 1; i <= NETWORK_SIZE; i++ )
{
for( j = 1; j <= NETWORK_SIZE; j++ )
{
printf("%d", adjacentMatrix[i][j]);
if( adjacentMatrix[i][j] == 1 )
numberOfEage++;
}
printf("\n");
}
printf("the number of eage is %d\n", numberOfEage/2);
}
void writeDataToFile(){
FILE* fout;
if( NULL == (fout = fopen("freeScaleNetwork.data", "w")))
{
printf("open file(freeScaleNetwork) error!\n");
exit(0);
}
int i;
int j;
for( i = 1; i <= NETWORK_SIZE; i++ )
{
for( j = 1; j <= NETWORK_SIZE; j++ )
fprintf(fout, "%d ", adjacentMatrix[i][j]);
fprintf(fout, "\n");
}
}
以下分别是该算法产生的BA网络的可视化图以及度分布。
for_plot.c文件
/*
* 将给定的网络@adjacentMatrix(节点的个数为@size)中的所有的连边以有序对的
* 形式输出到文件@out_filename中,每一对使用','隔开,方便python处理。
* 该函数被所有产生网络结构的函数(generateRandomNetwork.c,
* generateSmallNetwork.c和generateFreeScale.c)调用
* */
void write2file(int** adjacentMatrix, int size, char* out_filename)
{
int i, j;
FILE* fout;
if( NULL == (fout = fopen(out_filename,"w")) )
{
printf("%s cann't open!\n", out_filename);
exit(0);
}
for( i = 1; i <= size; i++ )
{
for( j = i + 1; j <= size; j++ )
{
if( adjacentMatrix[i][j] )
{
fprintf(fout, "%d %d\n", i, j);
}
}
}
fclose(fout);
}
计算网络中节点的度分布的代码(网络大小即宏NETWORK_SIZE要按照实际网络的大小修改)
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define NETWORK_SIZE 20000
char targetfilename[200];
char distribution[200];
int adjacentMatrix[NETWORK_SIZE + 1][NETWORK_SIZE + 1];
int degree[NETWORK_SIZE + 1]; //统计每一个节点的度
double statistic[NETWORK_SIZE]; //用来统计,statistic[2] = 4,表示度为2的点有4个,有度为0的,
//不可能有度为NETWORK_SIZE的点
void readDataFromFile();
void calculateDegreeDistribution();
void writeDataToFile();
int main(int argc, char* argv[]){
if( argc != 2 )
{
printf("need a parameter to indicate the network data name\n");
printf("for example: smallworldnetwork.data\n");
exit(0);
}
strcat(targetfilename, argv[1]);
printf("%s\n", targetfilename);
readDataFromFile();
calculateDegreeDistribution();
writeDataToFile();
return 0;
}
/*
* 读入网络的结构
* */
void readDataFromFile(){
FILE* fread;
if( NULL == (fread = fopen(targetfilename, "r")))
{
printf("open file(%s) error!\n");
exit(0);
}
int i;
int j;
for( i = 1; i <= NETWORK_SIZE; i++ ){
for( j = 1; j <= NETWORK_SIZE; j++ )
{
if( 1 != fscanf(fread, "%d ", &adjacentMatrix[i][j]))
{
printf("fscanf error: file: %s\t(%d, %d)\n", targetfilename, i, j);
exit(0);
}
}
}
fclose(fread);
}
void calculateDegreeDistribution(){
int i;
int j;
double averageDegree = 0.0;
for( i = 1; i <= NETWORK_SIZE; i++ )
for( j = 1; j <= NETWORK_SIZE; j++ )
degree[i] = degree[i] + adjacentMatrix[i][j];
for( i = 1; i <= NETWORK_SIZE; i++ )
averageDegree += degree[i];
printf("%f----<k> = %f\n", averageDegree,averageDegree/NETWORK_SIZE);
for( i = 1; i <= NETWORK_SIZE; i++ )
statistic[degree[i]]++;
double indentify = 0.0;
for( i = 0; i < NETWORK_SIZE; i++ )
{
statistic[i] = statistic[i]/(double)(NETWORK_SIZE);
indentify += statistic[i];
}
printf("\nindentify: %f\n", indentify);
}
/*
* 将网络的度分布写入文件 distributionOf@targetfilename
* */
void writeDataToFile(){
FILE* fwrite;
strcat(distribution, "distributionOf");
strcat(distribution, targetfilename);
printf("%s\n", distribution);
if( NULL == (fwrite = fopen(distribution, "w")))
{
printf("open file(%s) error!\n", distribution);
exit(0);
}
int i;
for( i = 0; i < NETWORK_SIZE; i++ )
{
fprintf(fwrite, "%d %f\n",i, statistic[i]);
}
fclose(fwrite);
}
可视化网络(即绘制如上的网络图的代码)的代码(需要安装igraph)
# -*- coding:UTF8 -*-
from igraph import *
edges = []
# 从文件@filename中读入网络的边
def read_edges(filename):
fin = open(filename, "r")
for line in fin:
line = line.strip()
line = line.split(" ")
edges.append((int(line[0]) - 1, int(line[1]) - 1))
def plot_network(size):
g = Graph()
g.add_vertices(size)
g.add_edges(edges)
layout = g.layout('kk')
visual_style = {}
visual_style['layout'] = layout
visual_style['bbox'] = (500,500)
visual_style['vertex_label'] = [(label + 1) for label in range(size)]
visual_style['vertex_color'] = 'white'
visual_style['vertex_size'] = g.degree() # 节点的大小与度成正比
# visual_style['vertex_size'] = 20 # 所有节点的大小都是相同的:20
plot(g, **visual_style)
def main(size):
read_edges("random_edge.data") #包含网络的连边的信息的文件的名称
plot_network(size)
main(10) # 这里的10需要更改为网络中的节点的个数