gSpan 是一种高效的频繁子图挖掘算法，参考
http://www.cs.ucsb.edu/~xyan/software/gSpan.htm 。
/*
* gSpan algorithm implemented by coolypf
* http://blog.csdn.net/coolypf
*/
#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <time.h>
#include <vector>
#include <map>
#include <set>
using namespace std;
/* Largest label value the program handles: main() sizes its per-label
 * occurrence/frequency tables as LABEL_MAX + 1 and scans indices 0..LABEL_MAX. */
const int LABEL_MAX = 100;
/* Minimum support threshold for a pattern to count as frequent.
 * NOTE(review): assigned outside this excerpt — presumably derived from the
 * num/denom command-line arguments; confirm. */
int min_support;
/* NOTE(review): presumably the number of graphs in the input database;
 * assigned outside this excerpt — confirm. */
int nr_graph;
/*
 * A labelled graph stored as per-node adjacency lists.
 *
 * node_label[i]     label of node i.
 * edge_next[i][j]   index of the node reached by the j-th adjacency entry of node i.
 * edge_label[i][j]  label of that same adjacency entry.
 *
 * edge_next and edge_label are raw pointers indexed by node; the arrays are
 * allocated by code that is not part of this struct, and nothing here frees them.
 * gs: NOTE(review): presumably ids of the database graphs that contain this
 * graph — it is not touched by either method below; confirm.
 *
 * GS is a file-scope pointer to Graph declared together with the struct.
 */
struct Graph
{
    std::vector<int> node_label;
    std::vector<int> *edge_next, *edge_label;
    std::vector<int> gs;

    /*
     * Drop every adjacency entry whose edge label is `a` and whose two
     * endpoint node labels are `x` and `y` (in either direction).
     * Removal is swap-with-last + pop, so the order of the remaining
     * entries inside a node's list is not preserved.
     */
    void removeEdge(int x, int a, int y)
    {
        for (size_t node = 0; node < node_label.size(); ++node)
        {
            const int here = node_label[node];
            if (here != x && here != y)
                continue;
            /* label the opposite endpoint must carry (x is tested first, as for x == y) */
            const int wanted = (here == x) ? y : x;

            std::vector<int> &targets = edge_next[node];
            std::vector<int> &labels = edge_label[node];
            size_t slot = 0;
            while (slot < targets.size())
            {
                const bool doomed = labels[slot] == a && node_label[targets[slot]] == wanted;
                if (!doomed)
                {
                    ++slot;
                    continue;
                }
                /* overwrite with the tail entry and shrink; stay on this slot
                 * so the entry just moved in is examined as well */
                labels[slot] = labels.back();
                labels.pop_back();
                targets[slot] = targets.back();
                targets.pop_back();
            }
        }
    }

    /*
     * Return true when at least one adjacency entry has edge label `a`
     * and joins a node labelled `x` with a node labelled `y`
     * (in either direction); false otherwise.
     */
    bool hasEdge(int x, int a, int y)
    {
        for (size_t node = 0; node < node_label.size(); ++node)
        {
            const int here = node_label[node];
            if (here != x && here != y)
                continue;
            const int wanted = (here == x) ? y : x;

            const std::vector<int> &targets = edge_next[node];
            const std::vector<int> &labels = edge_label[node];
            for (size_t slot = 0; slot < targets.size(); ++slot)
            {
                if (labels[slot] != a)
                    continue;
                if (node_label[targets[slot]] == wanted)
                    return true;
            }
        }
        return false;
    }
} *GS;
/*
 * Per-graph record that main() fills while reading the input file and
 * collects in its v_gd list. Nodes and edges are kept as parallel arrays.
 * NOTE(review): the field meanings below are inferred from the names only;
 * the code that fills them is outside this excerpt — confirm.
 */
struct GraphData
{
    std::vector<int> nodel;  /* presumably: label of each node */
    std::vector<bool> nodev; /* presumably: per-node "valid / kept" flag */
    std::vector<int> edgex;  /* presumably: first endpoint of each edge */
    std::vector<int> edgey;  /* presumably: second endpoint of each edge */
    std::vector<int> edgel;  /* presumably: label of each edge */
    std::vector<bool> edgev; /* presumably: per-edge "valid / kept" flag */
};
/*
 * Dense table of integer counters addressed by a triple
 * (node label x, edge label a, node label y).
 *
 * Layout: cell (x, a, y) lives at flat index x * v + a * u + y, where
 * u = max_node_label + 1 and v = u * (max_edge_label + 1).
 *
 * The storage is a std::vector, so:
 *  - every counter is 0 right after init() (the earlier raw `new int[n]`
 *    left the cells indeterminate, making read-before-write undefined);
 *  - calling init() again replaces the table without leaking the old one;
 *  - copying the object copies the table instead of aliasing one buffer.
 *
 * No bounds checking is done: callers must pass 0 <= x, y <= max_node_label
 * and 0 <= a <= max_edge_label, and must call init() before any access.
 *
 * EF is the file-scope instance declared together with the class.
 */
class EdgeFrequency
{
    std::vector<int> cells; /* flat counter storage, u * v entries */
    int u, v;               /* strides, see the layout note above */
public:
    EdgeFrequency() : u(0), v(0) {}

    /*
     * (Re)build the table so it can hold every triple with node labels in
     * [0, max_node_label] and edge labels in [0, max_edge_label].
     * All counters start at 0; any previous contents are discarded.
     */
    void init(int max_node_label, int max_edge_label)
    {
        u = max_node_label + 1;
        v = u * (max_edge_label + 1);
        /* widen before multiplying so the element count cannot overflow int */
        cells.assign(static_cast<size_t>(u) * static_cast<size_t>(v), 0);
    }

    /* Mutable access to the counter of (x, a, y). */
    int& operator()(int x, int a, int y) { return cells[x * v + a * u + y]; }
    /* Read-only access to the counter of (x, a, y). */
    int operator()(int x, int a, int y) const { return cells[x * v + a * u + y]; }
} EF;
/*
 * One edge entry of a graph code.
 *
 * ix, iy : indices of the two endpoints.
 * x, a, y: labels — NOTE(review): presumably (node label of ix, edge label,
 *          node label of iy), matching the (x, a, y) argument convention of
 *          Graph::hasEdge; confirm.
 *
 * NOTE(review): an edge with ix > iy appears to be what gSpan calls a
 * "backward" edge and one with ix < iy a "forward" edge — confirm against
 * the code that builds Edge objects.
 */
struct Edge
{
    int ix, iy;
    int x, a, y;

    Edge(int _ix, int _iy, int _x, int _a, int _y)
        : ix(_ix), iy(_iy), x(_x), a(_a), y(_y)
    {
    }

    /*
     * Ordering used to compare two edges.
     *
     *  - this has ix > iy:
     *      * e has e.ix < e.iy          -> this comes first;
     *      * otherwise compare (iy, a) lexicographically, smaller first.
     *  - this has ix <= iy:
     *      * e does not have e.ix < e.iy -> never less;
     *      * otherwise the LARGER ix comes first; on equal ix compare
     *        (x, a, y) lexicographically, smaller first.
     */
    bool operator<(const Edge &e) const
    {
        const bool other_ascending = e.ix < e.iy;

        if (ix > iy)
        {
            if (other_ascending)
                return true;
            if (iy != e.iy)
                return iy < e.iy;
            return a < e.a;
        }

        if (!other_ascending)
            return false;
        if (ix != e.ix)
            return ix > e.ix;
        if (x != e.x)
            return x < e.x;
        if (a != e.a)
            return a < e.a;
        return y < e.y;
    }
};
/*
 * State handed to subgraph_mining(): a sequence of edges plus a list of ints.
 * The Edge objects are referenced through non-owning const pointers.
 * NOTE(review): meanings inferred from names only — confirm against
 * subgraph_mining(), which is outside this excerpt.
 */
struct GraphCode
{
    std::vector<const Edge *> seq; /* presumably: the edge sequence (code) of the current pattern */
    std::vector<int> gs;           /* presumably: ids of the graphs that contain the pattern */
};
/* File-scope list of result graphs, held as raw pointers.
 * NOTE(review): who allocates and frees the Graph objects is not visible
 * in this excerpt. */
vector<Graph *> S; // graph mining result
/* Forward declaration; the definition is outside this excerpt.
 * NOTE(review): presumably the recursive pattern-growth step, with `next`
 * selecting where the code `gc` is extended — confirm. */
void subgraph_mining(GraphCode &gc, int next);
int main(int argc, char **argv)
{
clock_t clk = clock();
/* parse command line options */
assert(argc == 5);
int num = atoi(argv[3]), denom = atoi(argv[4]);
assert(num && denom && num <= denom);
/* read graph data */
FILE *fp = fopen(argv[1], "r");
assert(fp);
bool occ_node_label[LABEL_MAX + 1], occ_edge_label[LABEL_MAX + 1];
int freq_node_label[LABEL_MAX + 1], freq_edge_label[LABEL_MAX + 1];
memset(freq_node_label, 0, sizeof(freq_node_label));
memset(freq_edge_label, 0, sizeof(freq_edge_label));
GraphData *gd = NULL;
vector<GraphData *> v_gd;
while (1)
{
static char dummy[10];
if (fscanf(fp, "%s", dummy) <= 0)
{
if (gd)
{
v_gd.push_back(gd);
for (int i = 0; i <= LABEL_MAX; ++i)
{
if (occ_node_label[i])
freq_node_label[i]++;
if (occ_edge_label[i])
freq_edge_label[i]++;
}
}
break;
}
if (*dummy == 't')
{
int id;
fscanf(fp, "%s%d", dummy, &id);
if (gd)
{
v_gd.push_back(gd);
for (int i = 0; i <= L

本文详细介绍了 gSpan 算法的实现过程，包括图结构、边处理、子图挖掘等功能。通过阅读源代码，读者可以理解 gSpan 如何查找并挖掘频繁子图。此外，还提供了算法的效率优化和内存管理细节。
最低0.47元/天 解锁文章
3950

被折叠的 条评论
为什么被折叠?



