gSpan 是一种高效的频繁子图挖掘算法，详见 http://www.cs.ucsb.edu/~xyan/software/gSpan.htm 。
/* * gSpan algorithm implemented by coolypf * http://blog.csdn.net/coolypf */ #define _CRT_SECURE_NO_WARNINGS 1 #include <stdio.h> #include <stdlib.h> #include <string.h> #include <assert.h> #include <time.h> #include <vector> #include <map> #include <set> using namespace std; const int LABEL_MAX = 100; int min_support; int nr_graph; struct Graph { vector<int> node_label; vector<int> *edge_next, *edge_label; vector<int> gs; void removeEdge(int x, int a, int y) { for (size_t i = 0; i < node_label.size(); ++i) { int t; if (node_label[i] == x) t = y; else if (node_label[i] == y) t = x; else continue; for (size_t j = 0; j < edge_next[i].size(); ++j) { if (edge_label[i][j] == a && node_label[edge_next[i][j]] == t) { /* remove edge */ edge_label[i][j] = edge_label[i].back(); edge_label[i].pop_back(); edge_next[i][j] = edge_next[i].back(); edge_next[i].pop_back(); j--; } } } } bool hasEdge(int x, int a, int y) { for (size_t i = 0; i < node_label.size(); ++i) { int t; if (node_label[i] == x) t = y; else if (node_label[i] == y) t = x; else continue; for (size_t j = 0; j < edge_next[i].size(); ++j) if (edge_label[i][j] == a && node_label[edge_next[i][j]] == t) return true; } return false; } } *GS; struct GraphData { vector<int> nodel; vector<bool> nodev; vector<int> edgex; vector<int> edgey; vector<int> edgel; vector<bool> edgev; }; class EdgeFrequency { int *array; int u, v; public: void init(int max_node_label, int max_edge_label) { u = max_node_label + 1; v = u * (max_edge_label + 1); array = new int[u * v]; } int& operator()(int x, int a, int y) { return array[x * v + a * u + y]; } int operator()(int x, int a, int y) const { return array[x * v + a * u + y]; } } EF; struct Edge { int ix, iy; int x, a, y; Edge(int _ix, int _iy, int _x, int _a, int _y) : ix(_ix), iy(_iy), x(_x), a(_a), y(_y) {} bool operator<(const Edge &e) const { if (ix > iy) { if (e.ix < e.iy) return true; if (iy < e.iy || (iy == e.iy && a < e.a)) return true; } else if (e.ix < e.iy) { if (ix > e.ix) 
return true; if (ix == e.ix) { if (x < e.x) return true; if (x == e.x && (a < e.a || (a == e.a && y < e.y))) return true; } } return false; } }; struct GraphCode { vector<const Edge *> seq; vector<int> gs; }; vector<Graph *> S; // graph mining result void subgraph_mining(GraphCode &gc, int next); int main(int argc, char **argv) { clock_t clk = clock(); /* parse command line options */ assert(argc == 5); int num = atoi(argv[3]), denom = atoi(argv[4]); assert(num && denom && num <= denom); /* read graph data */ FILE *fp = fopen(argv[1], "r"); assert(fp); bool occ_node_label[LABEL_MAX + 1], occ_edge_label[LABEL_MAX + 1]; int freq_node_label[LABEL_MAX + 1], freq_edge_label[LABEL_MAX + 1]; memset(freq_node_label, 0, sizeof(freq_node_label)); memset(freq_edge_label, 0, sizeof(freq_edge_label)); GraphData *gd = NULL; vector<GraphData *> v_gd; while (1) { static char dummy[10]; if (fscanf(fp, "%s", dummy) <= 0) { if (gd) { v_gd.push_back(gd); for (int i = 0; i <= LABEL_MAX; ++i) { if (occ_node_label[i]) freq_node_label[i]&#