package Fp_Tree;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
 * FP-Growth frequent-itemset mining.
 *
 * Reads space-separated transactions from {@link #INFILE}, builds an FP-tree,
 * then recursively mines conditional FP-trees and accumulates the total number
 * of frequent itemsets in {@link #number}.
 *
 * NOTE(review): not thread-safe and not reusable across runs — relies on
 * mutable static state ({@code DATABASE}, {@code number}).
 */
public class Myfptree2 {
// Minimum support count: 1% of 88162 — presumably the transaction count of the
// intended dataset; TODO confirm it matches the actual input file.
final static double STANDARD = (88162 * 0.01);
// Input file: one transaction per line, items separated by single spaces.
final static File INFILE = new File("d:\\3.txt");
// All transactions, each cached as a de-duplicated item set.
final static List<Set<String>> DATABASE = new ArrayList<>();
// Running total of frequent itemsets found by the mining pass.
public static int number = 0;
// Orders header-table rows by DESCENDING support count (hence the negation).
public static Comparator<table> com = new Comparator<table>() {
@Override
public int compare(table o1, table o2) {
return -Integer.compare(o1.frequency, o2.frequency);
}
};
public static void main(String[] args) throws IOException {
go();
}
/** @return the minimum support-count threshold used for filtering. */
public static double getStandard() {
return STANDARD;
}
/**
 * Full mining pipeline: scan the file (item counts), re-scan the cached
 * transactions (filtered/ordered items), build the initial FP-tree, mine it
 * recursively, then print the itemset count and elapsed time.
 *
 * @throws IOException if the input file cannot be read
 */
public static void go() throws IOException {
double start = System.currentTimeMillis();
// NOTE(review): reader is not closed if an exception is thrown before
// br.close(); try-with-resources would be safer. FileReader also uses the
// platform default charset.
BufferedReader br = new BufferedReader(new FileReader(INFILE));
String str;
// First scan: support count of every single item (candidate set C1).
Map<String, Integer> c1 = new HashMap<>();
while ((str = br.readLine()) != null) {
String[] tmp = str.split(" ");
Set<String> ss = new HashSet<>();
for (String s : tmp) {
if (!c1.containsKey(s)) c1.put(s, 1);
else {
int x = c1.get(s);
c1.put(s, x + 1);
}
ss.add(s);
}
DATABASE.add(ss);
}
br.close();
// C1 computed *******************************
// Build the first header table (frequent items only), sorted by descending support.
List<table> headerTable = getTable(c1);
headerTable.sort(com);
// First header table created ***********************
// Second pass over the cached transactions: keep only frequent items, emitted
// in header-table order, producing the initial pattern records.
frequentItems[] fi = new frequentItems[DATABASE.size()];
int k = 0;
for (Set<String> s : DATABASE) {
frequentItems f = new frequentItems();
for (table t : headerTable) {
if (s.contains(t.item)) {
f.linkedSet.add(t.item);
}
}
f.num = 1;
fi[k] = f;
k++;
}
node treeTop = createFP(headerTable, fi);
dfsAll(headerTable, treeTop);
System.out.println(number);
double end = System.currentTimeMillis();
System.out.println("运行时间"+((end - start) / 1000)+"s");
}
/**
 * Recursively mines the FP-tree, accumulating the count of frequent itemsets
 * into {@link #number}.
 *
 * @param headerTable header table of the current (conditional) FP-tree
 * @param topOfTree   root of the current FP-tree (not traversed directly;
 *                    the walk goes through the header-table node chains)
 */
public static void dfsAll(List<table> headerTable, node topOfTree) {
if (isSignal(headerTable)) {// single-path tree: every non-empty subset of its items is frequent
int size = headerTable.size();
int res = 1;
for (int i = 0; i < size; i++) {
res *= 2;
}
// 2^size - 1 non-empty subsets.
// NOTE(review): int overflow if size >= 31 — presumably never reached here.
number += res - 1;
return;
}
// Otherwise process every header-table entry bottom-up (least frequent first).
for (int i = headerTable.size() - 1; i >= 0; i--) {
node lastNode = headerTable.get(i).head;// dummy head of this item's same-item node chain
// Build the conditional pattern base for this item *****************
// NOTE(review): the array is sized by the item's support, an upper bound on
// the number of distinct prefix paths; trailing slots stay null and are
// skipped later via null checks in cutFrequent/createFP.
frequentItems[] subFrequentItems = new frequentItems[headerTable.get(i).frequency];
Map<String, Integer> subMap = new HashMap<>();
int k = 0;
while (lastNode.next != null) {
lastNode = lastNode.next;// walk the same-item chain (one node per prefix path)
node now = lastNode;// used to climb toward the root
int j = 0;
frequentItems f = new frequentItems();
while (now.parent != null) {// prefix path is collected child-to-root, i.e. reversed
now = now.parent;
if (now != null) {// always true here (parent was checked non-null); kept as written
f.linkedSet.add(now.value);
// The root's empty value "" is counted too; getTable filters it out.
if (!subMap.containsKey(now.value)) {
subMap.put(now.value,lastNode.num);
} else {
int x = subMap.get(now.value);
subMap.put(now.value, x + lastNode.num);
}
}
j++;// NOTE(review): j is incremented but never read
}
f.num = lastNode.num;// path weight = support carried by the chain node
subFrequentItems[k] = f;
k++;
}
// Conditional pattern base built ************
List<table> subHeaderTable = getTable(subMap);
subHeaderTable.sort(com);
// Drop infrequent items and re-order each path by the new header table.
subFrequentItems = cutFrequent(subFrequentItems, subHeaderTable);
node subtree = createFP(subHeaderTable, subFrequentItems);
dfsAll(subHeaderTable, subtree);
}//System.out.println(headerTable.size());
// Each header item is itself a frequent itemset of size 1 in this context.
number += headerTable.size();
}
// Filters each conditional pattern so it keeps only items present in the new
// header table, rewritten in header-table (descending support) order.
// Returned array has the same length as the input; positions after the last
// non-null input entry remain null.
public static frequentItems[] cutFrequent(frequentItems[] frequentItems, List<table> header) {
frequentItems[] res = new frequentItems[frequentItems.length];
int k = 0;
for (frequentItems f : frequentItems) {
if (f == null) continue;// skip unused trailing slots
frequentItems newf = new frequentItems();
for (table t : header) {
Iterator i = f.linkedSet.iterator();
int flag = 0;
while (i.hasNext()) {
String s = (String) i.next();
if (s.equals(t.item)) flag = 1;
}
if (flag == 1) newf.linkedSet.add(t.item);
}
newf.num = f.num;
res[k] = newf;
k++;
}
return res;
}
// Builds a header table from an item->support map, keeping only items whose
// support meets STANDARD; the root's empty-string pseudo-item is excluded.
// The result is NOT sorted — callers sort with the `com` comparator.
public static List<table> getTable(Map<String, Integer> map) {
List<table> header = new ArrayList<>();
Set entrySet = map.entrySet();
Iterator iterator = entrySet.iterator();
while (iterator.hasNext()) {
Map.Entry entry = (Map.Entry) iterator.next();
if (((int) entry.getValue()) >= STANDARD && !((String) entry.getKey()).equals("")) {
table t = new table((String) entry.getKey(), (int) entry.getValue());
header.add(t);
}
}
return header;
}
// Returns true when every item's node chain holds at most one tree node, i.e.
// the FP-tree degenerates to a single path.
// NOTE(review): assumes every header entry has at least one chained node
// (t.head.next != null); otherwise this throws an NPE — confirm invariant.
public static boolean isSignal(List<table> headerTable) {
int flag = 0;
for (table t : headerTable)
if (t.head.next.next != null) {
flag = 1;
break;
}
if (flag == 1) return false;
else return true;
}
/**
 * Builds an FP-tree from the given pattern records and links every created
 * tree node into its header-table row's same-item node chain.
 *
 * @param lt header table (sorted by descending support)
 * @param fi patterns already filtered and ordered to match lt; null entries
 *           are skipped
 * @return the root of the new FP-tree (empty value, no parent)
 */
public static node createFP(List<table> lt, frequentItems[] fi) {
node top = new node();
for (frequentItems f : fi) {// insert each pattern into the tree
node now = top;
if (f != null) {
for (String s : f.linkedSet) {// descend/extend the tree one item at a time
int flag = 0;
// Reuse an existing child carrying the same item, if any.
for (node n : now.chile) {
if (n.value.equals(s)) {// found: just add this pattern's weight
n.num = n.num + f.num;
now = n;
flag = 1;
break;
}
}
if (flag == 0) {// no matching child: create a new branch node
node tmpnode = new node();
tmpnode.value = s;
tmpnode.num = f.num;
tmpnode.parent = now;
now.chile.add(tmpnode);
now = tmpnode;
}
// Append the (possibly reused) node to its item's header chain, unless
// it is already linked; the scan also locates the chain tail.
for (table t : lt) {
if (t.item.equals(s)) {
node next = t.head;
int flag1 = 0;
while (next.next != null) {
next = next.next;
if (next == now) {
flag1 = 1;
break;
}
}
if (flag1 == 1) break;
else {
next.next = now;
}
}
}
}
}
}
return top;
}
}
/**
 * A single FP-tree node: one item occurrence on a prefix path.
 * Also serves (with empty value) as the tree root and as the dummy head of
 * each header-table chain.
 */
class node {
    /** Item label carried by this node; empty string on the root/dummy head. */
    public String value = "";
    /** Support count accumulated for the path ending at this node. */
    public int num = 0;
    /** Parent on the path to the root; null for the root itself. */
    public node parent = null;
    /** Next node holding the same item (header-table chain link). */
    public node next = null;
    /** Child nodes, one per distinct following item. */
    public Set<node> chile = new HashSet<>();
}
/**
 * One header-table row: an item, its total support, and the dummy head of the
 * linked chain of FP-tree nodes carrying that item.
 */
class table {
    /** The item this row tracks. */
    public String item = "";
    /** Total support count of the item. */
    public int frequency = 0;
    /** Dummy head node; the real chain starts at {@code head.next}. */
    public node head = new node();

    /**
     * @param item      item label
     * @param frequency support count of the item
     */
    public table(String item, int frequency) {
        this.item = item;
        this.frequency = frequency;
    }
}
/**
 * One (conditional) transaction record: the frequent items it contains, kept
 * in insertion order, plus the multiplicity (weight) of the record.
 */
class frequentItems {
    /** Items in header-table (descending support) order; insertion order is preserved. */
    public LinkedHashSet<String> linkedSet = new LinkedHashSet<>();
    /** How many identical transactions/paths this record represents. */
    public int num = 0;
}
// FP-tree frequent pattern mining algorithm
// First published: 2021-11-13 16:15:15