实验结果
在最小支持度为 0.001 的条件下可以得到准确结果,仅用时 1.6 s
最低可以完成最小支持度为 0.0003 的计算
相关阅读
完整代码
package com.company;
import java.io.*;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.*;
/**
 * A node of the FP-tree.
 *
 * <p>The same class is used for the tree root, for ordinary item nodes and for
 * the header-table entries that start each item's {@code brother} chain.
 */
class FPNode {
    /** Item name carried by this node. */
    String name;
    /** Number of inserted transactions that pass through this node. */
    int count;
    /** Next node in the chain of nodes for the same item, or {@code null} at the end. */
    FPNode brother;
    /** Parent node in the tree, or {@code null} when none was given. */
    FPNode parent;
    /** Child nodes, in insertion order. */
    ArrayList<FPNode> children = new ArrayList<>();

    /**
     * Creates a detached node with a zero count and no parent.
     *
     * @param name item name (or a label such as "root")
     */
    FPNode(String name) {
        this(name, null, 0);
    }

    /**
     * Creates a node attached to {@code parent} with an initial count.
     *
     * @param name   item name
     * @param parent parent node in the tree
     * @param count  initial occurrence count
     */
    FPNode(String name, FPNode parent, int count) {
        this.name = name;
        this.parent = parent;
        this.count = count;
    }
}
/** An item name paired with its support count. */
class Item {
    /** Item name. */
    String name;
    /** Support count recorded for the item. */
    int support;

    /**
     * @param name    item name
     * @param support support count of the item
     */
    Item(String name, int support) {
        this.support = support;
        this.name = name;
    }
}
/**
 * One transaction reduced to its frequent items, ordered for insertion into an FP-tree.
 */
class Trade {
    /** Frequent items of the transaction, highest support first. */
    ArrayList<Item> items = new ArrayList<>();

    /**
     * Keeps only the items present in {@code frequentItems} and orders them by
     * descending support, breaking ties by item name.
     *
     * <p>The tie-break matters: every transaction must use the same item order,
     * otherwise two transactions holding the same equal-support items in a
     * different input order are inserted along different branches, and the
     * itemsets they share are under-counted or reported twice.
     *
     * @param items         raw item names of one transaction
     * @param frequentItems support count of every frequent item
     */
    public Trade(List<String> items, Map<String, Integer> frequentItems) {
        for (String name : items) {
            Integer support = frequentItems.get(name);
            if (support != null) {
                this.items.add(new Item(name, support));
            }
        }
        this.items.sort((l, r) -> {
            int bySupport = Integer.compare(r.support, l.support);
            // Equal support: fall back to the name so the order is the same in every transaction.
            return bySupport != 0 ? bySupport : l.name.compareTo(r.name);
        });
    }
}
/**
 * FP-tree built from a list of transactions and an absolute minimum support count.
 *
 * <p>Construction counts item occurrences, keeps the items reaching
 * {@code minSupport}, creates one header-table entry per kept item and then
 * inserts every transaction into the tree.
 */
class FPTree {
    /** Absolute support count an item must reach to be kept. */
    int minSupport;
    FPNode root = new FPNode("root");
    /** Support count of every item that reached {@code minSupport}. */
    Map<String, Integer> frequentItems = new HashMap<>();
    /** Head of the {@code brother} chain for each frequent item. */
    Map<String, FPNode> headTable = new HashMap<>();
    // Last node created so far for each item, i.e. the tail of its brother chain.
    Map<String, FPNode> currentPosition = new HashMap<>();
    List<List<String>> database;

    /**
     * Builds the complete tree for {@code database}.
     *
     * @param database   transactions, each a list of item names
     * @param minSupport absolute minimum support count
     */
    public FPTree(List<List<String>> database, int minSupport) {
        this.minSupport = minSupport;
        this.database = database;
        getFrequentItems();
        buildHeadTable();
        buildTree();
    }

    /**
     * Counts item occurrences over the whole database and records those whose
     * count is at least {@code minSupport}. Every occurrence is counted, so an
     * item listed twice in one transaction contributes two.
     */
    public void getFrequentItems() {
        Map<String, Integer> supportCount = new HashMap<>();
        for (List<String> line : database) {
            for (String item : line) {
                supportCount.merge(item, 1, Integer::sum);
            }
        }
        for (Map.Entry<String, Integer> entry : supportCount.entrySet()) {
            if (entry.getValue() >= minSupport) {
                frequentItems.put(entry.getKey(), entry.getValue());
            }
        }
    }

    /** Creates the header-table entry for each frequent item and makes it the chain tail. */
    public void buildHeadTable() {
        for (String frequentItem : frequentItems.keySet()) {
            FPNode head = new FPNode(frequentItem);
            headTable.put(frequentItem, head);
            currentPosition.put(frequentItem, head);
        }
    }

    /** Inserts every transaction of the database into the tree, starting at the root. */
    public void buildTree() {
        for (List<String> items : database) {
            insertTree(new Trade(items, frequentItems), root);
        }
    }

    /**
     * Inserts the items of {@code trade} as a path below {@code fpNode}.
     *
     * <p>Existing children are reused and their count incremented; missing ones
     * are created with count 1 and appended to the item's brother chain. The
     * items are walked in a single pass rather than being removed from the
     * front of the list one by one, which avoids shifting the list on every
     * step and one recursive call per item.
     *
     * @param trade  transaction to insert; its item list is empty afterwards
     * @param fpNode node under which the path starts
     * @return always 0
     */
    public int insertTree(Trade trade, FPNode fpNode) {
        FPNode current = fpNode;
        for (Item item : trade.items) {
            FPNode next = null;
            // Look for an existing child carrying this item.
            for (FPNode child : current.children) {
                if (child.name.equals(item.name)) {
                    next = child;
                    break;
                }
            }
            if (next != null) {
                next.count++;
            } else {
                next = new FPNode(item.name, current, 1);
                current.children.add(next);
                // Link the new node at the tail of the item's brother chain.
                currentPosition.get(item.name).brother = next;
                currentPosition.put(item.name, next);
            }
            current = next;
        }
        // The transaction is consumed by the insertion.
        trade.items.clear();
        return 0;
    }
}
/** A frequent itemset together with its support count. */
class FrequentItem {
    /** Item names that make up the itemset. */
    List<String> items;
    /** Support count of the itemset. */
    int support;

    /**
     * @param items   item names of the itemset (stored as given, not copied)
     * @param support support count of the itemset
     */
    public FrequentItem(List<String> items, int support) {
        this.support = support;
        this.items = items;
    }
}
/**
 * Mines frequent itemsets from an {@link FPTree} by recursively building
 * conditional pattern bases and conditional FP-trees.
 */
class FPGrowth {
    /** Absolute minimum support count used for every conditional tree. */
    int minSupport;
    /** All itemsets found so far. */
    List<FrequentItem> patternList = new ArrayList<>();

    /**
     * Runs the mining immediately; results are available from {@link #getPatternList()}.
     *
     * @param fpTree     tree built from the full database
     * @param minSupport absolute minimum support count
     */
    public FPGrowth(FPTree fpTree, int minSupport) {
        this.minSupport = minSupport;
        fpGrowth(fpTree, null);
    }

    /**
     * Emits one itemset per frequent item of {@code fpTree} (the item placed in
     * front of {@code suffix}) and recurses into the item's conditional tree.
     *
     * @param fpTree tree to mine
     * @param suffix items already fixed by the enclosing recursion, or {@code null} at the top level
     */
    private void fpGrowth(FPTree fpTree, List<String> suffix) {
        if (fpTree.root.children.isEmpty()) {
            return;
        }
        for (Map.Entry<String, Integer> entry : fpTree.frequentItems.entrySet()) {
            String frequentItem = entry.getKey();
            List<String> newSuffix = new ArrayList<>();
            newSuffix.add(frequentItem);
            if (suffix != null) {
                newSuffix.addAll(suffix);
            }
            patternList.add(new FrequentItem(newSuffix, entry.getValue()));
            // Build the conditional pattern base for this item.
            List<List<String>> conditionalPatternDatabase =
                    generateConditionalPatternDatabase(fpTree, frequentItem);
            if (conditionalPatternDatabase.isEmpty()) {
                // An empty base yields an empty tree; nothing further to mine.
                continue;
            }
            // Build the conditional FP-tree and mine it with the extended suffix.
            fpGrowth(new FPTree(conditionalPatternDatabase, minSupport), newSuffix);
        }
    }

    /**
     * Collects the prefix paths of every node carrying {@code frequentItem}.
     *
     * <p>Each path lists the ancestors from the node's parent up to, but not
     * including, the root, and is added once per occurrence counted on the
     * node. Empty paths (nodes directly under the root) are left out: they
     * contain no items, and the support threshold is an absolute count, so they
     * cannot change the conditional tree.
     *
     * @param fpTree       tree to read
     * @param frequentItem item whose conditional pattern base is wanted
     * @return the prefix paths, repeated according to node counts
     */
    private List<List<String>> generateConditionalPatternDatabase(FPTree fpTree, String frequentItem) {
        List<List<String>> conditionalPatternDatabase = new ArrayList<>();
        FPNode headNode = fpTree.headTable.get(frequentItem);
        for (FPNode fpNode = headNode.brother; fpNode != null; fpNode = fpNode.brother) {
            // Walk up towards the root to collect the prefix path.
            List<String> prefixPath = new ArrayList<>();
            for (FPNode ancestor = fpNode.parent; ancestor.parent != null; ancestor = ancestor.parent) {
                prefixPath.add(ancestor.name);
            }
            if (prefixPath.isEmpty()) {
                continue;
            }
            // The same list instance is shared by all copies; it is never modified afterwards.
            for (int i = 0; i < fpNode.count; i++) {
                conditionalPatternDatabase.add(prefixPath);
            }
        }
        return conditionalPatternDatabase;
    }

    /**
     * Sorts the found itemsets by descending support (in place) and returns them.
     *
     * @return the internal pattern list, highest support first
     */
    public List<FrequentItem> getPatternList() {
        patternList.sort((l, r) -> Integer.compare(r.support, l.support));
        return patternList;
    }
}
/**
 * Command-line entry point: reads a relative minimum support from standard
 * input, loads the transactions from {@code retail.dat}, runs FP-growth and
 * prints every frequent itemset with its support count.
 */
public class Main {
    /** Transactions loaded from the data file, one list of item names per line. */
    private static final List<List<String>> database = new ArrayList<>();

    /**
     * @param args unused
     * @throws IOException if the data file cannot be read
     */
    public static void main(String[] args) throws IOException {
        Scanner scanner = new Scanner(System.in);
        double minSupport = scanner.nextDouble();
        long startTime = System.currentTimeMillis();
        loadData();
        // Compute the absolute threshold once so the tree and the miner agree on it.
        int minSupportCount = toSupportCount(minSupport, database.size());
        FPTree fpTree = new FPTree(database, minSupportCount);
        FPGrowth fpGrowth = new FPGrowth(fpTree, minSupportCount);
        int count = 0;
        for (FrequentItem frequentItem : fpGrowth.getPatternList()) {
            System.out.println(frequentItem.items + ": " + frequentItem.support);
            count++;
        }
        System.out.println("总数: " + count);
        long endTime = System.currentTimeMillis();
        System.out.println("程序运行时间:" + (endTime - startTime) + "ms");
    }

    /**
     * Converts a relative minimum support into an absolute transaction count,
     * rounded up.
     *
     * <p>The product is computed in decimal arithmetic: with plain doubles a
     * product such as 0.07 * 100 evaluates to slightly more than 7 and
     * {@code Math.ceil} would then yield 8 instead of 7.
     *
     * @param minSupport       relative minimum support as entered by the user
     * @param transactionCount number of loaded transactions
     * @return smallest integer not below {@code minSupport * transactionCount},
     *         clamped to the {@code int} range
     */
    private static int toSupportCount(double minSupport, int transactionCount) {
        if (Double.isNaN(minSupport) || Double.isInfinite(minSupport)) {
            // No exact decimal value exists; keep the plain floating-point result.
            return (int) Math.ceil(minSupport * transactionCount);
        }
        BigDecimal rounded = BigDecimal.valueOf(minSupport)
                .multiply(BigDecimal.valueOf(transactionCount))
                .setScale(0, RoundingMode.CEILING);
        return rounded
                .min(BigDecimal.valueOf(Integer.MAX_VALUE))
                .max(BigDecimal.valueOf(Integer.MIN_VALUE))
                .intValue();
    }

    /**
     * Reads {@code retail.dat} from the working directory into {@link #database}.
     *
     * <p>Items are separated by runs of whitespace. Blank lines are skipped so
     * that they neither add an empty-string item nor count as a transaction.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private static void loadData() throws IOException {
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader("retail.dat"))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                database.add(Arrays.asList(trimmed.split("\\s+")));
            }
        }
    }
}