public class TreeNode {
private TreeNode parent;
private String name;
private int count;
private Set<TreeNode> children;
public TreeNode(TreeNode parent,String name,int count){
this.count = count;
this.parent = parent;
this.name = name;
}
public TreeNode(String name,int count){
this.name = name;
this.count = count;
}
/**
* 当前节点计数+i
* @param i
*/
public void incrementCount(int i){
this.count = count + i;
}
/**
* 父节点是否包含子节点包含则返回,否则返回null
* @param key
* @return
*/
public TreeNode findChild(String key){
if(this.children == null){
return null;
}
for(TreeNode child:this.children){
if(StringUtils.equals(child.name,key)){
return child;
}
}
return null;
}
/**
* 给父节点增加一个子节点
* @param child
* @return
*/
public TreeNode addChild(TreeNode child){
if(this.children == null){
this.children = new HashSet<TreeNode>();
}
this.children.add(child);
return child;
}
public boolean isEmpty(){
return this.children==null || this.children.size()==0;
}
public TreeNode getParent() {
return parent;
}
public void setParent(TreeNode parent) {
this.parent = parent;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public int getCount() {
return count;
}
public void setCount(int count) {
this.count = count;
}
public Set<TreeNode> getChildren() {
return children;
}
public void setChildren(Set<TreeNode> children) {
this.children = children;
}
}
import java.util.LinkedHashSet;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
/**
 * Frequent-itemset miner built on a prefix tree of {@link TreeNode}s.
 *
 * <p>Transactions are lists of item names. Items occurring fewer than
 * {@link #support} times are dropped, the remaining items of every transaction
 * are inserted into a tree in descending-frequency order, and the tree is then
 * mined recursively via conditional pattern bases (see {@link #digTree}).
 */
public class FpTree {
    /** Minimum occurrence count an item needs to be kept by {@link #scanAndSort}. */
    private static int support = 3;

    /** Input file used when no path is passed on the command line. */
    private static final String DEFAULT_INPUT = "D:\\R\\aprior.txt";

    /** Count carried by the artificial root node; real nodes start at 1. */
    private static final int ROOT_COUNT = -1;

    /**
     * Reads comma-separated transactions (one per line) and prints the frequent itemsets.
     *
     * @param args optional; {@code args[0]} is the input file, otherwise the built-in default path is used
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        String file = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        // readLines closes the file before returning. The iterator returned by
        // FileUtils.lineIterator keeps the file open until closed explicitly,
        // which the previous code never did. Both use the platform default charset.
        List<String> lines = FileUtils.readLines(new File(file));
        List<List<String>> transactions = new ArrayList<List<String>>();
        for (String line : lines) {
            if (StringUtils.isNotEmpty(line)) {
                String[] subjects = line.split(",");
                // A transaction is a set of items: an item repeated on one line must
                // count once, otherwise it inflates the support and creates a tree
                // path like a -> a that later yields bogus itemsets such as "a,a".
                // The copy is a mutable ArrayList because buildTree prunes and sorts in place.
                List<String> list = new ArrayList<String>(
                        new LinkedHashSet<String>(Arrays.asList(subjects)));
                transactions.add(list);
            }
        }
        List<String> frequences = new LinkedList<String>();
        digTree(transactions, frequences);
    }

    /**
     * Recursively mines {@code transactions} and prints every frequent itemset
     * with at least two items to standard output, as the comma-joined items,
     * a tab, and the count of the last added item in the current transactions.
     *
     * <p>The transaction lists are modified in place by {@link #buildTree}.
     * Each transaction is expected to contain distinct items.
     *
     * @param transactions transactions (or conditional pattern base) to mine
     * @param frequences   items already fixed on the current recursion path; not modified
     */
    public static void digTree(List<List<String>> transactions,
            List<String> frequences) {
        final Map<String, Integer> sortedMap = scanAndSort(transactions);
        if (sortedMap.isEmpty()) {
            return;
        }
        Map<String, List<TreeNode>> index = new HashMap<String, List<TreeNode>>();
        TreeNode root = buildTree(transactions, index, sortedMap);
        List<String> headTable = new ArrayList<String>(sortedMap.keySet());
        Collections.sort(headTable, bySupportDescending(sortedMap));
        // Walk the header table from the least frequent item upwards.
        for (int i = headTable.size() - 1; i >= 0; i--) {
            String subject = headTable.get(i);
            List<List<String>> frequentModeBases = extract(index.get(subject), root);
            List<String> nextFrequences = new LinkedList<String>(frequences);
            nextFrequences.add(subject);
            if (nextFrequences.size() > 1) {
                System.out.println(StringUtils.join(nextFrequences, ",") + "\t" + sortedMap.get(subject));
            }
            digTree(frequentModeBases, nextFrequences);
        }
    }

    /**
     * Collects the conditional pattern base of one item: for every tree node of
     * that item, the names on the path from its parent up to (excluding) the root,
     * repeated as many times as the node's count.
     *
     * <p>The repeated entries for one node are the same list instance.
     *
     * @param list all tree nodes of the item; {@code null} yields an empty result
     * @param root root of the tree (currently unused; the root is recognised by its count of -1)
     * @return the prefix paths, one entry per counted occurrence
     */
    public static List<List<String>> extract(List<TreeNode> list, TreeNode root) {
        List<List<String>> returnList = new ArrayList<List<String>>();
        if (list == null) {
            return returnList;
        }
        for (TreeNode node : list) {
            TreeNode parent = node.getParent();
            // Nodes directly below the root have an empty prefix and contribute nothing.
            if (parent == null || parent.getCount() == ROOT_COUNT) {
                continue;
            }
            List<String> tranc = new ArrayList<String>();
            while (parent != null && parent.getCount() != ROOT_COUNT) {
                tranc.add(parent.getName());
                parent = parent.getParent();
            }
            for (int i = 0; i < node.getCount(); i++) {
                returnList.add(tranc);
            }
        }
        return returnList;
    }

    /**
     * Builds the prefix tree for the given transactions.
     *
     * <p>Every transaction list is modified in place: items missing from
     * {@code sortedMap} are removed and the rest is sorted by descending count
     * (ties broken by name) before being inserted.
     *
     * @param transactions transactions to insert; their lists must be mutable
     * @param index        filled with, per item, every tree node created for that item
     * @param sortedMap    frequent items and their counts, as returned by {@link #scanAndSort}
     * @return the root node, named "root" with a count of -1
     */
    public static TreeNode buildTree(List<List<String>> transactions,
            Map<String, List<TreeNode>> index,
            final Map<String, Integer> sortedMap) {
        TreeNode root = new TreeNode(null, "root", ROOT_COUNT);
        Comparator<String> order = bySupportDescending(sortedMap);
        for (List<String> subjects : transactions) {
            Iterator<String> ite = subjects.iterator();
            while (ite.hasNext()) {
                String subject = ite.next();
                if (!sortedMap.containsKey(subject)) {
                    ite.remove();
                }
            }
            Collections.sort(subjects, order);
            TreeNode current = root;
            for (int i = 0; i < subjects.size(); i++) {
                String subject = subjects.get(i);
                TreeNode next = current.findChild(subject);
                if (next == null) {
                    TreeNode newNode = new TreeNode(current, subject, 1);
                    current.addChild(newNode);
                    current = newNode;
                    List<TreeNode> thisIndex = index.get(subject);
                    if (thisIndex == null) {
                        thisIndex = new ArrayList<TreeNode>();
                        index.put(subject, thisIndex);
                    }
                    thisIndex.add(newNode);
                } else {
                    next.incrementCount(1);
                    current = next;
                }
            }
        }
        return root;
    }

    /**
     * Counts how often each item occurs across all transactions and drops the
     * items whose count is below {@link #support}.
     *
     * <p>Despite the name, the returned map is an unordered {@code HashMap};
     * callers do the ordering themselves.
     *
     * @param transactions transactions to scan; {@code null} or empty yields an empty map
     * @return item name to occurrence count, frequent items only
     */
    public static Map<String, Integer> scanAndSort(List<List<String>> transactions) {
        Map<String, Integer> map = new HashMap<String, Integer>();
        if (transactions == null || transactions.isEmpty()) {
            return map;
        }
        for (List<String> basket : transactions) {
            for (String subject : basket) {
                Integer count = map.get(subject);
                if (count == null) {
                    map.put(subject, 1);
                } else {
                    map.put(subject, count + 1);
                }
            }
        }
        Iterator<Entry<String, Integer>> ite = map.entrySet().iterator();
        while (ite.hasNext()) {
            Entry<String, Integer> entry = ite.next();
            if (entry.getValue() < support) {
                ite.remove();
            }
        }
        return map;
    }

    /**
     * Orders item names by descending count, breaking ties by natural name order.
     * Every compared name must be a key of {@code counts}.
     */
    private static Comparator<String> bySupportDescending(final Map<String, Integer> counts) {
        return new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                int diff = counts.get(o2) - counts.get(o1);
                return diff != 0 ? diff : o1.compareTo(o2);
            }
        };
    }
}