FPTree


FPTree的资料网上很多,就不写了。

我犯的错误:1、忽略了深拷贝:同一条前缀路径对应多条记录时,每条记录都必须重新clone一份,不能共用同一个列表对象(建树时会修改列表),看来基础还得加强;

                        2、headerTable中的排序必须是全序的(出现次数相同的项之间也要有确定的先后顺序);




package com.tur4;

import java.util.LinkedList;
import java.util.List;

import com.tur4.algorithm.FPTree;
/**
 * Command-line driver: reads the transactions stored in {@code in.txt}
 * (items separated by commas), mines them with {@link FPTree} using a
 * minimum support of 1 and prints the collected itemsets.
 *
 * @author cstur4
 * email cstur4@gmail.com
 *
 */
public class test {
	/**
	 * Runs the whole pipeline: load, mine, print.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		final String inputFile = "in.txt";
		final String delimiter = ",";
		final int minSupport = 1;

		FPTree miner = new FPTree();
		List<LinkedList<String>> transactions = miner.readTransactionFile(inputFile, delimiter);
		// No suffix pattern exists at the top level, hence null.
		miner.FPGrowth(transactions, null, minSupport);
		miner.showFrequenceSet();
	}
}


package com.tur4.algorithm;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.log4j.Logger;

import com.tur4.pojo.TreeNode;

/**
 * FP-growth frequent-itemset miner.
 * <p>
 * Transactions are lists of item names. {@link #FPGrowth} builds a header
 * table and an FP-tree for the given transactions, then recursively mines the
 * conditional pattern base of every header item. Each itemset found is stored
 * as a string of item names, every name followed by {@code "/"}.
 * <p>
 * Instances keep state (the transactions read so far and the itemsets found
 * so far), and both keep accumulating across calls.
 *
 * @author cstur4
 * email cstur4@gmail.com
 *
 */
public class FPTree {
	
	private static final Logger LOG = Logger.getLogger(FPTree.class);
	/** Every transaction read by {@link #readTransactionFile}. */
	private List<LinkedList<String>> transactions = new LinkedList<LinkedList<String>>();
	/** Itemsets collected by {@link #FPGrowth}, one formatted string per itemset. */
	private List<String> frequenceSet = new LinkedList<String>();

	/**
	 * Reads one transaction per line from a text file and appends them to the
	 * transactions already held by this instance.
	 * <p>
	 * Empty lines are skipped. I/O problems are logged and whatever was read
	 * up to that point is returned.
	 *
	 * @param fileName  path of the file to read
	 * @param separator item separator; it is passed to {@link String#split(String)}
	 *                  and is therefore interpreted as a regular expression
	 * @return the internal (live) list of all transactions read so far
	 */
	public List<LinkedList<String>> readTransactionFile(String fileName, String separator){
		
		BufferedReader br = null;
		try {
			br = new BufferedReader(new FileReader(fileName));
			String line;
			while((line = br.readLine()) != null){
				// An empty line would otherwise become a transaction holding the item "".
				if(line.isEmpty())
					continue;
				LinkedList<String> items = new LinkedList<String>();
				for(String item: line.split(separator))
					items.add(item);
				transactions.add(items);
			}
		} catch (FileNotFoundException e) {
			LOG.error("transaction file not found: " + fileName, e);
		} catch (IOException e) {
			LOG.error("failed to read transaction file: " + fileName, e);
		}finally{
			
			if(br != null)
				try {
					br.close();
				} catch (IOException e) {
					LOG.error("failed to close transaction file: " + fileName, e);
				}
			LOG.debug("transactions=" + transactions);
		}
		
		return transactions;
	}
	
	/**
	 * Counts every item of {@code records} and returns the items whose count
	 * reaches {@code minSupport}, most frequent first.
	 * <p>
	 * Items with the same count are ordered by name so that the result is a
	 * deterministic total order. An item occurring several times in one
	 * record is counted once per occurrence. {@code null} items are ignored.
	 *
	 * @param records    transactions to count
	 * @param minSupport minimum count for an item to be kept
	 * @return one fresh {@link TreeNode} per kept item, carrying its name and count
	 */
	public List<TreeNode> buildHeaderTable(List<LinkedList<String>> records, int minSupport ){
		
		Map<String, Long> counts = new HashMap<String, Long>();
		for(LinkedList<String> record:records){
			for(String item:record){
				if(item == null)
					continue;
				Long seen = counts.get(item);
				counts.put(item, seen == null ? 1L : seen + 1);
			}
		}
		
		List<Entry<String, Long>> table = new LinkedList<Entry<String, Long>>(counts.entrySet());
		Collections.sort(table, new Comparator<Entry<String, Long>>() {

			@Override
			public int compare(Entry<String, Long> o1,
					Entry<String, Long> o2) {
				// Descending by count; ties broken by name. The comparator must be able
				// to return 0 for equal arguments or the sort contract is violated.
				int byCount = o2.getValue().compareTo(o1.getValue());
				if(byCount != 0)
					return byCount;
				return o1.getKey().compareTo(o2.getKey());
			}
			
		});
		
		List<TreeNode> res = new LinkedList<TreeNode>();
		for(Entry<String, Long> entry: table){
			// The table is sorted by descending count, so everything after this is too rare as well.
			if(entry.getValue() < minSupport)
				break;
			TreeNode node = new TreeNode();
			node.setName(entry.getKey());
			node.setCount(entry.getValue());
			res.add(node);
		}
		return res;
		
	}
	
	/**
	 * Sorts every record in place by the position of its items in
	 * {@code headerTable}; items missing from the table are moved to the end.
	 * <p>
	 * The header-table position is used rather than the raw count because
	 * items with equal counts need one fixed relative order (a total order,
	 * not a partial order) across all records.
	 */
	private void sortByFrequence(List<LinkedList<String>> records, List<TreeNode> headerTable){
		
		final Map<String, Integer> rank = new HashMap<String, Integer>();
		int position = 0;
		for(TreeNode header: headerTable)
			rank.put(header.getName(), position++);
		
		Comparator<String> byRank = new Comparator<String>(){
			@Override
			public int compare(String o1, String o2) {
				Integer r1 = rank.get(o1);
				Integer r2 = rank.get(o2);
				if(r1 == null)
					return r2 == null ? 0 : 1;
				if(r2 == null)
					return -1;
				return r1.compareTo(r2);
			}
		};
		for(LinkedList<String> record:records)
			Collections.sort(record, byRank);
		LOG.debug("sorted List=" + records);
			
	}
	
	/**
	 * Appends the remaining items of a record as a new branch below {@code root}.
	 * The record is consumed (emptied). Items without a header entry are dropped.
	 *
	 * @param root        node the new branch hangs from
	 * @param records     remaining items of one transaction, already sorted
	 * @param headerTable header table whose same-name chains receive the new nodes
	 */
	private void insertNodes(TreeNode root, LinkedList<String> records, List<TreeNode> headerTable){
	
		TreeNode subRoot = root;
		
		while(!records.isEmpty()){
			
			String name = records.pop();
			TreeNode lastNode = getLastHomonyNode(name, headerTable);
			if(lastNode == null)
				continue; // not a frequent item: skip exactly this one
			
			TreeNode node = new TreeNode();
			node.setName(name);
			node.increase();
			lastNode.setNextHomony(node);
			node.setParent(subRoot);
			subRoot.addChild(node);
			subRoot = node;
		}
	}

	/** Returns the header entry named {@code name}, or null when the item has none. */
	private TreeNode findHeader(String name, List<TreeNode> headerTable){
		for(TreeNode header: headerTable)
			if(header.getName()!=null && header.getName().equals(name))
				return header;
		return null;
	}

	/**
	 * Returns the last node of the same-name chain that starts at the header
	 * entry for {@code name} (the header itself when the chain is still empty),
	 * or null when the item has no header entry.
	 */
	private TreeNode getLastHomonyNode(String name, List<TreeNode> headerTable){
		TreeNode node = findHeader(name, headerTable);
		if(node == null)
			return null;
		
		while(node.getNextHomony()!=null && node.getNextHomony().getName()!=null)
			node = node.getNextHomony();
		return node;
	}
	
	/** Prints the subtree below {@code root} to standard output, two spaces per level. */
	private void traceTree(TreeNode root, int blank){
		
		for(int i=0;i<blank;++i)
			System.out.print("  ");
		System.out.println(root.getName()+"="+root.getCount());
		if(root.getChildren()!=null)
			for(TreeNode node:root.getChildren()){
				traceTree(node, blank+1);
			}
		
	}

	/**
	 * Builds the FP-tree for {@code records} and links every created node into
	 * the same-name chain of its header entry.
	 * <p>
	 * Side effects: the records are sorted and then consumed (every list ends
	 * up empty), the chains of {@code headerTable} are extended, and the
	 * finished tree is printed to standard output.
	 *
	 * @param records     transactions to insert; modified as described above
	 * @param headerTable header table built from the same records
	 * @return the root of the tree; the root has no name
	 */
	public TreeNode buildFPTree(List<LinkedList<String>> records, List<TreeNode> headerTable){
		
		sortByFrequence(records, headerTable);
		
		TreeNode root = new TreeNode();
		
		for(LinkedList<String> record:records){
			TreeNode subRoot = root;
			TreeNode child;
			// Follow the existing path for as long as the record shares its prefix...
			while(record.size()>0 && findHeader(record.peek(), headerTable)!=null
					&& (child = subRoot.findChild(record.peek())) != null){
				
				child.increase();
				subRoot = child;
				record.poll();
			}
			// ...then branch off with whatever is left.
			insertNodes(subRoot, record, headerTable);
			
		}
		traceTree(root, 0);
		
		return root;
	}

	/**
	 * Stores one itemset in the result list, formatted as every item name
	 * followed by {@code "/"}.
	 * <p>
	 * Strings of length 2 or less are not stored, which is the filter the
	 * original implementation applied.
	 * NOTE(review): this drops a lone one-character item but keeps a lone
	 * longer-named item - confirm whether 1-itemsets are meant to be reported.
	 */
	private void addFrequentSet(List<String> items){
		StringBuilder sb = new StringBuilder();
		for(String s:items)
			sb.append(s).append("/");
		String itemset = sb.toString();
		if(itemset.length()>2){
			frequenceSet.add(itemset);
			LOG.debug(itemset + " added");
		}
	}
	
	/**
	 * Mines {@code records} recursively and collects the itemsets found; print
	 * them with {@link #showFrequenceSet()}.
	 * <p>
	 * For every header item, from the last table entry to the first, the
	 * itemset "item + pattern" is recorded, the item's conditional pattern
	 * base is gathered from the tree, and that base is mined with the extended
	 * pattern. Recording happens at every recursion level, so an itemset is
	 * reported no matter how many records its conditional pattern base holds.
	 * <p>
	 * {@code records} is modified (see {@link #buildFPTree}).
	 *
	 * @param records    transactions, or a conditional pattern base when recursing
	 * @param pattern    items the current records are conditioned on; may be
	 *                   null (treated as empty) for the top-level call
	 * @param minSupport minimum count for an item to be considered frequent
	 */
	public void FPGrowth(List<LinkedList<String>> records, List<String> pattern, int minSupport){
		
		if(records == null)
			return;
		
		List<TreeNode> headerTable = buildHeaderTable(records, minSupport);
		LOG.debug("pattern="+pattern+"\theaderTable:"+headerTable);
		TreeNode root = buildFPTree(records, headerTable);
		
		if(root.getChildren()==null || root.getChildren().size()==0)
			return;
		
		for(int i=headerTable.size()-1;i>=0;i--){
			TreeNode header = headerTable.get(i);
			
			LinkedList<String> postPattern = new LinkedList<String>();
			postPattern.add(header.getName());
			if(pattern != null)
				postPattern.addAll(pattern);
			// The header count already reached minSupport, so this itemset is frequent.
			addFrequentSet(postPattern);
			
			// Conditional pattern base: for each tree node of this item, the names
			// on the path up to the root, repeated once per count of the node.
			List<LinkedList<String>> CPB = new LinkedList<LinkedList<String>>();
			for(TreeNode node = header.getNextHomony(); node != null; node = node.getNextHomony()){
				LinkedList<String> prefix = new LinkedList<String>();
				for(TreeNode up = node.getParent(); up != null && up.getName() != null; up = up.getParent())
					prefix.add(up.getName());
				if(prefix.isEmpty())
					continue;
				// Every copy must be its own list because building the tree consumes the lists.
				for(long n = node.getCount(); n > 0; n--)
					CPB.add(new LinkedList<String>(prefix));
			}
			
			if(!CPB.isEmpty())
				FPGrowth(CPB, postPattern, minSupport);
		}
	}

	/** Prints every collected itemset on its own line to standard output. */
	public void showFrequenceSet() {
		for(String s:frequenceSet)
			System.out.println(s);
	}

}



package com.tur4.pojo;

import java.util.LinkedList;
import java.util.List;

import javax.management.RuntimeErrorException;

/**
 * 
 * @author cstur4
 * email cstur4@gmail.com
 *
 */
public class TreeNode implements Comparable<TreeNode>{

	private String name;
	private Long count = 0L;
	private TreeNode nextHomony;
	private List<TreeNode> children;
	private TreeNode parent;

	public TreeNode findChild( String name){
		
		if(children !=  null)
			for(int i=0;i<children.size();++i)
				if(children.get(i).getName()!=null && children.get(i).getName().equals(name))
					return children.get(i);
		return null;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public Long getCount() {
		return count;
	}

	public void setCount(Long count) {
		this.count = count;
	}

	public TreeNode getNextHomony() {
		return nextHomony;
	}

	public void setNextHomony(TreeNode nextHomony) {
		this.nextHomony = nextHomony;
	}

	public List<TreeNode> getChildren() {
		return children;
	}

	public void setChildren(List<TreeNode> children) {
		this.children = children;
	}

	public TreeNode getParent() {
		return parent;
	}

	public void setParent(TreeNode parent) {
		this.parent = parent;
	}
	
	public void increase(Long increaseNum){
		this.count += increaseNum;
	}
	
	public void increase(){
		this.count ++;
	}
	
	public void addChild(TreeNode node){
		if(children==null)
			children = new LinkedList<TreeNode>();
		children.add(node);
		
	}
	@Override
	public String toString() {
		return name + "=" + count;
	}
	
	@Override
	public boolean equals(Object obj) {
		TreeNode node = (TreeNode)obj;
		return node.name.equals(name);
	}

	@Override
	public int compareTo(TreeNode o) {
		return  this.count-o.count>0?-1:1;
	}
	
	

}





  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值