数据挖掘算法FP-Tree的实现---java

数据挖掘算法FP-tree算法的实现

1.算法简介

FP-Tree算法全称是Frequent Pattern Tree算法,即频繁模式树算法。它与Apriori算法一样,也是用来挖掘频繁项集的;不同的是,FP-Tree算法是对Apriori算法的优化,它解决了Apriori算法在挖掘过程中会产生大量候选集的问题,能够在不产生候选集的情况下发现频繁模式。不过,频繁模式挖掘出来之后,产生关联规则的步骤仍然与Apriori算法相同。

2.算法的实现

1.实验数据集

牛奶,鸡蛋,面包,薯片
鸡蛋,爆米花,薯片,啤酒
鸡蛋,面包,薯片
牛奶,鸡蛋,面包,爆米花,薯片,啤酒
牛奶,面包,啤酒
鸡蛋,面包,啤酒
牛奶,面包,薯片
牛奶,鸡蛋,面包,黄油,薯片
牛奶,鸡蛋,黄油,薯片

2.实验代码

FPTree.java

package package1;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

/**
 * FP-Growth frequent-itemset miner.
 *
 * <p>Transactions are lists of item names. {@link #FPGrowth(List, List)}
 * builds a header table and an FP-tree for the given transactions, prints the
 * frequent itemsets it finds to standard output, and recurses on the
 * conditional pattern base of every header item.
 */
public class FPTree {

	/** Minimum absolute support count an item needs to be considered frequent. */
	private int minSuport;

	/** Returns the minimum absolute support count. */
	public int getMinSuport() {
		return minSuport;
	}

	/** Sets the minimum absolute support count. */
	public void setMinSuport(int minSuport) {
		this.minSuport = minSuport;
	}

	/**
	 * Reads transaction records from the given files. Every non-blank line is
	 * one transaction whose items are separated by commas.
	 *
	 * <p>Items are trimmed and empty items are dropped, so {@code "a, b"} and
	 * {@code "a,b"} describe the same transaction. Lines that contain no item
	 * at all are skipped.
	 *
	 * <p>If a file cannot be read, a message is printed and the JVM exits with
	 * status 1.
	 *
	 * @param filenames paths of the files to read
	 * @return the transactions of all files in reading order, or {@code null}
	 *         when no filename was given
	 */
	public List<List<String>> readTransRocords(String... filenames) {
		List<List<String>> transaction = null;
		if (filenames != null && filenames.length > 0) {
			transaction = new LinkedList<List<String>>();
			for (String filename : filenames) {
				try {
					BufferedReader br = new BufferedReader(new FileReader(filename));
					try {
						String line;
						while ((line = br.readLine()) != null) {
							List<String> record = parseRecord(line);
							if (!record.isEmpty()) {
								transaction.add(record);
							}
						}
					} finally {
						br.close();
					}
				} catch (IOException ex) {
					System.out.println("Read transaction records failed."
							+ ex.getMessage());
					System.exit(1);
				}
			}
		}
		return transaction;
	}

	/** Splits one comma-separated line into its trimmed, non-empty items. */
	private List<String> parseRecord(String line) {
		List<String> record = new LinkedList<String>();
		for (String token : line.split(",")) {
			String item = token.trim();
			if (item.length() > 0) {
				record.add(item);
			}
		}
		return record;
	}

	/**
	 * Runs FP-Growth on the given transactions and prints the frequent
	 * itemsets that end with {@code postPattern}.
	 *
	 * <p>Each output line has the form
	 * {@code count <TAB> item <TAB> postPattern items...}. Nothing is printed
	 * for a call whose {@code postPattern} is {@code null}; such a call only
	 * starts the recursion.
	 *
	 * @param transRecords transactions (or a conditional pattern base); a
	 *                     {@code null} or empty list yields no output
	 * @param postPattern  suffix pattern accumulated so far, or {@code null}
	 *                     for the top-level call
	 */
	public void FPGrowth(List<List<String>> transRecords,
			List<String> postPattern) {
		// Nothing to mine; also protects against the null returned by
		// readTransRocords() when it is called without filenames.
		if (transRecords == null || transRecords.isEmpty()) {
			return;
		}

		// The header table doubles as the set of frequent 1-itemsets.
		ArrayList<TreeNode> headerTable = buildHeaderTable(transRecords);
		TreeNode treeRoot = buildFPTree(transRecords, headerTable);

		// An empty tree means no item reached the minimum support.
		if (treeRoot.getChildren() == null
				|| treeRoot.getChildren().size() == 0) {
			return;
		}

		// Print every header item combined with the current suffix pattern.
		if (postPattern != null) {
			for (TreeNode header : headerTable) {
				System.out.print(header.getCount() + "\t" + header.getName());
				for (String ele : postPattern) {
					System.out.print("\t" + ele);
				}
				System.out.println();
			}
		}

		// Build the conditional pattern base of every header item and recurse.
		for (TreeNode header : headerTable) {
			// Extend the suffix pattern by the current item.
			List<String> newPostPattern = new LinkedList<String>();
			newPostPattern.add(header.getName());
			if (postPattern != null) {
				newPostPattern.addAll(postPattern);
			}

			List<List<String>> newTransRecords = new LinkedList<List<String>>();
			for (TreeNode backnode = header.getNextHomonym(); backnode != null;
					backnode = backnode.getNextHomonym()) {
				// Collect the ancestors of backnode up to, but excluding, the
				// root (the root is the only node without a name).
				List<String> prenodes = new ArrayList<String>();
				for (TreeNode parent = backnode.getParent();
						parent != null && parent.getName() != null;
						parent = parent.getParent()) {
					prenodes.add(parent.getName());
				}
				// The prefix path occurs once per transaction counted on backnode.
				for (int counter = backnode.getCount(); counter > 0; counter--) {
					newTransRecords.add(prenodes);
				}
			}

			FPGrowth(newTransRecords, newPostPattern);
		}
	}

	/**
	 * Builds the header table, i.e. the frequent 1-itemsets, ordered by
	 * descending support.
	 *
	 * <p>The support of an item is the number of transactions that contain
	 * it; an item repeated inside one transaction is counted once.
	 *
	 * @param transRecords transactions to count
	 * @return header nodes whose count is at least the minimum support, or
	 *         {@code null} when {@code transRecords} is empty
	 */
	public ArrayList<TreeNode> buildHeaderTable(List<List<String>> transRecords) {
		if (transRecords.size() == 0) {
			return null;
		}

		Map<String, TreeNode> map = new HashMap<String, TreeNode>();
		for (List<String> record : transRecords) {
			// Track the items already counted for this transaction so that a
			// duplicated item does not inflate its support.
			Set<String> seen = new HashSet<String>();
			for (String item : record) {
				if (!seen.add(item)) {
					continue;
				}
				TreeNode node = map.get(item);
				if (node == null) {
					node = new TreeNode(item);
					node.setCount(1);
					map.put(item, node);
				} else {
					node.countIncrement(1);
				}
			}
		}

		// Keep only the items that reach the minimum support.
		ArrayList<TreeNode> F1 = new ArrayList<TreeNode>();
		for (TreeNode tnode : map.values()) {
			if (tnode.getCount() >= minSuport) {
				F1.add(tnode);
			}
		}
		// TreeNode's natural order is descending by count.
		Collections.sort(F1);
		return F1;
	}

	/**
	 * Builds the FP-tree for the given transactions and threads every new
	 * tree node onto the homonym chain of its header node.
	 *
	 * @param transRecords transactions to insert
	 * @param F1           header table as returned by
	 *                     {@link #buildHeaderTable(List)}
	 * @return the nameless root of the tree
	 */
	public TreeNode buildFPTree(List<List<String>> transRecords,
			ArrayList<TreeNode> F1) {
		TreeNode root = new TreeNode();
		for (List<String> transRecord : transRecords) {
			LinkedList<String> record = sortByF1(transRecord, F1);
			TreeNode subTreeRoot = root;
			TreeNode tmpRoot;
			// Follow the existing path as far as it matches the record.
			while (!record.isEmpty()
					&& (tmpRoot = subTreeRoot.findChild(record.peek())) != null) {
				tmpRoot.countIncrement(1);
				subTreeRoot = tmpRoot;
				record.poll();
			}
			// Whatever is left becomes a new branch below the last match.
			addNodes(subTreeRoot, record, F1);
		}
		return root;
	}

	/**
	 * Returns the frequent items of a transaction in header-table order
	 * (most frequent first). Items missing from {@code F1} are dropped and
	 * repeated items are kept once.
	 *
	 * @param transRecord the transaction to reorder
	 * @param F1          header table, sorted by descending support
	 * @return the filtered and reordered items
	 */
	public LinkedList<String> sortByF1(List<String> transRecord,
			ArrayList<TreeNode> F1) {
		LinkedList<String> rest = new LinkedList<String>();
		if (F1 == null) {
			return rest;
		}
		// F1 is already in the desired order, so one pass over it that keeps
		// the names present in the transaction yields the sorted result.
		Set<String> items = new HashSet<String>(transRecord);
		for (TreeNode tnode : F1) {
			if (items.contains(tnode.getName())) {
				rest.add(tnode.getName());
			}
		}
		return rest;
	}

	/**
	 * Inserts the items of {@code record} as a single chain of descendants
	 * below {@code ancestor}. The record is consumed in the process.
	 *
	 * @param ancestor node the chain is attached to
	 * @param record   remaining items of a transaction, in tree order
	 * @param F1       header table whose homonym chains receive the new nodes
	 */
	public void addNodes(TreeNode ancestor, LinkedList<String> record,
			ArrayList<TreeNode> F1) {
		TreeNode parent = ancestor;
		while (!record.isEmpty()) {
			String item = record.poll();
			TreeNode leafnode = new TreeNode(item);
			leafnode.setCount(1);
			leafnode.setParent(parent);
			parent.addChild(leafnode);
			linkHomonym(leafnode, F1);
			// The next item hangs below the node that was just created.
			parent = leafnode;
		}
	}

	/** Appends {@code node} to the homonym chain of its header node, if any. */
	private void linkHomonym(TreeNode node, ArrayList<TreeNode> F1) {
		for (TreeNode f1 : F1) {
			if (f1.getName().equals(node.getName())) {
				TreeNode tail = f1;
				while (tail.getNextHomonym() != null) {
					tail = tail.getNextHomonym();
				}
				tail.setNextHomonym(node);
				return;
			}
		}
	}

	/**
	 * Demo entry point: mines {@code resource/market} below the working
	 * directory with a minimum support of 4.
	 */
	public static void main(String[] args) {
		FPTree fptree = new FPTree();
		fptree.setMinSuport(4);
		// File.separator keeps the path valid on non-Windows systems as well.
		List<List<String>> transRecords = fptree
				.readTransRocords(System.getProperty("user.dir")
						+ File.separator + "resource" + File.separator + "market");
		fptree.FPGrowth(transRecords, null);
	}

}

TreeNode.java
package package1;

import java.util.ArrayList;

import java.util.List;

/**
 * Node of an FP-tree. The same class is used for header-table entries, which
 * carry the total count of an item and the head of its homonym chain.
 *
 * <p>The natural ordering is descending by {@link #getCount() count}, so
 * sorting a list of nodes puts the most frequent item first.
 */
public class TreeNode implements Comparable<TreeNode> {

	private String name; // item name; left null by the no-arg constructor
	private int count; // occurrence count
	private TreeNode parent; // parent node
	private List<TreeNode> children; // child nodes; null until the first child is added
	private TreeNode nextHomonym; // next node carrying the same item name

	/** Creates a node without a name. */
	public TreeNode() {
	}

	/** Creates a node for the given item name. */
	public TreeNode(String name) {
		this.name = name;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public int getCount() {
		return count;
	}

	public void setCount(int count) {
		this.count = count;
	}

	public TreeNode getParent() {
		return parent;
	}

	public void setParent(TreeNode parent) {
		this.parent = parent;
	}

	/** Returns the child list, or {@code null} if no child was ever added. */
	public List<TreeNode> getChildren() {
		return children;
	}

	/** Appends a child, creating the child list on first use. */
	public void addChild(TreeNode child) {
		if (children == null) {
			children = new ArrayList<TreeNode>();
		}
		children.add(child);
	}

	/**
	 * Looks up a direct child by item name.
	 *
	 * @param name item name to look for
	 * @return the first child with that name, or {@code null} if there is none
	 */
	public TreeNode findChild(String name) {
		if (children != null) {
			for (TreeNode child : children) {
				if (child.getName().equals(name)) {
					return child;
				}
			}
		}
		return null;
	}

	public void setChildren(List<TreeNode> children) {
		this.children = children;
	}

	/**
	 * Prints the names of the direct children, each followed by a space, or
	 * the text {@code null} when the node has no child list.
	 */
	public void printChildrenName() {
		if (children != null) {
			for (TreeNode child : children) {
				System.out.print(child.getName() + " ");
			}
		} else {
			System.out.print("null");
		}
	}

	public TreeNode getNextHomonym() {
		return nextHomonym;
	}

	public void setNextHomonym(TreeNode nextHomonym) {
		this.nextHomonym = nextHomonym;
	}

	/** Adds {@code n} to the count. */
	public void countIncrement(int n) {
		this.count += n;
	}

	/**
	 * Orders nodes by descending count, so that Collections.sort() yields the
	 * most frequent item first.
	 *
	 * @return a negative value if this node has the larger count, a positive
	 *         value if {@code arg0} has the larger count, and 0 on a tie
	 */
	@Override
	public int compareTo(TreeNode arg0) {
		int other = arg0.getCount();
		// Compare explicitly instead of subtracting: a difference can
		// overflow int and flip the sign of the result.
		if (other < this.count) {
			return -1;
		}
		return (other == this.count) ? 0 : 1;
	}

}

3.运行演示截图

在这里插入图片描述

  • 3
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

良缘白马

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值