java实现fp-growth算法

最近公司项目上用到频繁项发现算法,于是就用 Java 实现了一个 fp-growth 算法。

| 环境说明 | 版本说明 | 备注 |
| --- | --- | --- |
| 操作系统 | debian 9 | 无 |
| jdk | openjdk 1.8 | 无 |

关于fp-growth算法的原理请参考:

https://www.cnblogs.com/pinard/p/6307064.html 和《机器学习实战》。

FpTreeNode类

package com.slyk.sdp.algorithms.externalAlgorithms.fpTree;

import java.util.ArrayList;
import java.util.List;

/**
 * A single node of an FP-tree.
 *
 * <p>A node holds an item value, an occurrence counter, a reference to its
 * parent and a list of children. Item equality is never decided with
 * {@code equals}; it is always delegated to the {@link FpTreeHelper} attached
 * to the node.
 *
 * @param <T> type of the item value stored in the node
 *
 * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
 * @created on 2019-05-23 20:01:46
 */
public class FpTreeNode<T> {
    /** Occurrence counter of this node. */
    private long count = 0;

    /** Item value carried by this node. */
    private T nodeVal;

    /** Parent node, or {@code null} when none has been set. */
    private FpTreeNode<T> parent = null;

    /** Child nodes; {@code null} until a child is added or a list is set explicitly. */
    private List<FpTreeNode<T>> children = null;

    /** Helper used to compare item values. */
    private FpTreeHelper<T> helper = null;

    /**
     * Creates a node with every field supplied by the caller.
     *
     * @param count    initial occurrence counter
     * @param nodeVal  item value of the node
     * @param parent   parent node, may be {@code null}
     * @param children initial child list, may be {@code null}
     * @param helper   helper used for item comparison
     */
    public FpTreeNode(long count, T nodeVal, FpTreeNode<T> parent, List<FpTreeNode<T>> children,
            FpTreeHelper<T> helper) {
        this.count = count;
        this.nodeVal = nodeVal;
        this.parent = parent;
        this.children = children;
        this.helper = helper;
    }

    /**
     * Attaches {@code child} to this node, creating the child list on first use
     * and pointing the child's parent reference at this node.
     *
     * @param child node to attach
     * @return the node that was attached
     * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
     * @created on 2019-05-23 19:33:13
     */
    public FpTreeNode<T> addChild(FpTreeNode<T> child) {
        List<FpTreeNode<T>> existing = this.getChildren();
        if (existing == null) {
            this.children = new ArrayList<>();
        }
        child.setParent(this);
        this.children.add(child);
        return child;
    }

    /**
     * Inserts a path below this node, one element per tree level.
     * <br/>
     * Each element of the list is the parent of the element that follows it,
     * e.g. for <br/>
     * a,b,c,d<br/>
     * <table border="1px" cellspacing="0px">
     * <tr><th>node</th><th>parent</th></tr>
     * <tr><td>a</td><td>null</td></tr>
     * <tr><td>b</td><td>a</td></tr>
     * <tr><td>c</td><td>b</td></tr>
     * <tr><td>d</td><td>c</td></tr>
     * </table>
     *
     * <p>A {@code null} or empty path is ignored. If the helper reports that
     * the first element matches {@code parentNode}'s value, {@code parentNode}
     * is incremented and receives the rest of the path. Otherwise the matching
     * child of this node is incremented, or a new child with count 1 is
     * created, and the rest of the path is added below that child.
     *
     * @param path       one path of the tree: the items recorded for one transaction
     * @param parentNode parent node of the first element of the path
     * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
     * @created on 2019-05-25 21:42:41
     */
    public void addPath(List<T> path, FpTreeNode<T> parentNode) {
        if (path == null || path.isEmpty()) {
            return;
        }

        final T head = path.get(0);
        final List<T> tail = path.subList(1, path.size());

        // The head repeats the parent's own item: count it on the parent.
        if (parentNode != null && helper.nodeCompare(head, parentNode.getNodeVal())) {
            parentNode.increaseCountOne();
            parentNode.addPath(tail, parentNode);
            return;
        }

        // Reuse the child that already carries this item, or create one.
        FpTreeNode<T> next = this.findChild(head);
        if (next != null) {
            next.increaseCountOne();
        } else {
            next = this.addChild(new FpTreeNode<T>(1, head, null, null, this.getHelper()));
        }
        next.addPath(tail, next);
    }

    /**
     * Increments the counter by one.
     *
     * @return the counter value after the increment
     * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
     * @created on 2019-05-23 19:36:21
     */
    public long increaseCountOne() {
        return this.increaseCount(1);
    }

    /**
     * Adds {@code increasement} to the counter.
     *
     * @param increasement amount to add
     * @return the counter value after the addition
     * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
     * @created on 2019-05-23 19:37:16
     */
    public long increaseCount(long increasement) {
        final long updated = this.count + increasement;
        this.count = updated;
        return updated;
    }

    /**
     * Looks among the direct children for one whose value matches
     * {@code childVal} according to the helper.
     *
     * @param childVal item value to look for
     * @return the first matching child, or {@code null} if there is none
     * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
     * @created on 2019-05-23 19:41:42
     */
    public FpTreeNode<T> findChild(T childVal) {
        if (children != null) {
            for (FpTreeNode<T> candidate : children) {
                if (helper.nodeCompare(candidate.getNodeVal(), childVal)) {
                    return candidate;
                }
            }
        }
        return null;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder(super.toString());
        text.append("-node (val:").append(this.getNodeVal());
        text.append(", count: ").append(this.getCount());
        text.append(")");
        return text.toString();
    }

    /** @return the occurrence counter */
    public long getCount() {
        return count;
    }

    /** @param count new occurrence counter */
    public void setCount(long count) {
        this.count = count;
    }

    /** @return the item value of this node */
    public T getNodeVal() {
        return nodeVal;
    }

    /** @param nodeVal new item value */
    public void setNodeVal(T nodeVal) {
        this.nodeVal = nodeVal;
    }

    /** @return the parent node, possibly {@code null} */
    public FpTreeNode<T> getParent() {
        return parent;
    }

    /** @param parent new parent node */
    public void setParent(FpTreeNode<T> parent) {
        this.parent = parent;
    }

    /** @return the child list, possibly {@code null} */
    public List<FpTreeNode<T>> getChildren() {
        return children;
    }

    /** @param children new child list */
    public void setChildren(List<FpTreeNode<T>> children) {
        this.children = children;
    }

    /** @return the helper used for item comparison */
    public FpTreeHelper<T> getHelper() {
        return helper;
    }

    /** @param helper new helper used for item comparison */
    public void setHelper(FpTreeHelper<T> helper) {
        this.helper = helper;
    }
}

FpTreeHeader类

package com.slyk.sdp.algorithms.externalAlgorithms.fpTree;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;

import com.slyk.sdp.algorithms.externalAlgorithms.fpTree.util.ListSortUtils;

/**
 * Header table of an FP-tree.
 *
 * <p>The table itself is an insertion-ordered map from item to its occurrence
 * count. Besides the counts it keeps the filtered and sorted transactions that
 * serve as input for building the tree, and the per-item node lists that are
 * filled in by {@link #buildNodeEntryList(FpTreeNode)}.
 *
 * <p>NOTE: the second type parameter is literally named {@code Integer} and
 * therefore hides {@link java.lang.Integer} everywhere inside this class. It is
 * kept so that existing {@code FpTreeHeader<K, Integer>} declarations continue
 * to compile; inside the class body {@code java.lang.Integer} must always be
 * written fully qualified.
 *
 * @param <K> item type
 * @param <Integer> placeholder type parameter that shadows {@code java.lang.Integer}
 *
 * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
 * @created on 2019-05-23 20:05:14
 */
@SuppressWarnings("hiding")
public class FpTreeHeader<K, Integer> extends LinkedHashMap <K, java.lang.Integer> {
    private static final Logger logger = LoggerFactory.getLogger(FpTreeHeader.class);
    
    private static final long serialVersionUID = 1L;
    
    /**
     * Filtered and sorted source transactions, used as input for building the FP-tree.
     */
    private List<List<K>> inputData = new LinkedList<List<K>>();
    
    /**
     * Helper used to compare items and to order the header table.
     */
    private FpTreeHelper<K> helper;
    
    /**
     * Node links: for every item, the tree nodes carrying that item, collected
     * by {@link #buildNodeEntryList(FpTreeNode)}.
     */
    private Map<K, List<FpTreeNode<K>>> treeNodeMap = new LinkedHashMap<K, List<FpTreeNode<K>>>();
    
    /**
     * Sets the helper and returns this table so calls can be chained.
     *
     * @param helper helper to use
     * @return this header table
     * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
     * @created on 2019-05-29 10:54:18
     */
    public FpTreeHeader<K, Integer> addHelper( FpTreeHelper<K> helper) {
        this.setHelper(helper);
        return this;
    }
    
    /**
     * Walks the subtree rooted at {@code node} depth-first and registers every
     * visited node in the node-link map under its item value.
     *
     * @param node root of the subtree to register
     * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
     * Created On 2019-05-29 01:13:27
     */
    protected void buildNodeEntryList(FpTreeNode<K> node) {
        // Nodes with count -1 are not registered
        // (NOTE(review): presumably the sentinel count of the tree root - confirm).
        if (node.getCount() != -1) {
            List<FpTreeNode<K>> nodeList = treeNodeMap.get(node.getNodeVal());
            if (nodeList == null) {
                nodeList = new ArrayList<FpTreeNode<K>>();
                treeNodeMap.put(node.getNodeVal(), nodeList);
            }
            nodeList.add(node);
        }
        
        if (node.getChildren() == null) {
            return ;
        }
        
        for (FpTreeNode<K> child : node.getChildren()) {
            buildNodeEntryList(child);
        }
    }
    
    /**
     * Builds the header table from the source transactions.
     *
     * <p>Steps performed:
     * <ol>
     * <li>count every occurrence of every item;</li>
     * <li>drop items whose count is below {@code absSupport};</li>
     * <li>re-order the table with the comparator supplied by the helper;</li>
     * <li>remove dropped items from every transaction, sort what is left by
     *     descending count (ties broken by position in the header table) and
     *     keep the non-empty transactions as {@link #getInputData() input data}.</li>
     * </ol>
     *
     * <p>The lists inside {@code sourceData} are filtered and sorted <em>in
     * place</em>, and those same list instances are stored in the input data.
     *
     * @param sourceData transactions; every inner list must be mutable
     * @param absSupport minimum absolute count an item needs to stay in the table
     * @return this header table
     * @throws IllegalArgumentException if no helper has been set
     * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
     * @created on 2019-05-23 20:36:58
     */
    @SuppressWarnings("unchecked")
    public FpTreeHeader<K, Integer> buildTable(List<List<K>> sourceData, int absSupport) {
        Assert.notNull(this.helper, "helper cannot be null, Set helper first!");
        
        logger.debug("构建项头表.");
        // 1. Count occurrences.
        for (List<K> data : sourceData) {
            for (K k : data) {
                java.lang.Integer seen = this.get(k);
                this.put(k, seen == null ? 1 : seen + 1);
            }
        }
        
        // 2. Drop items below the support threshold.
        Iterator<Map.Entry<K, java.lang.Integer>> ite = this.entrySet().iterator();
        while (ite.hasNext()) {
            if (ite.next().getValue() < absSupport) {
                ite.remove();
            }
        }
        
        // 3. Re-order the table. The cast is needed only because the class-level
        //    type parameter "Integer" hides java.lang.Integer; it is a no-op at runtime.
        List<K> keylist = new ArrayList<K>(this.keySet());
        ListSortUtils.sort(keylist, this.getHelper().nodeEleCompare((FpTreeHeader<K, java.lang.Integer>) this));
        Map<K, java.lang.Integer> reordered = new LinkedHashMap<K, java.lang.Integer>();
        for (K k : keylist) {
            reordered.put(k, this.get(k));
        }
        this.clear();
        this.putAll(reordered);
        
        // 4. Filter and sort the transactions. The comparator only reads the
        //    (now final) header table, so one instance serves every transaction.
        final Comparator<K> byHeaderOrder = new Comparator<K>() {
            @Override
            public int compare(K o1, K o2) {
                return compareByHeaderOrder(o1, o2);
            }
        };
        for (List<K> data : sourceData) {
            for (Iterator<K> itr = data.iterator(); itr.hasNext(); ) {
                if (!this.containsKey(itr.next())) {
                    itr.remove();
                }
            }
            ListSortUtils.sort(data, byHeaderOrder);
            if (!data.isEmpty()) {
                inputData.add(data);
            }
        }
        logger.debug("构建项头表完成.");
        return this;
    }

    /**
     * Orders two items by descending count; equal counts are ordered by the
     * items' positions in the header table.
     *
     * <p>Both positions are looked up independently, so two arguments that the
     * helper considers the same item compare as equal, as the
     * {@link Comparator} contract requires.
     */
    private int compareByHeaderOrder(K o1, K o2) {
        int byCount = java.lang.Integer.compare(this.get(o2), this.get(o1));
        if (byCount != 0) {
            return byCount;
        }
        int index1 = 0;
        int index2 = 0;
        int position = 0;
        for (K key : this.keySet()) {
            position++;
            if (helper.nodeCompare(key, o1)) {
                index1 = position;
            }
            if (helper.nodeCompare(key, o2)) {
                index2 = position;
            }
        }
        return index1 - index2;
    }

    /** @return the filtered and sorted transactions produced by {@link #buildTable} */
    public List<List<K>> getInputData() {
        return inputData;
    }

    /** @param inputData new input data */
    public void setInputData(List<List<K>> inputData) {
        this.inputData = inputData;
    }

    /** @return the helper used for item comparison */
    public FpTreeHelper<K> getHelper() {
        return helper;
    }

    /** @param helper new helper used for item comparison */
    public void setHelper(FpTreeHelper<K> helper) {
        this.helper = helper;
    }

    /** @return the node-link map (item to tree nodes carrying it) */
    public Map<K, List<FpTreeNode<K>>> getTreeNodeMap() {
        return treeNodeMap;
    }

    /** @param treeNodeMap new node-link map */
    public void setTreeNodeMap(Map<K, List<FpTreeNode<K>>> treeNodeMap) {
        this.treeNodeMap = treeNodeMap;
    }
}

FpTree类:

package com.slyk.sdp.algorithms.externalAlgorithms.fpTree;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;

import com.slyk.sdp.algorithms.externalAlgorithms.fpTree.util.DoubleKeyMap;

/**
 * FPtree
 * 
 * 描述:@param <T>
 *
 * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
 * @created on 2019年6月3日,下午1:34:22
 */
public class FpTree<T> {
    private static Logger logger = LoggerFactory.getLogger(FpTree.class);
    
    /**
     * 项头表
     */
    private FpTreeHeader<T, Integer> fpTreeHeader;
    
    /**
     * helper
     */
    private FpTreeHelper<T> helper;
    
    /**
     * root node
     */
    private FpTreeNode<T> root;
    
    /**
     * 默认频繁度阈值
     */
    protected static final int DEFAULT_ABS_SUPPORT = 0xf;
    
    private int absSupport = DEFAULT_ABS_SUPPORT;
    
    /**
     * 默认置信度
     */
    private static final int DEFAULT_CONFIDENT = 3;
    
    /**
     * 置信度
     */
    private int confident = DEFAULT_CONFIDENT;
    
    /**
     * 描述:挖掘树
     * <br/>代码参考自《机器学习实战》
     *
     * @param outList
     * @param tree
     * @param basePat
     * @return
     * @throws ClassNotFoundException
     * @throws IOException
     * @author <a href='mailto:xiaomingyang@shulianyikang.com'>xiaomingyang</a>
     * @created on 2019年5月31日,下午5:50:45
     */
    public List<List<T>> fpGrowth(List<List<T>> outList, FpTree<T> tree, List<T> prefix) throws ClassNotFoundException, IOException {
        logger.debug("开始conditionFpTree数据挖掘计算.");
        //
        // 挖掘频繁项集的步骤如下:
        // 1 从FP树提取条件模式基
  
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值