FP-tree频繁模式挖掘算法

package Fp_Tree;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;

/**
 * Counts the frequent itemsets of a transaction file with the FP-growth approach:
 * build an FP-tree from the frequent single items, then recursively build and
 * count conditional FP-trees for every item of each header table.
 *
 * <p>All state is static; the result of a run is left in {@link #number}.
 */
public class Myfptree2 {
    /**
     * Minimum absolute support an item (or conditional item) needs to be kept.
     * NOTE(review): 88162 is presumably the number of transactions in the input
     * data set (1% relative support) - confirm before pointing INFILE elsewhere.
     */
    final static double STANDARD = (88162 * 0.01);
    /** Input file: one transaction per line, items separated by single spaces. */
    final static File INFILE = new File("d:\\3.txt");
    /** Transactions read from {@link #INFILE}, each reduced to its distinct items. */
    final static List<Set<String>> DATABASE = new ArrayList<>();
    /** Running total of frequent itemsets found by {@link #dfsAll}. */
    public static int number = 0;
    /** Orders header-table entries by descending frequency. */
    public static Comparator<table> com =
            (left, right) -> Integer.compare(right.frequency, left.frequency);

    public static void main(String[] args) throws IOException {
        go();
    }

    /** @return the minimum support count used by {@link #getTable}. */
    public static double getStandard() {
        return STANDARD;
    }

    /**
     * Runs the whole pipeline: reads {@link #INFILE}, counts item supports, builds
     * the initial FP-tree, counts all frequent itemsets and prints the count and
     * the elapsed time.
     *
     * <p>{@link #number} and {@link #DATABASE} are reset first so that repeated
     * calls do not accumulate results of earlier runs.
     *
     * @throws IOException if the input file cannot be opened or read
     */
    public static void go() throws IOException {
        long start = System.currentTimeMillis();
        number = 0;
        DATABASE.clear();

        // First scan: support of every single item.
        Map<String, Integer> c1 = new HashMap<>();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader br = new BufferedReader(new FileReader(INFILE))) {
            String line;
            while ((line = br.readLine()) != null) {
                Set<String> transaction = new HashSet<>();
                for (String item : line.split(" ")) {
                    // Support is counted once per transaction: an item repeated on the
                    // same line must not be counted twice.
                    if (transaction.add(item)) {
                        c1.merge(item, 1, Integer::sum);
                    }
                }
                DATABASE.add(transaction);
            }
        }

        // Header table of the initial tree, most frequent item first.
        List<table> headerTable = getTable(c1);
        headerTable.sort(com);

        // Second scan: keep only frequent items of each transaction, in header order.
        frequentItems[] fi = new frequentItems[DATABASE.size()];
        int k = 0;
        for (Set<String> transaction : DATABASE) {
            frequentItems f = new frequentItems();
            for (table t : headerTable) {
                if (transaction.contains(t.item)) {
                    f.linkedSet.add(t.item);
                }
            }
            f.num = 1;
            fi[k++] = f;
        }

        node treeTop = createFP(headerTable, fi);
        dfsAll(headerTable, treeTop);
        System.out.println(number);

        long end = System.currentTimeMillis();
        System.out.println("运行时间"+((end - start) / 1000.0)+"s");
    }

    /**
     * Recursively counts the frequent itemsets represented by an FP-tree and adds
     * the count to {@link #number}.
     *
     * <p>A single-path tree contributes every non-empty combination of its items.
     * Otherwise each header item contributes itself plus whatever its conditional
     * FP-tree contributes.
     *
     * @param headerTable header table of the FP-tree; the tree is reached through
     *                    the node chains hanging off each entry's {@code head}
     * @param topOfTree   root of the FP-tree (currently not read by this method)
     */
    public static void dfsAll(List<table> headerTable, node topOfTree) {
        if (isSignal(headerTable)) {
            // 2^n - 1 non-empty subsets of the n items on the path.
            // Uses int arithmetic like `number` itself; assumes fewer than 31 items.
            number += (1 << headerTable.size()) - 1;
            return;
        }

        // Work from the least frequent header item upwards.
        for (int i = headerTable.size() - 1; i >= 0; i--) {
            // Conditional pattern base of item i: one prefix path per tree node of the item.
            // A list is used so the size never depends on the header frequency being accurate.
            List<frequentItems> patternBase = new ArrayList<>();
            Map<String, Integer> conditionalCounts = new HashMap<>();

            for (node occurrence = headerTable.get(i).head.next;
                 occurrence != null;
                 occurrence = occurrence.next) {
                frequentItems prefix = new frequentItems();
                // Climb towards the root, stopping before it: the root (parent == null)
                // carries no item. Items are collected leaf-to-root; cutFrequent
                // re-orders them by the conditional header table afterwards.
                for (node ancestor = occurrence.parent;
                     ancestor != null && ancestor.parent != null;
                     ancestor = ancestor.parent) {
                    prefix.linkedSet.add(ancestor.value);
                    conditionalCounts.merge(ancestor.value, occurrence.num, Integer::sum);
                }
                prefix.num = occurrence.num;
                patternBase.add(prefix);
            }

            List<table> subHeaderTable = getTable(conditionalCounts);
            subHeaderTable.sort(com);
            frequentItems[] subFrequentItems =
                    cutFrequent(patternBase.toArray(new frequentItems[0]), subHeaderTable);

            node subtree = createFP(subHeaderTable, subFrequentItems);
            dfsAll(subHeaderTable, subtree);
        }
        // Each header item on its own (combined with the current suffix) is frequent.
        number += headerTable.size();
    }

    /**
     * Removes from every pattern the items that are not in {@code header} and
     * re-orders the remaining items to follow the order of {@code header}.
     *
     * @param frequentItems patterns to filter; {@code null} entries are skipped
     * @param header        header table whose items are kept
     * @return a new array of the same length as the input; filtered patterns are
     *         packed at the front and any remaining slots are {@code null}
     */
    public static frequentItems[] cutFrequent(frequentItems[] frequentItems, List<table> header) {
        frequentItems[] res = new frequentItems[frequentItems.length];
        int k = 0;
        for (frequentItems f : frequentItems) {
            if (f == null) continue;
            frequentItems newf = new frequentItems();
            for (table t : header) {
                if (f.linkedSet.contains(t.item)) {
                    newf.linkedSet.add(t.item);
                }
            }
            newf.num = f.num;
            res[k++] = newf;
        }
        return res;
    }

    /**
     * Builds an (unsorted) header table from item counts, keeping only items whose
     * count reaches {@link #STANDARD}. The empty string is never kept: it is what
     * an empty token of the input and the value of a tree root look like.
     *
     * @param map item to support count
     * @return one {@code table} entry per retained item
     */
    public static List<table> getTable(Map<String, Integer> map) {
        List<table> header = new ArrayList<>();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            String item = entry.getKey();
            int count = entry.getValue();
            if (count >= STANDARD && !item.equals("")) {
                header.add(new table(item, count));
            }
        }
        return header;
    }

    /**
     * Tells whether the FP-tree behind {@code headerTable} is a single path.
     *
     * <p>That requires every header item to occur in exactly one tree node and
     * none of those nodes to have a sibling. The sibling check matters: a root
     * with children A and B has one node per item but is not a single path, and
     * treating it as one would count the unsupported itemset {A, B}.
     *
     * @param headerTable header table whose entries link to the tree nodes
     * @return {@code true} for a single path (including an empty table);
     *         {@code false} otherwise, also when an entry links to no node at all
     */
    public static boolean isSignal(List<table> headerTable) {
        for (table t : headerTable) {
            node only = t.head.next;
            if (only == null || only.next != null) {
                return false;
            }
            if (only.parent != null && only.parent.chile.size() > 1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds an FP-tree from the given patterns and threads every newly created
     * node onto the node chain of its item's header entry.
     *
     * @param lt header table; its entries' {@code head} chains are extended in place
     * @param fi patterns whose items are already filtered and ordered by the header
     *           table; {@code null} entries are skipped
     * @return the root of the new tree (a node with an empty value and no parent)
     */
    public static node createFP(List<table> lt, frequentItems[] fi) {
        // Current tail of each item's node chain, so appending does not re-walk the chain.
        Map<String, node> tails = new HashMap<>();
        for (table t : lt) {
            if (tails.containsKey(t.item)) continue;
            node tail = t.head;
            while (tail.next != null) {
                tail = tail.next;
            }
            tails.put(t.item, tail);
        }

        node top = new node();
        for (frequentItems f : fi) {
            if (f == null) continue;
            node now = top;
            for (String s : f.linkedSet) {
                // Look for an existing child carrying this item.
                node child = null;
                for (node n : now.chile) {
                    if (n.value.equals(s)) {
                        child = n;
                        break;
                    }
                }

                if (child != null) {
                    // Shared prefix: just add this pattern's weight.
                    child.num = child.num + f.num;
                } else {
                    child = new node();
                    child.value = s;
                    child.num = f.num;
                    child.parent = now;
                    now.chile.add(child);

                    // Only new nodes need linking; existing ones were linked when created.
                    node tail = tails.get(s);
                    if (tail != null) {
                        tail.next = child;
                        tails.put(s, child);
                    }
                }
                now = child;
            }
        }
        return top;
    }
}

/**
 * One node of an FP-tree. All fields are public and mutated directly by the
 * tree-building code.
 */
class node {
    /** Item stored in this node; a freshly created node holds the empty string. */
    public String value = "";
    /** Count accumulated on this node. */
    public int num;
    /** Parent node, or {@code null} when none has been assigned. */
    public node parent;
    /** Child nodes (field name kept as-is for compatibility with existing callers). */
    public Set<node> chile = new HashSet<>();
    /** Next node in a singly linked chain of nodes, or {@code null} at the end. */
    public node next;
}

/**
 * One header-table entry: an item, its frequency, and the sentinel head of the
 * chain of tree nodes associated with that item.
 */
class table {
    /** Item this entry stands for. */
    public String item;
    /** Frequency recorded for the item when the entry was created. */
    public int frequency;
    /** Sentinel node; the chain of tree nodes starts at {@code head.next}. */
    public node head = new node();

    /**
     * @param item      item this entry stands for
     * @param frequency frequency recorded for the item
     */
    public table(String item, int frequency) {
        this.frequency = frequency;
        this.item = item;
    }
}

/**
 * A weighted pattern: an insertion-ordered set of items together with the
 * number of times the pattern counts.
 */
class frequentItems {
    /** Weight of this pattern; starts at zero until a caller assigns it. */
    public int num;
    /** Items of the pattern, iterated in insertion order. */
    public LinkedHashSet<String> linkedSet = new LinkedHashSet<>();
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值