Java 关联分析：用 Java 实现关联分析算法 Apriori

package com.dataming.association;

import java.util.ArrayList;

import java.util.HashSet;

import java.util.List;

import org.apache.log4j.Logger;

public class Apriori {

private static final Logger log = Logger.getLogger(Apriori.class);  private int min_sport = 2;  private List items; //这里面的内容一定要按照顺序存放 private List> bitVectorList = new ArrayList>(); private List candidateList = new ArrayList(); private List freqenceList = new ArrayList();  public static void main(String args[]){  Apriori apriori = new Apriori();    apriori.generateData();  apriori.apriMain();  apriori.printFreqItems();   }  private void printFreqItems(){  CFCon cfcL = freqenceList.get(freqenceList.size() - 1);  for(CF cf : cfcL.cfList){   String kk = "";   List itemList = cf.itemList;   for(int i = 0; i < itemList.size(); i++){    if(i == 0){     kk = itemList.get(i);    } else {     kk += "," + itemList.get(i);    }   }   log.info("freqence: " + kk + " supCount:" + cf.supCount);  } }  private void apriMain(){  //C1  CFCon cfcC1 = find_frequent_1_itemsets();    candidateList.add(cfcC1);  CFCon cfcL1 = candidateToFreqent(cfcC1);  freqenceList.add(cfcL1);    CFCon cfcL = cfcL1;    HashSet set = new HashSet();  for(int k = 2; cfcL != null && cfcL.cfList != null && cfcL.cfList.size() > 0; k++){   CFCon cfcCk = getCandateFroFreq(cfcL);   //为cfcC计数   for(List bitVector : bitVectorList){    set.clear();    for(int i = 0; i < items.size(); i++){     int bit = bitVector.get(i);     if(bit == 1){      set.add(items.get(i));     }    }    List cfList = cfcCk.cfList;    for(CF cf : cfList){     List itemList = cf.itemList;     boolean isAdd = true;     for(String item : itemList){      if(!set.contains(item)){       isAdd = false;       break;      }     }     if(isAdd)cf.supCount++;    }   }      cfcL = candidateToFreqent(cfcCk);   if(cfcCk.cfList != null && cfcCk.cfList.size() > 0)candidateList.add(cfcCk);   if(cfcL.cfList != null && cfcL.cfList.size() > 0)freqenceList.add(cfcL);  } }  /**  * 从L(k-1) 生成 C(k);  *   * @param cfc  * @return  */ private CFCon getCandateFroFreq(CFCon cfcL){  CFCon cfcC = null;    if(cfcL != null){   cfcC = new CFCon(1, cfcL.iteratNum + 1);   List 
cfList = cfcL.cfList;   for(int outIndex = 0; outIndex < cfList.size(); outIndex++){    CF cfOut = cfList.get(outIndex);    List itemOutList = cfOut.itemList;    for(int inIndex = outIndex + 1; inIndex < cfList.size(); inIndex++){     if(outIndex == inIndex) continue;          CF cfIn = cfList.get(inIndex);     List itemInList = cfIn.itemList;          List itemList = new ArrayList();          boolean same = true;     for(int index = 0; index < itemOutList.size() - 1; index++){      String out = itemOutList.get(index);      String in = itemInList.get(index);      if(out == null || in == null || !out.equals(in)){       same = false;       break;      }       itemList.add(out);     }     if(same){      String out = itemOutList.get(itemOutList.size() - 1 );      String in = itemInList.get(itemInList.size() - 1);      if(out != null && in != null && !out.equals(in)){       if(out.compareTo(in) >= 0){        itemList.add(in);        itemList.add(out);       } else {        itemList.add(out);        itemList.add(in);       }       CF cf = new CF(itemList, 0);       if(!has_infreqent_subset(itemList, cfcL)){        cfcC.cfList.add(cf);       }      }     }    }   }  }    return cfcC; }   /**  * 在L(k-1)查找是否存在,cList(k-1)子集  *   * @param cList  * @param cfc L(k-1)  * @return  */ private boolean has_infreqent_subset(List cList, CFCon cfc){  HashSet set = new HashSet();    List cfList = cfc.cfList;  for(int index = 0; index < cfList.size(); index++){   CF cf = cfList.get(index);   List itemList = cf.itemList;   String key = "";   boolean first = true;   for(String item : itemList){    if(first){     first = false;     key = item;    } else {     key += "," + item;    }   }   set.add(key);  }    StringBuilder sb = new StringBuilder();    for(int index = 0; index < cList.size(); index++){      sb.delete(0, sb.length());   boolean first = true;   for(int index2 = 0; index2 < cList.size(); index2++){    if(index2 == index)continue;    else {     if(first){      
sb.append(cList.get(index2));      first = false;     } else {      sb.append(",");      sb.append(cList.get(index2));     }    }   }   boolean setCon = set.contains(sb.toString());   if(!setCon) return true;  }    return false; }  private class CFCon {    List cfList;  int cOrf;  //1.候选集,2,频繁集  int iteratNum; //迭代次数    public CFCon(int cOrf, int iteratNum){   cfList = new ArrayList();   this.cOrf = cOrf;   this.iteratNum = iteratNum;  }    public CFCon(int n, int cOrf, int iteratNum){   this.cOrf = cOrf;   this.iteratNum = iteratNum;   cfList = new ArrayList();   for(int index = 0; index < n; index++){    List itemList = new ArrayList();    itemList.add(items.get(index));        CF cf = new CF(itemList, 0);        cfList.add(cf);   }  } }  private class CF {  List itemList;  int supCount;    public CF(List itemList, int supCount){   this.itemList = itemList;   this.supCount = supCount;  } }  private CFCon find_frequent_1_itemsets(){    CFCon cfc = null;  if(bitVectorList != null && items != null){   cfc = new CFCon(items.size(), 1, 1);   for(List bitVector : bitVectorList){    if(bitVector != null){     for(int index = 0; index < bitVector.size(); index++){      int bit = bitVector.get(index);      CF cf = cfc.cfList.get(index);      if(bit == 1) cf.supCount++;     }    }   }  }  return cfc; }  /**  * 通过min_suport过滤掉最小的  *   * @param cfcC  * @return  */ private CFCon candidateToFreqent(CFCon cfcC){  List cfList = cfcC.cfList;    CFCon cfcL = new CFCon(2, cfcC.iteratNum);  if(cfList != null){   for(int index = cfList.size() - 1; index >= 0; index--){    CF cf = cfList.get(index);    int supCount = cf.supCount;    if(supCount >= min_sport){     cfcL.cfList.add(cf);    }   }  }  return cfcL; }  private void generateData(){  items = new ArrayList();  for(int index = 1; index <=5; index++) items.add("I" + index);    bitVectorList.add(getStrList("1,1,0,0,1"));  bitVectorList.add(getStrList("0,1,0,1,0"));  bitVectorList.add(getStrList("0,1,1,0,0"));  
bitVectorList.add(getStrList("1,1,0,1,0"));  bitVectorList.add(getStrList("1,0,1,0,0"));  bitVectorList.add(getStrList("0,1,1,0,0"));  bitVectorList.add(getStrList("1,0,1,0,0"));  bitVectorList.add(getStrList("1,1,1,0,1"));  bitVectorList.add(getStrList("1,1,1,0,0"));   }  private List getStrList(String bitVector){  List list = new ArrayList();  if(bitVector != null){   String[] bitArr = bitVector.split(",");   for(String bit : bitArr){    list.add(Integer.parseInt(bit));   }  }  return list; }}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值