java实现关联分析算法Apriori

最新推荐文章于 2022-12-18 21:08:26 发布

fanzhijun301

最新推荐文章于 2022-12-18 21:08:26 发布

阅读量4.1k

点赞数

文章标签：算法 java string null class list

本文链接：https://blog.csdn.net/fanzhijun301/article/details/6003527

版权

package com.dataming.association;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

import org.apache.log4j.Logger;

/**
 * Level-wise Apriori frequent-itemset mining over an in-memory transaction table.
 *
 * <p>Transactions are stored as 0/1 vectors aligned with {@link #items}: position {@code i}
 * of a vector equal to 1 means the transaction contains {@code items.get(i)}. Starting from
 * the 1-itemsets, each pass joins the frequent (k-1)-itemsets into k-item candidates, prunes
 * candidates that have a non-frequent (k-1)-subset, counts support against every transaction
 * and keeps the candidates whose support is at least {@link #minSupport}.
 */
public class Apriori {

    private static final Logger log = Logger.getLogger(Apriori.class);

    /** Kind code of a {@link CFCon} that holds candidate itemsets. */
    private static final int KIND_CANDIDATE = 1;

    /** Kind code of a {@link CFCon} that holds frequent itemsets. */
    private static final int KIND_FREQUENT = 2;

    /** Minimum number of supporting transactions for an itemset to be kept as frequent. */
    private int minSupport = 2;

    /** Item labels; the contents must be stored in order (column i of every bit vector refers to entry i). */
    private List<String> items;

    /** One 0/1 vector per transaction, aligned with {@link #items}. */
    private final List<List<Integer>> bitVectorList = new ArrayList<List<Integer>>();

    /** Non-empty candidate sets C1, C2, ... in generation order; only written by this class. */
    private final List<CFCon> candidateList = new ArrayList<CFCon>();

    /** Frequent sets L1, L2, ...; L1 is always recorded, later levels only when non-empty. */
    private final List<CFCon> frequentList = new ArrayList<CFCon>();

    /**
     * Runs the miner on the built-in sample data and logs the highest-level frequent itemsets.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        Apriori apriori = new Apriori();

        apriori.generateData();
        apriori.apriMain();
        apriori.printFreqItems();
    }

    /**
     * Logs every itemset of the last recorded frequent level together with its support count.
     * Does nothing when no level has been recorded yet.
     */
    private void printFreqItems() {
        if (frequentList.isEmpty()) {
            return;
        }
        CFCon lastLevel = frequentList.get(frequentList.size() - 1);
        for (CF cf : lastLevel.cfList) {
            StringBuilder joined = new StringBuilder();
            for (int i = 0; i < cf.itemList.size(); i++) {
                if (i > 0) {
                    joined.append(",");
                }
                joined.append(cf.itemList.get(i));
            }
            log.info("freqence: " + joined + " supCount:" + cf.supCount);
        }
    }

    /**
     * Drives the level-wise search: builds C1/L1, then repeatedly derives C(k) from L(k-1),
     * counts support and filters, until a level yields no candidates or no frequent itemsets.
     */
    private void apriMain() {
        // C1
        CFCon firstCandidates = findFrequent1Itemsets();
        if (firstCandidates == null) {
            // No item catalogue or transaction table: nothing to mine.
            return;
        }
        candidateList.add(firstCandidates);

        CFCon frequent = candidateToFrequent(firstCandidates);
        frequentList.add(frequent);

        while (!frequent.cfList.isEmpty()) {
            CFCon candidates = generateCandidates(frequent);
            if (candidates.cfList.isEmpty()) {
                // Nothing to count, so the transaction scan can be skipped entirely.
                break;
            }
            countSupport(candidates);

            frequent = candidateToFrequent(candidates);
            candidateList.add(candidates);
            if (!frequent.cfList.isEmpty()) {
                frequentList.add(frequent);
            }
        }
    }

    /**
     * Increments the support count of every candidate once per transaction that contains
     * all of the candidate's items.
     *
     * @param candidates candidate itemsets whose {@code supCount} fields are updated in place
     */
    private void countSupport(CFCon candidates) {
        HashSet<String> present = new HashSet<String>();
        for (List<Integer> bitVector : bitVectorList) {
            if (bitVector == null) {
                continue;
            }
            present.clear();
            // Only columns that exist in both the catalogue and this row are meaningful.
            int width = Math.min(items.size(), bitVector.size());
            for (int i = 0; i < width; i++) {
                if (isSet(bitVector.get(i))) {
                    present.add(items.get(i));
                }
            }
            for (CF cf : candidates.cfList) {
                if (present.containsAll(cf.itemList)) {
                    cf.supCount++;
                }
            }
        }
    }

    /**
     * Generates C(k) from L(k-1).
     *
     * <p>Two itemsets of the previous level are joined when all but their last items are
     * equal and their last items differ; the joined itemset is kept only if each of its
     * (k-1)-subsets is present in the previous level.
     *
     * @param frequent L(k-1); must not be null
     * @return the candidate container for level k, possibly empty
     */
    private CFCon generateCandidates(CFCon frequent) {
        CFCon candidates = new CFCon(KIND_CANDIDATE, frequent.level + 1);
        List<CF> previous = frequent.cfList;

        // Built once per level instead of once per candidate. Lists compare by value, so
        // no string key has to be assembled; the item lists are never mutated afterwards.
        HashSet<List<String>> frequentSets = new HashSet<List<String>>();
        for (CF cf : previous) {
            frequentSets.add(cf.itemList);
        }

        for (int outer = 0; outer < previous.size(); outer++) {
            List<String> left = previous.get(outer).itemList;
            for (int inner = outer + 1; inner < previous.size(); inner++) {
                List<String> joined = join(left, previous.get(inner).itemList);
                if (joined != null && !hasInfrequentSubset(joined, frequentSets)) {
                    candidates.cfList.add(new CF(joined, 0));
                }
            }
        }
        return candidates;
    }

    /**
     * Joins two itemsets of equal size that share every item except the last.
     *
     * @param left  first itemset, non-empty
     * @param right second itemset, same size as {@code left}
     * @return the shared prefix followed by the two last items in {@code compareTo} order,
     *         or null when the prefixes differ, the last items are equal, or a compared
     *         item is null
     */
    private static List<String> join(List<String> left, List<String> right) {
        int prefixLength = left.size() - 1;
        List<String> joined = new ArrayList<String>(left.size() + 1);
        for (int i = 0; i < prefixLength; i++) {
            String item = left.get(i);
            if (item == null || !item.equals(right.get(i))) {
                return null;
            }
            joined.add(item);
        }

        String lastLeft = left.get(prefixLength);
        String lastRight = right.get(right.size() - 1);
        if (lastLeft == null || lastRight == null || lastLeft.equals(lastRight)) {
            return null;
        }
        if (lastLeft.compareTo(lastRight) < 0) {
            joined.add(lastLeft);
            joined.add(lastRight);
        } else {
            joined.add(lastRight);
            joined.add(lastLeft);
        }
        return joined;
    }

    /**
     * Checks whether some (k-1)-subset of a k-item candidate is missing from L(k-1).
     *
     * @param candidate    the k-item candidate
     * @param frequentSets the item lists of L(k-1)
     * @return true if removing some single item from {@code candidate} gives a list that is
     *         not contained in {@code frequentSets}
     */
    private static boolean hasInfrequentSubset(List<String> candidate,
                                               HashSet<List<String>> frequentSets) {
        for (int skip = 0; skip < candidate.size(); skip++) {
            List<String> subset = new ArrayList<String>(candidate);
            subset.remove(skip);
            if (!frequentSets.contains(subset)) {
                return true;
            }
        }
        return false;
    }

    /** A set of itemsets belonging to one level, tagged as candidate or frequent. */
    private static class CFCon {

        final List<CF> cfList = new ArrayList<CF>();
        final int kind;  // KIND_CANDIDATE (1) or KIND_FREQUENT (2)
        final int level; // iteration number

        CFCon(int kind, int level) {
            this.kind = kind;
            this.level = level;
        }
    }

    /** One itemset together with its support count. */
    private static class CF {
        final List<String> itemList;
        int supCount;

        CF(List<String> itemList, int supCount) {
            this.itemList = itemList;
            this.supCount = supCount;
        }
    }

    /**
     * Builds C1: one single-item itemset per entry of {@link #items}, with its support
     * counted over all transactions.
     *
     * @return the level-1 candidate container, or null when the item catalogue or the
     *         transaction table is missing
     */
    private CFCon findFrequent1Itemsets() {
        if (bitVectorList == null || items == null) {
            return null;
        }

        CFCon candidates = new CFCon(KIND_CANDIDATE, 1);
        for (String item : items) {
            List<String> single = new ArrayList<String>(1);
            single.add(item);
            candidates.cfList.add(new CF(single, 0));
        }

        for (List<Integer> bitVector : bitVectorList) {
            if (bitVector == null) {
                continue;
            }
            // Columns beyond the catalogue have no itemset to credit; ignore them.
            int width = Math.min(items.size(), bitVector.size());
            for (int i = 0; i < width; i++) {
                if (isSet(bitVector.get(i))) {
                    candidates.cfList.get(i).supCount++;
                }
            }
        }
        return candidates;
    }

    /**
     * Keeps the candidates whose support count reaches {@link #minSupport}.
     *
     * <p>Candidates are visited from last to first, so the returned container lists the
     * survivors in the reverse of their candidate order.
     *
     * @param candidates a counted candidate container
     * @return a new frequent container carrying the same level number
     */
    private CFCon candidateToFrequent(CFCon candidates) {
        CFCon frequent = new CFCon(KIND_FREQUENT, candidates.level);
        List<CF> all = candidates.cfList;
        for (int i = all.size() - 1; i >= 0; i--) {
            CF cf = all.get(i);
            if (cf.supCount >= minSupport) {
                frequent.cfList.add(cf);
            }
        }
        return frequent;
    }

    /** Loads the built-in sample: items I1..I5 and nine transactions. */
    private void generateData() {
        items = new ArrayList<String>();
        for (int index = 1; index <= 5; index++) {
            items.add("I" + index);
        }

        String[] rows = {
            "1,1,0,0,1",
            "0,1,0,1,0",
            "0,1,1,0,0",
            "1,1,0,1,0",
            "1,0,1,0,0",
            "0,1,1,0,0",
            "1,0,1,0,0",
            "1,1,1,0,1",
            "1,1,1,0,0"
        };
        for (String row : rows) {
            bitVectorList.add(getStrList(row));
        }
    }

    /**
     * Parses a comma-separated list of integers.
     *
     * @param bitVector text such as {@code "1,0,1"}; may be null
     * @return the parsed values in order; empty when {@code bitVector} is null
     * @throws NumberFormatException if a token is not an integer after trimming whitespace
     */
    private List<Integer> getStrList(String bitVector) {
        List<Integer> list = new ArrayList<Integer>();
        if (bitVector != null) {
            for (String bit : bitVector.split(",")) {
                list.add(Integer.parseInt(bit.trim()));
            }
        }
        return list;
    }

    /** Returns true when a transaction cell marks the item as present; null counts as absent. */
    private static boolean isSet(Integer bit) {
        return bit != null && bit.intValue() == 1;
    }
}

fanzhijun301

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
1
评论
java实现关联分析算法Apriori

 package com.dataming.association; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import org.apache.log4j.Logger; public class Apriori { private static final Logger log = Logger.getLogger(Ap
复制链接

扫一扫