apriori算法是经典的求频繁项集的算法,基本原理是:找出频繁1项集(需事先规定好门限),然后利用频繁1项集产生频繁2项集,以此类推。代码如下:
public class AprioriAlgorithm {
private static final int THRESHOLD = 2;
/**
*
* @Title: appriGen
* @Description: 产生频繁1项集
* @return ArrayList<ArrayList<String>>
* @throws
*/
public ArrayList<ArrayList<String>> freq1Gen(ArrayList<ArrayList<String>> list) {
Map<String, Integer> candItemMap = new HashMap<String, Integer>();
for(int i = 0; i < list.size(); i++) {
for(int j = 0; j < list.get(i).size(); j++) {
if(null != candItemMap.get(list.get(i).get(j))) {
candItemMap.put(list.get(i).get(j), candItemMap.get(list.get(i).get(j)) + 1);
} else {
candItemMap.put(list.get(i).get(j), 1);
}
}
}
Iterator<String> iter = candItemMap.keySet().iterator();
ArrayList<ArrayList<String>> dataList = new ArrayList<ArrayList<String>>();
while(iter.hasNext()) {
String tmp = iter.next();
if(candItemMap.get(tmp) >= THRESHOLD) {
ArrayList<String> tmpList = new ArrayList<String>();
tmpList.add(tmp);
dataList.add(tmpList);
}
}
return dataList;
}
/**
*
* @Title: candidateGen
* @Description: 由频繁k项集产生频繁k+1项集,dataLength代表item个数
* @return ArrayList<ArrayList<String>>
* @throws
*/
public ArrayList<ArrayList<String>> freqKGen(ArrayList<ArrayList<String>>candiList, int k, ArrayList<ArrayList<String>> originalData) {
ArrayList<ArrayList<String>> res = new ArrayList<ArrayList<String>>();
for(int i = 0; i < candiList.size() - 1; i++) {
for(int j = i + 1; j < candiList.size(); j++) {
ArrayList<String> tmp = new ArrayList<String>();
if(k == 1) {
tmp.add(candiList.get(i).get(0));
tmp.add(candiList.get(j).get(0));
} else {
for(int p = 0; p < k - 1; p++) {
if(candiList.get(i).get(p) != candiList.get(j).get(p)) {
break;
}
tmp.add(candiList.get(i).get(p));
if(p == k - 2 && candiList.get(i).get(k-1) != candiList.get(j).get(k-1)) { // 表明前k-1项相等
tmp.add(candiList.get(i).get(k-1));
tmp.add(candiList.get(j).get(k-1));
}
}
}
res.add(tmp);
}
}
Iterator<ArrayList<String>> iter = res.iterator();
while(iter.hasNext()) {
ArrayList<String> tmp = iter.next();
if(AprioriUtil.getFreq(originalData, tmp) < THRESHOLD) {
iter.remove();
}
}
return res;
}
public static void main(String[] args) {
ArrayList<ArrayList<String>> list = new ArrayList<ArrayList<String>>();
ArrayList<String> tmp = new ArrayList<String>();
tmp.add("m");
tmp.add("s");
tmp.add("e");
list.add(tmp);
ArrayList<String> tmp1 = new ArrayList<String>();
tmp1.add("m");
tmp1.add("t");
tmp1.add("p");
list.add(tmp1);
ArrayList<String> tmp2 = new ArrayList<String>();
tmp2.add("m");
tmp2.add("t");
tmp2.add("p");
tmp2.add("s");
list.add(tmp2);
ArrayList<String> tmp3 = new ArrayList<String>();
tmp3.add("t");
tmp3.add("p");
list.add(tmp3);
AprioriAlgorithm dd = new AprioriAlgorithm();
ArrayList<ArrayList<String>> res = dd.freq1Gen(list);
ArrayList<ArrayList<String>> res1 = dd.freqKGen(res, 1, list);
ArrayList<ArrayList<String>> res2 = dd.freqKGen(res1, 2, list);
ArrayList<ArrayList<String>> res3 = dd.freqKGen(res2, 3, list);
ArrayList<ArrayList<String>> res4 = dd.freqKGen(res3, 4, list);
System.out.println(res1);
}
}