伪代码
算法:Apriori
输入:D - 事务数据库;min_sup - 最小支持度计数阈值
输出:L - D中的频繁项集
方法:
L1=find_frequent_1-itemsets(D); // 找出所有频繁1项集
For(k=2;Lk-1!=null;k++){
Ck=apriori_gen(Lk-1); // 产生候选,并剪枝
For each 事务t in D{ // 扫描D进行候选计数
Ct=subset(Ck,t); // 得到Ck中被事务t包含的候选(即候选是t的子集)
For each 候选c 属于 Ct
c.count++;
}
Lk={c属于Ck | c.count>=min_sup}
}
Return L=∪k Lk; // 所有频繁项集(各层Lk的并集)
Procedure apriori_gen(Lk-1:frequent(k-1)-itemsets)
For each项集l1属于Lk-1
For each项集 l2属于Lk-1
If((l1[1]=l2[1])&&( l1[2]=l2[2])&&…….
&& (l1[k-2]=l2[k-2])&&(l1[k-1]<l2[k-1])) then{
c=l1连接l2 //连接步:产生候选
if has_infrequent_subset(c,Lk-1) then
delete c; //剪枝步:删除非频繁候选
else add c to Ck;
}
Return Ck;
Procedure has_infrequent_subset(c:candidate k-itemset; Lk-1:frequent(k-1)-itemsets)
For each(k-1)-subset s of c
If s不属于Lk-1 then
Return true;
Return false;
Java实现
import java.util.*;
public class dmm {
int zcd = 2; //最小支持度
double kxd = 0.7; //
String fg = ";"; //每个事务的分隔符
Map<String, Integer> frenquentResultMap=new TreeMap<>(); //频繁项目集的结果集,按字典序排序
public ArrayList<String> findFrenquentOneSet(ArrayList<String> date) {
Map<String, Integer> map = new HashMap<>();
ArrayList<String> result=new ArrayList<>();
for (String s : date) { //获得C1
String[] ss = s.split(fg);
for (String c : ss) {
if (!(map.containsKey(c)))
map.put(c, 1);
else
map.put(c, map.get(c) + 1);
}
}
for (String p : map.keySet()) { //获得L1
if (map.get(p) >= zcd) {
frenquentResultMap.put(p, map.get(p));
result.add(p+fg);
}
}
return result; //返回L1
}
public ArrayList<String> findFrenquentNSet(ArrayList<String> Nset, ArrayList<String> date) { //传入数据集和CN
ArrayList<String> result=new ArrayList<>();
for (String s : Nset) {
String[] s1 = s.split(fg);
for (String ss : Nset) {
String zuhe = "";
String[] s2 = ss.split(fg);
int n=0;
for(int i=0;i< s2.length;i++){
if ((i!= s2.length-1&&s1[i].compareTo(s2[i])==0)||(i== s2.length-1&&s1[i].compareTo(s2[i])<0)) {
zuhe += s1[i] + fg;
n++;
}
else
break;;
}
if(n==s2.length){
zuhe+=s2[s2.length-1]+fg;
result.add(zuhe);
}
}
}
return result; //返回LN+1
}
public ArrayList<String> countFrenquentmembers(ArrayList<String> date, ArrayList<String> Nset) {
ArrayList<String> result=new ArrayList<>();
for (String s : Nset) {
int count = 0;
String[] s1 = s.split(fg);
for (String ss : date) {
String[] s2 = ss.split(fg);
int n = 0;
for (String c : s1) {
for (String cc : s2) {
if (c .compareTo(cc)==0)
n++;
}
}
if (n !=s1.length)
continue;
count++;
}
if (count >=zcd) {
frenquentResultMap.put(s, count);
result.add(s);
}
}
return result; //返回CN
}
public static void main(String[] args) {
ArrayList<String> dataList = new ArrayList<>();
dataList.add("1;2;5;");
dataList.add("2;4;");
dataList.add("2;3;");
dataList.add("1;2;4;");
dataList.add("1;3;");
dataList.add("2;3;");
dataList.add("1;3;");
dataList.add("1;2;3;5;");
dataList.add("1;2;3;");
System.out.println("====数据集合==========");
for(String string:dataList)
{
System.out.println(string);
}
dmm aprior=new dmm();
ArrayList<String> arr=aprior.findFrenquentOneSet(dataList);
while(aprior.findFrenquentNSet(arr,dataList).size()!=0){
arr=aprior.countFrenquentmembers(dataList,aprior.findFrenquentNSet(arr,dataList));
}
System.out.println("====频繁项集==========");
for(String key:aprior.frenquentResultMap.keySet())
{
System.out.println(key+" : "+aprior.frenquentResultMap.get(key));
}
}
}
结果展示