Apriori算法--关联规则 转载

package apr;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.*;


public class Apriori {

 private double minsup = 0.6;// 最小支持度
 private double minconf = 0.2;// 最小置信度

 // 注意使用IdentityHashMap,否则由于关联规则产生存在键值相同的会出现覆盖
 private IdentityHashMap ruleMap = new IdentityHashMap();

 private String[] transSet = { "abc", "abc", "acde", "bcdf", "abcd", "abcdf" };// 事务集合,可以根据需要从构造函数里传入

 private int itemCounts = 0;// 候选1项目集大小,即字母的个数
 private TreeSet[] frequencySet = new TreeSet[40];// 频繁项集数组,[0]:代表1频繁集...
 private TreeSet maxFrequency = new TreeSet();// 最大频繁集
 private TreeSet candidate = new TreeSet();// 1候选集
 private TreeSet candidateSet[] = new TreeSet[40];// 候选集数组
 private int frequencyIndex;

 
 public Apriori() {

  maxFrequency = new TreeSet();
  itemCounts = counts();// 初始化1候选集的大小

  // 初始化其他两个
  for (int i = 0; i < itemCounts; i++) {
   frequencySet[i] = new TreeSet();
   candidateSet[i] = new TreeSet();
  }
  candidateSet[0] = candidate;
 }

 
 public Apriori(String[] transSet) {
  this.transSet = transSet;
  maxFrequency = new TreeSet();
  itemCounts = counts();// 初始化1候选集的大小

  // 初始化其他两个
  for (int i = 0; i < itemCounts; i++) {
   frequencySet[i] = new TreeSet();
   candidateSet[i] = new TreeSet();
  }
  candidateSet[0] = candidate;
 }

 
 public int counts() {

  String temp1 = null;
  char temp2 = 'a';
  // 遍历所有事务集String 加入集合,set自动去重了
  for (int i = 0; i < transSet.length; i++) {
   temp1 = transSet[i];
   for (int j = 0; j < temp1.length(); j++) {
    temp2 = temp1.charAt(j);
    candidate.add(String.valueOf(temp2));
   }
  }
  return candidate.size();
 }

 
 public void item1_gen() {
  String temp1 = "";
  double m = 0;

  Iterator temp = candidateSet[0].iterator();
  while (temp.hasNext()) {
   temp1 = (String) temp.next();
   m = count_sup(temp1);

   // 符合条件的加入 1候选集
   if (m >= minsup * transSet.length) {
    frequencySet[0].add(temp1);
   }
  }
 }

 
 public double count_sup(String x) {
  int temp = 0;
  for (int i = 0; i < transSet.length; i++) {
   for (int j = 0; j < x.length(); j++) {
    if (transSet[i].indexOf(x.charAt(j)) == -1)
     break;
    else if (j == (x.length() - 1))
     temp++;
   }
  }
  return temp;
 }

 
 public void canditate_gen(int k) {
  String y = "", z = "", m = "";
  char c1 = 'a', c2 = 'a';

  Iterator temp1 = frequencySet[k - 2].iterator();
  Iterator temp2 = frequencySet[0].iterator();
  TreeSet h = new TreeSet();

  while (temp1.hasNext()) {
   y = (String) temp1.next();
   c1 = y.charAt(y.length() - 1);

   while (temp2.hasNext()) {
    z = (String) temp2.next();

    c2 = z.charAt(0);
    if (c1 >= c2)
     continue;
    else {
     m = y + z;
     h.add(m);
    }
   }
   temp2 = frequencySet[0].iterator();
  }
  candidateSet[k - 1] = h;
 }

 // k候选集=>k频繁集
 public void frequent_gen(int k) {
  String s1 = "";

  Iterator ix = candidateSet[k - 1].iterator();
  while (ix.hasNext()) {
   s1 = (String) ix.next();
   if (count_sup(s1) >= (minsup * transSet.length)) {
    frequencySet[k - 1].add(s1);
   }
  }
 }

 public boolean is_frequent_empty(int k) {
  if (frequencySet[k - 1].isEmpty())
   return true;
  else
   return false;
 }

 public boolean included(String s1, String s2) {
  for (int i = 0; i < s1.length(); i++) {
   if (s2.indexOf(s1.charAt(i)) == -1)
    return false;
   else if (i == s1.length() - 1)
    return true;
  }
  return true;
 }

 public void maxfrequent_gen() {
  int i, j;
  Iterator iterator, iterator1, iterator2;
  String temp = "", temp1 = "", temp2 = "";
  for (i = 1; i < frequencyIndex; i++) {
   maxFrequency.addAll(frequencySet[i]);
  }
  // for (i = 0; i < frequencyIndex; i++) {
  // iterator1 = frequencySet[i].iterator();
  // while (iterator1.hasNext()) {
  // temp1 = (String) iterator1.next();
  // for (j = i + 1; j < frequencyIndex; j++) {
  // iterator2 = frequencySet[j].iterator();
  // while (iterator2.hasNext()) {
  // temp2 = (String) iterator2.next();
  // if (included(temp1, temp2))
  // maxFrequency.remove(temp1);
  // }
  // }
  // }
  // }
 }

 
 public void print_maxfrequent() {
  Iterator iterator = maxFrequency.iterator();
  System.out.print("产生规则频繁项集:");
  while (iterator.hasNext()) {
   System.out.print(toDigit((String) iterator.next()) + "/t");
  }
  System.out.println();
 }

 
 public void rulePrint() {
  String x, y;
  double temp = 0;

  Set hs = ruleMap.keySet();
  Iterator iterator = hs.iterator();

  StringBuffer sb = new StringBuffer();
  System.out.println("关联规则:");
  while (iterator.hasNext()) {
   x = (String) iterator.next();

   y = (String) ruleMap.get(x);

   temp = (count_sup(x + y) / count_sup(x));
   
   //x = toDigit(x);
   //y = toDigit(y);

   System.out.println(x + (x.length() < 5 ? "/t" : "") + "-->" + y
     + "/t" + temp);
   sb.append("  " + x + (x.length() < 5 ? "/t" : "") + "-->" + y
     + "/t" + temp + "/t/n");
  }
  BufferedWriter bw = null;
  try {
   FileWriter fw = new FileWriter("Asr.txt");

   bw = new BufferedWriter(fw);

   bw.write("最小支持度 minsup = " + minsup);
   bw.newLine();
   bw.write("最小置信度 minconf = " + minconf);
   bw.newLine();
   bw.write("产生关联规则如下: ");
   bw.newLine();

   bw.write(sb.toString());
   // bw.newLine();

   if (bw != null)
    bw.close();
  } catch (Exception e) {
   e.printStackTrace();
  }

 }

 public void subGen(String s) {
  String x = "", y = "";
  for (int i = 1; i < (1 << s.length()) - 1; i++) {
   for (int j = 0; j < s.length(); j++) {
    if (((1 << j) & i) != 0) {
     x += s.charAt(j);
    }
   }

   for (int j = 0; j < s.length(); j++) {
    if (((1 << j) & (~i)) != 0) {

     y += s.charAt(j);

    }
   }
   if (count_sup(x + y) / count_sup(x) >= minconf) {
    ruleMap.put(x, y);
   }
   x = "";
   y = "";

  }
 }

 public void ruleGen() {
  String s;
  Iterator iterator = maxFrequency.iterator();
  while (iterator.hasNext()) {
   s = (String) iterator.next();
   subGen(s);
  }
 }

 // for test
 public void print1() {
  Iterator temp = candidateSet[0].iterator();
  while (temp.hasNext())
   System.out.println(temp.next());
 }

 // for test
 public void print2() {
  Iterator temp = frequencySet[0].iterator();
  while (temp.hasNext())
   System.out.println((String) temp.next());
 }

 // for test
 public void print3() {
  canditate_gen(1);
  frequent_gen(2);
  Iterator temp = candidateSet[1].iterator();
  Iterator temp1 = frequencySet[1].iterator();
  while (temp.hasNext())
   System.out.println("候选" + (String) temp.next());
  while (temp1.hasNext())
   System.out.println("频繁" + (String) temp1.next());
 }

 public void print_canditate() {

  for (int i = 0; i < frequencySet[0].size(); i++) {
   Iterator ix = candidateSet[i].iterator();
   Iterator iy = frequencySet[i].iterator();
   System.out.print("候选集" + (i + 1) + ":");
   while (ix.hasNext()) {
    System.out.print((String) ix.next() + "/t");
    //System.out.print(toDigit((String) ix.next()) + "/t");
   }
   System.out.print("/n" + "频繁集" + (i + 1) + ":");
   while (iy.hasNext()) {
    System.out.print((String) iy.next() + "/t");
    //System.out.print(toDigit((String) iy.next()) + "/t");
   }
   System.out.println();
  }
 }

 
 private String toDigit(String str) {
  if (str != null) {
   StringBuffer temp = new StringBuffer();

   for (int i = 0; i < str.length(); i++) {
    char c = str.charAt(i);
    temp.append(((int) c - 65) + " ");
   }

   return temp.toString();
  } else {
   return null;
  }

 }

 public String[] getTrans_set() {
  return transSet;
 }

 public void setTrans_set(String[] transSet) {
  transSet = transSet;
 }

 public double getMinsup() {
  return minsup;
 }

 public void setMinsup(double minsup) {
  this.minsup = minsup;
 }

 public double getMinconf() {
  return minconf;
 }

 public void setMinconf(double minconf) {
  this.minconf = minconf;
 }

 public void run() {
  int k = 1;

  item1_gen();

  do {
   k++;
   canditate_gen(k);
   frequent_gen(k);
  } while (!is_frequent_empty(k));
  frequencyIndex = k - 1;
  print_canditate();
  maxfrequent_gen();
  print_maxfrequent();
  ruleGen();
  rulePrint();

 }
 
 public static void main(String[] args) {
  Apriori ap = new Apriori();
  ap.run();
 }
 
 
 
 

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值