学习数据仓库与数据挖掘时实验中用到的一个算法
发现网上版本产生关联规则缺失,所以从头到尾拜读了下,并修改了几个部分
发出来有点乱,复制黏贴到eclipse里就ok
package apr;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.util.*;
public class Apriori {
private double minsup = 0.6;// 最小支持度
private double minconf = 0.2;// 最小置信度
//
注意使用IdentityHashMap,否则由于关联规则产生存在键值相同的会出现覆盖
private IdentityHashMap ruleMap = new
IdentityHashMap();
private String[] transSet = { "abc", "abc",
"acde", "bcdf", "abcd", "abcdf" };// 事务集合,可以根据需要从构造函数里传入
private int itemCounts = 0;//
候选1项目集大小,即字母的个数
private TreeSet[] frequencySet = new
TreeSet[40];// 频繁项集数组,[0]:代表1频繁集...
private TreeSet maxFrequency = new TreeSet();//
最大频繁集
private TreeSet candidate = new TreeSet();//
1候选集
private TreeSet candidateSet[] = new
TreeSet[40];// 候选集数组
private int frequencyIndex;
public Apriori() {
maxFrequency = new
TreeSet();
itemCounts = counts();//
初始化1候选集的大小
// 初始化其他两个
for (int i = 0; i
< itemCounts; i++) {
frequencySet[i]
= new TreeSet();
candidateSet[i]
= new TreeSet();
}
candidateSet[0] =
candidate;
}
public Apriori(String[] transSet) {
this.transSet = transSet;
maxFrequency = new
TreeSet();
itemCounts = counts();//
初始化1候选集的大小
// 初始化其他两个
for (int i = 0; i
< itemCounts; i++) {
frequencySet[i]
= new TreeSet();
candidateSet[i]
= new TreeSet();
}
candidateSet[0] =
candidate;
}
public int counts() {
String temp1 = null;
char temp2 = 'a';
// 遍历所有事务集String
加入集合,set自动去重了
for (int i = 0; i
< transSet.length; i++) {
temp1 =
transSet[i];
for (int j =
0; j < temp1.length(); j++) {
temp2
= temp1.charAt(j);
candidate.add(String.valueOf(temp2));
}
}
return candidate.size();
}
public void item1_gen() {
String temp1 = "";
double m = 0;
Iterator temp =
candidateSet[0].iterator();
while (temp.hasNext()) {
temp1 =
(String) temp.next();
m =
count_sup(temp1);
// 符合条件的加入
1候选集
if (m
>= minsup * transSet.length) {
frequencySet[0].add(temp1);
}
}
}
public double count_sup(String x) {
int temp = 0;
for (int i = 0; i
< transSet.length; i++) {
for (int j =
0; j < x.length(); j++) {
if
(transSet[i].indexOf(x.charAt(j)) == -1)
break;
else
if (j == (x.length() - 1))
temp++;
}
}
return temp;
}
public void canditate_gen(int k) {
String y = "", z = "", m =
"";
char c1 = 'a', c2 = 'a';
Iterator temp1 =
frequencySet[k - 2].iterator();
Iterator temp2 =
frequencySet[0].iterator();
TreeSet h = new TreeSet();
while (temp1.hasNext())
{
y = (String)
temp1.next();
c1 =
y.charAt(y.length() - 1);
while
(temp2.hasNext()) {
z
= (String) temp2.next();
c2
= z.charAt(0);
if
(c1 >= c2)
continue;
else
{
m
= y + z;
h.add(m);
}
}
temp2 =
frequencySet[0].iterator();
}
candidateSet[k - 1] = h;
}
// k候选集=>k频繁集
public void frequent_gen(int k) {
String s1 = "";
Iterator ix = candidateSet[k
- 1].iterator();
while (ix.hasNext()) {
s1 = (String)
ix.next();
if
(count_sup(s1) >= (minsup * transSet.length))
{
frequencySet[k
- 1].add(s1);
}
}
}
public boolean is_frequent_empty(int k)
{
if (frequencySet[k -
1].isEmpty())
return
true;
else
return
false;
}
public boolean included(String s1, String s2)
{
for (int i = 0; i
< s1.length(); i++) {
if
(s2.indexOf(s1.charAt(i)) == -1)
return
false;
else if (i ==
s1.length() - 1)
return
true;
}
return true;
}
public void maxfrequent_gen() {
int i, j;
Iterator iterator, iterator1,
iterator2;
String temp = "", temp1 = "",
temp2 = "";
for (i = 1; i <
frequencyIndex; i++) {
maxFrequency.addAll(frequencySet[i]);
}
// for (i = 0; i
< frequencyIndex; i++) {
// iterator1 =
frequencySet[i].iterator();
// while (iterator1.hasNext())
{
// temp1 = (String)
iterator1.next();
// for (j = i + 1; j
< frequencyIndex; j++) {
// iterator2 =
frequencySet[j].iterator();
// while (iterator2.hasNext())
{
// temp2 = (String)
iterator2.next();
// if (included(temp1,
temp2))
//
maxFrequency.remove(temp1);
// }
// }
// }
// }
}
public void print_maxfrequent() {
Iterator iterator =
maxFrequency.iterator();
System.out.print("产生规则频繁项集:");
while (iterator.hasNext())
{
System.out.print(toDigit((String)
iterator.next()) + "\t");
}
System.out.println();
}
public void rulePrint() {
String x, y;
double temp = 0;
Set hs =
ruleMap.keySet();
Iterator iterator =
hs.iterator();
StringBuffer sb = new
StringBuffer();
System.out.println("关联规则:");
while (iterator.hasNext())
{
x = (String)
iterator.next();
y =
(String) ruleMap.get(x);
temp =
(count_sup(x + y) / count_sup(x));
//x =
toDigit(x);
//y =
toDigit(y);
System.out.println(x
+ (x.length() < 5 ? "\t" : "") +
"-->" + y
+
"\t" + temp);
sb.append(" " + x + (x.length() < 5 ? "\t" : "") +
"-->" + y
+
"\t" + temp + "\t\n");
}
BufferedWriter bw = null;
try {
FileWriter fw
= new FileWriter("Asr.txt");
bw = new
BufferedWriter(fw);
bw.write("最小支持度
minsup = " + minsup);
bw.newLine();
bw.write("最小置信度
minconf = " + minconf);
bw.newLine();
bw.write("产生关联规则如下:
");
bw.newLine();
bw.write(sb.toString());
//
bw.newLine();
if (bw !=
null)
bw.close();
} catch (Exception e) {
e.printStackTrace();
}
}
public void subGen(String s) {
String x = "", y = "";
for (int i = 1; i
< (1 << s.length()) -
1; i++) {
for (int j =
0; j < s.length(); j++) {
if
(((1 << j) & i) != 0)
{
x
+= s.charAt(j);
}
}
for (int j
= 0; j < s.length(); j++) {
if
(((1 << j) & (~i)) !=
0) {
y
+= s.charAt(j);
}
}
if
(count_sup(x + y) / count_sup(x) >= minconf) {
ruleMap.put(x,
y);
}
x = "";
y = "";
}
}
public void ruleGen() {
String s;
Iterator iterator =
maxFrequency.iterator();
while (iterator.hasNext())
{
s = (String)
iterator.next();
subGen(s);
}
}
// for test
public void print1() {
Iterator temp =
candidateSet[0].iterator();
while (temp.hasNext())
System.out.println(temp.next());
}
// for test
public void print2() {
Iterator temp =
frequencySet[0].iterator();
while (temp.hasNext())
System.out.println((String)
temp.next());
}
// for test
public void print3() {
canditate_gen(1);
frequent_gen(2);
Iterator temp =
candidateSet[1].iterator();
Iterator temp1 =
frequencySet[1].iterator();
while (temp.hasNext())
System.out.println("候选"
+ (String) temp.next());
while (temp1.hasNext())
System.out.println("频繁"
+ (String) temp1.next());
}
public void print_canditate() {
for (int i = 0; i
< frequencySet[0].size(); i++) {
Iterator ix =
candidateSet[i].iterator();
Iterator iy =
frequencySet[i].iterator();
System.out.print("候选集"
+ (i + 1) + ":");
while
(ix.hasNext()) {
System.out.print((String)
ix.next() + "\t");
//System.out.print(toDigit((String)
ix.next()) + "\t");
}
System.out.print("\n"
+ "频繁集" + (i + 1) + ":");
while
(iy.hasNext()) {
System.out.print((String)
iy.next() + "\t");
//System.out.print(toDigit((String)
iy.next()) + "\t");
}
System.out.println();
}
}
private String toDigit(String str) {
if (str != null) {
StringBuffer
temp = new StringBuffer();
for (int i
= 0; i < str.length(); i++) {
char
c = str.charAt(i);
temp.append(((int)
c - 65) + " ");
}
return
temp.toString();
} else {
return
null;
}
}
public String[] getTrans_set() {
return transSet;
}
public void setTrans_set(String[] transSet)
{
transSet = transSet;
}
public double getMinsup() {
return minsup;
}
public void setMinsup(double minsup) {
this.minsup = minsup;
}
public double getMinconf() {
return minconf;
}
public void setMinconf(double minconf) {
this.minconf = minconf;
}
public void run() {
int k = 1;
item1_gen();
do {
k++;
canditate_gen(k);
frequent_gen(k);
} while
(!is_frequent_empty(k));
frequencyIndex = k - 1;
print_canditate();
maxfrequent_gen();
print_maxfrequent();
ruleGen();
rulePrint();
}
public static void main(String[] args) {
Apriori ap = new
Apriori();
ap.run();
}
}