apriori2 java_Apriori算法java代码

本文介绍了一个基于Java实现的Apriori算法,用于数据挖掘中的关联规则学习。作者在研究过程中发现网上资源的关联规则部分不完整,于是从头实现并修复了问题。文章提供了一个简单的事务集合,通过Apriori算法生成频繁项集和关联规则,并输出到文件。代码包括频繁项集的生成、候选集的构造、以及置信度计算等关键步骤。
摘要由CSDN通过智能技术生成

学习数据仓库与数据挖掘时实验中用到的一个算法

发现网上流传的版本生成的关联规则有缺失,所以把代码从头到尾读了一遍,并修改了其中几处

贴出来后排版有点乱,复制粘贴到 Eclipse 里就可以了

package apr;

import java.io.BufferedWriter;

import java.io.FileWriter;

import java.util.*;

/**
 * Apriori frequent-itemset mining and association-rule generation.
 *
 * <p>Every transaction is a {@code String} and every item is a single character of that
 * string. An itemset is likewise a string whose characters are its items; itemsets built by
 * this class always list their items in ascending character order.
 *
 * <p>Typical use is {@link #run()}, which mines the frequent itemsets, prints them, derives
 * the rules and writes them to {@code Asr.txt}.
 */
public class Apriori {

    /** Minimum support, as a fraction of the number of transactions. */
    private double minsup = 0.6;

    /** Minimum confidence a rule must reach to be kept. */
    private double minconf = 0.2;

    /**
     * Generated rules as {@code {antecedent, consequent}} pairs, in generation order.
     * A list is used because the same antecedent legitimately occurs in several rules
     * (for example {@code a-->b} and {@code a-->bc}), which a regular map would overwrite.
     */
    private final List<String[]> rules = new ArrayList<>();

    /** Transactions to mine; replaceable through the constructor or the setter. */
    private String[] transSet = { "abc", "abc", "acde", "bcdf", "abcd", "abcdf" };

    /** Number of distinct items (characters) found in the transactions. */
    private int itemCounts = 0;

    /** Frequent itemsets per level; index 0 holds the frequent 1-itemsets, and so on. */
    private List<TreeSet<String>> frequencySet = new ArrayList<>();

    /** Itemsets that rules are generated from (all frequent itemsets of size two or more). */
    private TreeSet<String> maxFrequency = new TreeSet<>();

    /** Candidate 1-itemsets, i.e. every distinct item. */
    private TreeSet<String> candidate = new TreeSet<>();

    /** Candidate itemsets per level; index 0 is {@link #candidate}. */
    private List<TreeSet<String>> candidateSet = new ArrayList<>();

    /** Number of levels that produced at least one frequent itemset; set by {@link #run()}. */
    private int frequencyIndex;

    /** Creates a miner over the built-in sample transactions. */
    public Apriori() {
        init();
    }

    /**
     * Creates a miner over the given transactions.
     *
     * @param transSet transactions, one string of single-character items each
     * @throws NullPointerException if {@code transSet} is {@code null}
     */
    public Apriori(String[] transSet) {
        this.transSet = Objects.requireNonNull(transSet, "transSet");
        init();
    }

    /** Resets every derived structure and rebuilds the candidate 1-itemsets. */
    private void init() {
        candidate = new TreeSet<>();
        maxFrequency = new TreeSet<>();
        rules.clear();
        frequencyIndex = 0;
        itemCounts = scanItems();

        // A frequent itemset has at most itemCounts items, and run() probes one level past
        // the last non-empty one; run() also always touches level 2. itemCounts + 2 levels
        // cover both cases, including an empty transaction set.
        int levels = itemCounts + 2;
        frequencySet = new ArrayList<>(levels);
        candidateSet = new ArrayList<>(levels);
        for (int i = 0; i < levels; i++) {
            frequencySet.add(new TreeSet<String>());
            candidateSet.add(new TreeSet<String>());
        }
        candidateSet.set(0, candidate);
    }

    /** Adds every character of every transaction to {@link #candidate}; returns its size. */
    private int scanItems() {
        for (String transaction : transSet) {
            for (int j = 0; j < transaction.length(); j++) {
                candidate.add(String.valueOf(transaction.charAt(j)));
            }
        }
        return candidate.size();
    }

    /**
     * Collects the distinct items of all transactions into the candidate 1-itemsets.
     *
     * @return the number of distinct items
     */
    public int counts() {
        return scanItems();
    }

    /** Builds the frequent 1-itemsets from the candidate 1-itemsets. */
    public void item1_gen() {
        double threshold = minsup * transSet.length;
        for (String item : candidateSet.get(0)) {
            if (count_sup(item) >= threshold) {
                frequencySet.get(0).add(item);
            }
        }
    }

    /**
     * Counts the transactions that contain every item of {@code x}.
     *
     * @param x itemset to look for
     * @return the absolute support count; {@code 0} for an empty itemset
     */
    public double count_sup(String x) {
        if (x.isEmpty()) {
            return 0;
        }
        int hits = 0;
        for (String transaction : transSet) {
            if (containsAll(x, transaction)) {
                hits++;
            }
        }
        return hits;
    }

    /** Returns whether every character of {@code items} occurs in {@code container}. */
    private static boolean containsAll(String items, String container) {
        for (int i = 0; i < items.length(); i++) {
            if (container.indexOf(items.charAt(i)) == -1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the candidate k-itemsets by appending, to each frequent (k-1)-itemset, every
     * frequent single item that is greater than the itemset's last character.
     *
     * @param k size of the candidates to build; must be at least 2
     */
    public void canditate_gen(int k) {
        TreeSet<String> joined = new TreeSet<>();
        for (String prefix : frequencySet.get(k - 2)) {
            char last = prefix.charAt(prefix.length() - 1);
            for (String item : frequencySet.get(0)) {
                // Only extend with strictly greater items so each combination appears once.
                if (item.charAt(0) > last) {
                    joined.add(prefix + item);
                }
            }
        }
        candidateSet.set(k - 1, joined);
    }

    /**
     * Copies the candidate k-itemsets that reach the minimum support into the frequent
     * k-itemsets.
     *
     * @param k itemset size
     */
    public void frequent_gen(int k) {
        double threshold = minsup * transSet.length;
        for (String itemset : candidateSet.get(k - 1)) {
            if (count_sup(itemset) >= threshold) {
                frequencySet.get(k - 1).add(itemset);
            }
        }
    }

    /**
     * @param k itemset size
     * @return {@code true} if there is no frequent k-itemset
     */
    public boolean is_frequent_empty(int k) {
        return frequencySet.get(k - 1).isEmpty();
    }

    /**
     * @param s1 itemset to test
     * @param s2 itemset to search in
     * @return {@code true} if every character of {@code s1} occurs in {@code s2}
     *         (vacuously {@code true} when {@code s1} is empty)
     */
    public boolean included(String s1, String s2) {
        return containsAll(s1, s2);
    }

    /**
     * Collects the itemsets that rules are generated from: every frequent itemset of size
     * two or more, up to the last non-empty level found by {@link #run()}.
     *
     * <p>The original source carried a commented-out pass that removed itemsets contained
     * in a larger frequent itemset; it is not applied, so non-maximal itemsets are kept.
     */
    public void maxfrequent_gen() {
        for (int i = 1; i < frequencyIndex; i++) {
            maxFrequency.addAll(frequencySet.get(i));
        }
    }

    /** Prints the rule-generating itemsets, each rendered through {@code toDigit}. */
    public void print_maxfrequent() {
        System.out.print("产生规则频繁项集:");
        for (String itemset : maxFrequency) {
            System.out.print(toDigit(itemset) + "\t");
        }
        System.out.println();
    }

    /**
     * Prints every generated rule with its confidence and writes the same listing,
     * preceded by the thresholds, to {@code Asr.txt} in the working directory.
     * An I/O failure is reported on standard error and does not propagate.
     */
    public void rulePrint() {
        StringBuilder report = new StringBuilder();
        System.out.println("关联规则:");
        for (String[] rule : rules) {
            String x = rule[0];
            String y = rule[1];
            double confidence = count_sup(x + y) / count_sup(x);
            String line = x + (x.length() < 5 ? "\t" : "") + "-->" + y + "\t" + confidence;
            System.out.println(line);
            report.append(" ").append(line).append("\t\n");
        }

        // try-with-resources closes the writer even when a write fails.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter("Asr.txt"))) {
            bw.write("最小支持度 minsup = " + minsup);
            bw.newLine();
            bw.write("最小置信度 minconf = " + minconf);
            bw.newLine();
            bw.write("产生关联规则如下: ");
            bw.newLine();
            bw.write(report.toString());
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Generates the rules {@code x --> y} for every split of {@code s} into two non-empty
     * parts and keeps those whose confidence reaches the minimum confidence.
     *
     * <p>All {@code 2^n - 2} splits are enumerated with an {@code int} bit mask, so this is
     * only meaningful for itemsets shorter than 31 items.
     *
     * @param s itemset to split
     */
    public void subGen(String s) {
        int n = s.length();
        int allItems = (1 << n) - 1;
        // x + y is always a permutation of s, so its support is the support of s.
        double support = count_sup(s);
        for (int mask = 1; mask < allItems; mask++) {
            StringBuilder x = new StringBuilder();
            StringBuilder y = new StringBuilder();
            for (int j = 0; j < n; j++) {
                if (((1 << j) & mask) != 0) {
                    x.append(s.charAt(j));
                } else {
                    y.append(s.charAt(j));
                }
            }
            String antecedent = x.toString();
            if (support / count_sup(antecedent) >= minconf) {
                rules.add(new String[] { antecedent, y.toString() });
            }
        }
    }

    /** Generates rules from every itemset collected by {@link #maxfrequent_gen()}. */
    public void ruleGen() {
        for (String itemset : maxFrequency) {
            subGen(itemset);
        }
    }

    /** Test helper: prints the candidate 1-itemsets, one per line. */
    public void print1() {
        for (String item : candidateSet.get(0)) {
            System.out.println(item);
        }
    }

    /** Test helper: prints the frequent 1-itemsets, one per line. */
    public void print2() {
        for (String item : frequencySet.get(0)) {
            System.out.println(item);
        }
    }

    /** Test helper: builds and prints the candidate and frequent 2-itemsets. */
    public void print3() {
        // Level 2 is the first level canditate_gen can build (it reads level k - 1).
        canditate_gen(2);
        frequent_gen(2);
        for (String itemset : candidateSet.get(1)) {
            System.out.println("候选" + itemset);
        }
        for (String itemset : frequencySet.get(1)) {
            System.out.println("频繁" + itemset);
        }
    }

    /**
     * Prints the candidate and frequent itemsets of each level, for as many levels as
     * there are frequent 1-itemsets.
     */
    public void print_canditate() {
        for (int i = 0; i < frequencySet.get(0).size(); i++) {
            System.out.print("候选集" + (i + 1) + ":");
            for (String itemset : candidateSet.get(i)) {
                System.out.print(itemset + "\t");
            }
            System.out.print("\n" + "频繁集" + (i + 1) + ":");
            for (String itemset : frequencySet.get(i)) {
                System.out.print(itemset + "\t");
            }
            System.out.println();
        }
    }

    /**
     * Renders each character of {@code str} as its code minus 65 (the code of {@code 'A'})
     * followed by a space. Lowercase items such as those of the sample data therefore
     * print as values from 32 upwards.
     *
     * @return the rendered string, or {@code null} if {@code str} is {@code null}
     */
    private String toDigit(String str) {
        if (str == null) {
            return null;
        }
        StringBuilder digits = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            digits.append(((int) str.charAt(i) - 65) + " ");
        }
        return digits.toString();
    }

    /** @return the transactions being mined */
    public String[] getTrans_set() {
        return transSet;
    }

    /**
     * Replaces the transactions and resets all previously derived itemsets and rules.
     *
     * @param transSet new transactions
     * @throws NullPointerException if {@code transSet} is {@code null}
     */
    public void setTrans_set(String[] transSet) {
        this.transSet = Objects.requireNonNull(transSet, "transSet");
        init();
    }

    /** @return the minimum support fraction */
    public double getMinsup() {
        return minsup;
    }

    /** @param minsup minimum support, as a fraction of the number of transactions */
    public void setMinsup(double minsup) {
        this.minsup = minsup;
    }

    /** @return the minimum confidence */
    public double getMinconf() {
        return minconf;
    }

    /** @param minconf minimum confidence a rule must reach */
    public void setMinconf(double minconf) {
        this.minconf = minconf;
    }

    /**
     * Runs the whole pipeline: mines frequent itemsets level by level until a level is
     * empty, prints them, generates the rules, then prints and saves the rules.
     */
    public void run() {
        int k = 1;
        item1_gen();
        do {
            k++;
            canditate_gen(k);
            frequent_gen(k);
        } while (!is_frequent_empty(k));
        frequencyIndex = k - 1;
        print_canditate();
        maxfrequent_gen();
        print_maxfrequent();
        ruleGen();
        rulePrint();
    }

    /** Mines the built-in sample transactions. */
    public static void main(String[] args) {
        Apriori ap = new Apriori();
        ap.run();
    }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值