% Code
% This file generates the frequent itemsets.
% ---- Setup: reset the session, load the inputs, derive the thresholds ----
clear all;
clc;
% TotalSheet{k} receives the frequent k-itemsets. It is only ever indexed
% with braces, so it is created as a cell array rather than a numeric [].
TotalSheet={};
% Stop in the debugger at the failing line if any statement errors.
dbstop if error;
% brand_name: sorted unique values found in the 2nd column of the CSV file.
brand_name=csvread('./data/data.csv');
brand_name=unique(brand_name(:,2));
thconf=0.6; % minimum confidence threshold (not used in the code visible here)
% The MAT-file is expected to contain a variable named `table`.
% NOTE(review): this name shadows MATLAB's built-in table type while the
% variable is in the workspace.
load('./data/table');
X=table;
% m rows, n columns. The code below indexes columns as items and counts rows,
% so rows are presumably transactions with nonzero = item present - confirm.
[m,n]=size(X);
D=X;
minsup=0.1;        % minimum support as a fraction of the number of rows
th=ceil(minsup*m); % minimum support expressed as a row count
%%
% Find the frequent 1-itemsets.
% C1 has one row per column of D: [column index, number of nonzero-sum hits].
% The summation dimension is given explicitly: a bare sum(D) would collapse a
% single-row D to a scalar and break the concatenation below.
C1=[(1:n)' sum(D,1)'];
% L1 keeps only the rows of C1 whose support count reaches the threshold th.
L1=C1;
support=L1(:,2);
L1(support<th,:)=[];
TotalSheet{1}=L1;
%%
% Generate the frequent 2-itemsets.
% Candidates are all pairs of frequent single items (first column of L1).
items1=L1(:,1);
if numel(items1)<2
    % With fewer than two items there are no pairs. A scalar argument must
    % not reach nchoosek, which would return a binomial coefficient instead
    % of a list of combinations.
    pairs=zeros(0,2);
else
    pairs=nchoosek(items1,2);
end
nPairs=size(pairs,1);
% temp{i,1}: the item pair (row vector); temp{i,2}: its support count.
temp=cell(nPairs,2);
for i=1:nPairs
    temp{i,1}=pairs(i,:);
    % Count the rows of D in which both columns are nonzero. nnz is used so
    % that no variable named `sum` is created, which would hide the built-in.
    temp{i,2}=nnz(all(D(:,pairs(i,:)),2));
end
C2=temp;
% L2 keeps only the candidates whose support count reaches th.
L2=C2;
support=cell2mat(L2(:,2));
index=find(support<th);
L2(index,:)=[];
TotalSheet{2}=L2;
%%
% Generate the frequent k-itemsets for k = 3..6, level by level.
% On entry L2 holds the frequent (k-1)-itemsets as a cell array with
% {item row vector, support count} per row; on exit of each pass it holds the
% frequent k-itemsets, which are also stored in TotalSheet{k}.
for k=3:6
    L3=[];
    % C3: the frequent (k-1)-itemsets, one itemset per row.
    C3=cell2mat(L2(:,1));
    item_num=size(C3,1);
    if item_num<2
        % Nothing left to join.
        break;
    end
    prevLen=size(C3,2); % number of items in each (k-1)-itemset
    comb_index=nchoosek(1:item_num,2);
    % Join step: merge every pair of (k-1)-itemsets.
    cand=zeros(0,prevLen+1);
    for i=1:size(comb_index,1)
        comb_temp=union(C3(comb_index(i,1),:),C3(comb_index(i,2),:));
        % Keep the union only if the two itemsets differ in exactly one
        % item. The element count is used here: the union of two row vectors
        % is itself a row vector, so its row count is always 1.
        if numel(comb_temp)~=prevLen+1
            continue;
        end
        comb_temp=comb_temp(:)';
        % Prune step: every (k-1)-subset of the candidate must be frequent.
        sub_comb=nchoosek(comb_temp,prevLen);
        if ~all(ismember(sub_comb,C3,'rows'))
            continue;
        end
        cand(end+1,:)=comb_temp;
    end
    % Different pairs yield the same union; keep each candidate once.
    C3=unique(cand,'rows');
    % Count the support of every candidate.
    nCand=size(C3,1);
    temp=cell(nCand,2);
    for i=1:nCand
        temp{i,1}=C3(i,:);
        % Rows of D in which all columns of the candidate are nonzero. nnz
        % is used instead of sum because the name `sum` may already be taken
        % by a workspace variable at this point of the script.
        temp{i,2}=nnz(all(D(:,C3(i,:)),2));
    end
    L3=temp;
    if size(L3,1)==0
        break;
    end
    % Drop the candidates whose support count is below th.
    support=cell2mat(L3(:,2));
    index=find(support<th);
    L3(index,:)=[];
    TotalSheet{k}=L3;
    L2=L3;
    k % intentionally unsuppressed: prints the level that was just finished
end