具体算法实现细节请查看IEEE论文:Set-oriented mining for association rules in relational databases
SETM算法代码+测试用例+具体操作步骤下载戳这http://download.csdn.net/detail/michealtx/4266085
sql语言实现,找出来所有的频繁模式。代码如下:
---------------最终版------------
--drop table c1,c2,c3,c4,r1,r2,r3,r4
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
--为SETMAlgorithm数据库创建空表r1
create table R1
(
tx_id int,
item1 nvarchar(50)
)
--把setmbackup数据库的sales_data表中导入数据到r1中
insert into R1
select *
from SETMBackup.dbo.SALES_DATA
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------
--------------------setm算法过程---------------
--将R1按item1排序
select * into #temp from R1--把R1内容存到临时表temp中
delete from R1--删除R1
insert R1 select * from #temp order by item1--将临时表temp按item1升序排序后插入到R1
drop table #temp--删除临时表temp
--声明最小支持度MinSupport
declare @minimum_support int
set @minimum_support=3
declare @MinSupport varchar(10)
set @MinSupport=cast(@minimum_support as varchar(10))
--声明变量k
declare @k int
set @k=1
--由R1生成C1
declare @CK varchar(100)
set @CK='C'+cast(@k as varchar(100))
declare @CreateTable varchar(100)
set @CreateTable=' create table '+@CK+' (item1 nvarchar(10),cnt int) '
declare @RK varchar(10)
set @RK='R'+cast(@k as varchar(100))
declare @InsertInto varchar(100)
set @InsertInto=' insert into '+@CK+' select item1,COUNT(*) from '+@RK+' group by item1 having COUNT(*)>='+@MinSupport
exec(@CreateTable)
exec(@InsertInto)
print '由R1生成C1'
--进入循环
declare @RKNotEmpty varchar(100)
--set @RKNotEmpty='if not exists(select * from '+@RK+') begin drop table '+@RK+' break end'
declare @KMinusOne int
set @KMinusOne=1
declare @RKMinusOne varchar(10)
set @RKMinusOne='R'+cast(@KMinusOne as varchar(100))
while 1=1
begin
print '进入循环'
set @KMinusOne=@k
set @RKMinusOne='R'+cast(@KMinusOne as varchar(100))
set @k=@k+1
set @RK='R'+cast(@k as varchar(100))
----------------------------------------------------
-----------------将R[k-1]按全部列排序---------------
declare @str1 varchar(MAX)
set @str1=
'declare @max int
select @max=count(name)
from syscolumns
where id=object_id('''+@RKMinusOne+''')
--print @max
declare @min int
set @min=1
declare @tt varchar(max)
set @tt=''''
while @min<=@max
begin
declare @name varchar(MAX)
select @name=name from(
select row_number()over(order by getdate()) as num,
name from syscolumns where id=object_id('''+@RKMinusOne+'''))a
where num=@min
set @tt=ISNULL(@tt,'','')+@name+'',''
set @min=@min+1
end
--print @tt
declare @ss varchar(max)
set @ss=''''
set @ss=''select * into #temp from '+@RKMinusOne+' order by ''
+LEFT(@tt,LEN(@tt)-1)+'' delete from '+@RKMinusOne+' insert '+@RKMinusOne+' select * from #temp drop table #temp''
exec(@ss)
'
exec(@str1)
print '将R[k-1]按全部列排序完成'
--扫描R[k-1]和R1将符合条件者合并生成RKTemp(即R'[K])
declare @num int
set @num=1
declare @str varchar(max)
set @str='create table RKTemp('
declare @sql varchar(1000)
set @sql='tx_id int,'
while @num<=@k
begin
set @sql=ISNULL(@sql,',')+'item'+CAST(@num as varchar)+' nvarchar(50),'
set @num=@num+1
end
set @str=@str+left(@sql,LEN(@sql)-1)+')'
exec(@str)
declare @str2 varchar(max)
set @str2=
'
insert into rktemp
select '+@RKMinusOne+'.*,myr1.item1
from '+@RKMinusOne+', R1 myr1
where '+@RKMinusOne+'.tx_id=myr1.tx_id and myr1.item1>'+@RKMinusOne+'.item'+cast(@KMinusOne as varchar(10))
exec(@str2)
print '扫描R[k-1]和R1将符合条件者合并生成Rktemp完成'
---------------将RKTemp(即R'[K])按全部item列排序--------------------
set @str1=
'declare @max int
select @max=count(name)
from syscolumns
where id=object_id('''+'RKTemp'+''')
--print @max
declare @min int
set @min=2
declare @tt varchar(max)
set @tt=''''
while @min<=@max
begin
declare @name varchar(MAX)
select @name=name from(
select row_number()over(order by getdate()) as num,
name from syscolumns where id=object_id('''+'RKTemp'+'''))a
where num=@min
set @tt=ISNULL(@tt,'','')+@name+'',''
set @min=@min+1
end
--print @tt
declare @ss varchar(max)
set @ss=''''
set @ss=''select * into #temp from '+'RKTemp'+' order by ''
+LEFT(@tt,LEN(@tt)-1)+'' delete from '+'RKTemp'+' insert '+'RKTemp'+' select * from #temp drop table #temp''
exec(@ss)
'
exec(@str1)
print '将RKTemp按全部item列排序完成'
-------------------用RKTemp(即R'[k])计算支持度生成C[K]-------------------
declare @number int
set @number=1
set @CK='C'+cast(@k as varchar(100))
declare @string varchar(max)
set @string='create table '+@CK+'('
declare @sqlstring varchar(1000)
set @sqlstring=''
while @number<=@k
begin
set @sqlstring=ISNULL(@sqlstring,',')+'item'+CAST(@number as varchar)+' nvarchar(50),'
set @number=@number+1
end
set @sqlstring=@sqlstring+'cnt int,'
--print left(@sqlstring,LEN(@sqlstring)-1)
set @string=@string+left(@sqlstring,LEN(@sqlstring)-1)+')'
exec(@string)
declare @group varchar(max)
set @group=''
set @number=1
while @number<=@k
begin
set @group=ISNULL(@group,',')+'item'+CAST(@number as varchar)+','
set @number=@number+1
end
set @group=left(@group,LEN(@group)-1)
--print '@group='+@group
declare @string2 varchar(max)
set @string2=
'
insert into '+@CK+'
select '+@group+',count(*)
from RKTemp
group by '+@group+'
having count(*)>='+@MinSupport
--print '@string2='+@string2
exec(@string2)
print '用RKTemp计算支持度生成C[K]完成'
-----------------用RKTemp和C[K]生成R[K]--------------------------
declare @numbert int
set @numbert=1
set @RK='R'+cast(@k as varchar(100))
declare @stringt varchar(max)
set @stringt='create table '+@RK+'('
declare @sqlstringt varchar(1000)
set @sqlstringt='tx_id int,'
while @numbert<=@k
begin
set @sqlstringt=ISNULL(@sqlstringt,',')+'item'+CAST(@numbert as varchar)+' nvarchar(50),'
set @numbert=@numbert+1
end
--print left(@sqlstringt,LEN(@sqlstringt)-1)
set @stringt=@stringt+left(@sqlstringt,LEN(@sqlstringt)-1)+')'
exec(@stringt)
declare @string3 varchar(max)
set @string3=
'
insert into '+@RK+'
select RKTemp.*
from RKTemp,'+@CK+'
where RKTemp.item1='+@CK+'.item1 and RKTemp.item'+cast(@k as varchar(10))+'='+@CK+'.item'+cast(@k as varchar(10))
--print '@string3='+@string3
exec(@string3)
print '用RKTemp和C[K]生成R[K]完成'
-------------------R'[k]用完要删除----------------
drop table RKTemp
----------------------@RKNotEmpty重新赋值,若RK为空,则退出循环-------------
set @RKNotEmpty='select tx_id into panduan from '+@RK
exec(@RKNotEmpty)
if not exists(select * from panduan)
begin
drop table panduan
print @RK+'为空,跳出循环啦!'
break
end
else
drop table panduan
end
----------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------