OLAP的第一步就是从业务系统中抽取数据到数据仓库系统。
除了ETL工具Kettle,也可以使用PL/SQL
创建Source表,模拟业务系统的数据表。
create table source as select sys_guid() id ,o.* from dba_objects o where rownum<1000;
alter table source add constraint PK_source primary key (id);
创建Target表,模拟数据仓库中的表。
create table target as select * from source where 1=0;
alter table target add constraint PK_target primary key (id);
创建DB LINK,remote模拟业务系统的数据库
create database link remote connect to username identified by xxxxxx using 'remote';
因为业务系统的数据是变化的,相较于数据仓库的表,他可能更新了一些数据,也可能修改了一些数据。
比对业务表和数据仓库表的数据,
如果ID相同,并且数据有变化
则根据ID更新数据仓库的表(target)
如果业务系统的数据ID(source),还没有出现在数据仓库的表中(target)
则在数据仓库的表中新增这个记录。
本质都是Oracle Merge的功能,只不过尝试另外几种方法。
1.merge
首先将远程业务系统的数据放入临时表,
然后根据ID更新数据,如果数据的内容没有变化,则不更新。
最后插入业务系统中新建的数据。
为了简单,没有进行内容变化的判断
没有写更新后插入的步骤,插入的实现是相同的。
先插入业务表中新增的记录,然后对比修改。
除了ETL工具Kettle,也可以使用PL/SQL
创建Source表,模拟业务系统的数据表。
create table source as select sys_guid() id ,o.* from dba_objects o where rownum<1000;
alter table source add constraint PK_source primary key (id);
创建Target表,模拟数据仓库中的表。
create table target as select * from source where 1=0;
alter table target add constraint PK_target primary key (id);
创建DB LINK,remote模拟业务系统的数据库
create database link remote connect to username identified by xxxxxx using 'remote';
因为业务系统的数据是变化的,相较于数据仓库的表,他可能更新了一些数据,也可能修改了一些数据。
比对业务表和数据仓库表的数据,
如果ID相同,并且数据有变化
则根据ID更新数据仓库的表(target)
如果业务系统的数据ID(source),还没有出现在数据仓库的表中(target)
则在数据仓库的表中新增这个记录。
本质都是Oracle Merge的功能,只不过尝试另外几种方法。
1.merge
merge into target t using (select * from source@remote) s
on(t.id=s.id)
when matched
then
update set t.owner=s.owner,
t.object_name=s.object_name,
t.subobject_name=s.subobject_name,
t.object_id=s.object_id,
t.data_object_id=s.data_object_id,
t.object_type=s.object_type,
t.created=s.created,
t.last_ddl_time=s.last_ddl_time,
t.timestamp=s.timestamp,
t.status=s.status,
t.temporary=s.temporary,
t.generated=s.generated,
t.secondary=s.secondary,
t.namespace=s.namespace,
t.edition_name=s.edition_name
when not matched
then
insert values
(
s.id,
s.owner,
s.object_name,
s.subobject_name,
s.object_id,
s.data_object_id,
s.object_type,
s.created,
s.last_ddl_time,
s.timestamp,
s.status,
s.temporary,
s.generated,
s.secondary,
s.namespace,
s.edition_name
);
2.全局临时表。on(t.id=s.id)
when matched
then
update set t.owner=s.owner,
t.object_name=s.object_name,
t.subobject_name=s.subobject_name,
t.object_id=s.object_id,
t.data_object_id=s.data_object_id,
t.object_type=s.object_type,
t.created=s.created,
t.last_ddl_time=s.last_ddl_time,
t.timestamp=s.timestamp,
t.status=s.status,
t.temporary=s.temporary,
t.generated=s.generated,
t.secondary=s.secondary,
t.namespace=s.namespace,
t.edition_name=s.edition_name
when not matched
then
insert values
(
s.id,
s.owner,
s.object_name,
s.subobject_name,
s.object_id,
s.data_object_id,
s.object_type,
s.created,
s.last_ddl_time,
s.timestamp,
s.status,
s.temporary,
s.generated,
s.secondary,
s.namespace,
s.edition_name
);
首先将远程业务系统的数据放入临时表,
然后根据ID更新数据,如果数据的内容没有变化,则不更新。
最后插入业务系统中新建的数据。
create global temporary table tmp
on commit preserve rows
as
select * from target where 1=0;
insert into tmp select * from source@remote;
update target t set
(
t.owner,
t.object_name,
t.subobject_name,
t.object_id,
t.data_object_id,
t.object_type,
t.created,
t.last_ddl_time,
t.timestamp,
t.status,
t.temporary,
t.generated,
t.secondary,
t.namespace,
t.edition_name
)
=
(select
tmp.owner,
tmp.object_name,
tmp.subobject_name,
tmp.object_id,
tmp.data_object_id,
tmp.object_type,
tmp.created,
tmp.last_ddl_time,
tmp.timestamp,
tmp.status,
tmp.temporary,
tmp.generated,
tmp.secondary,
tmp.namespace,
tmp.edition_name
from tmp where t.id=tmp.id)
where exists(
select * from tmp where tmp.id=t.id and not (
tmp.owner=t.owner and
tmp.object_name=t.object_name and
tmp.subobject_name=t.subobject_name and
tmp.object_id=t.object_id and
tmp.data_object_id=t.data_object_id and
tmp.object_type=t.object_type and
tmp.created=t.created and
tmp.last_ddl_time=t.last_ddl_time and
tmp.timestamp=t.timestamp and
tmp.status=t.status and
tmp.temporary=t.temporary and
tmp.generated=t.generated and
tmp.secondary=t.secondary and
tmp.namespace=t.namespace and
tmp.edition_name=t.edition_name
)
);
insert into target
select * from tmp where not exists(
select * from target t where t.id=tmp.id);
3.集合处理on commit preserve rows
as
select * from target where 1=0;
insert into tmp select * from source@remote;
update target t set
(
t.owner,
t.object_name,
t.subobject_name,
t.object_id,
t.data_object_id,
t.object_type,
t.created,
t.last_ddl_time,
t.timestamp,
t.status,
t.temporary,
t.generated,
t.secondary,
t.namespace,
t.edition_name
)
=
(select
tmp.owner,
tmp.object_name,
tmp.subobject_name,
tmp.object_id,
tmp.data_object_id,
tmp.object_type,
tmp.created,
tmp.last_ddl_time,
tmp.timestamp,
tmp.status,
tmp.temporary,
tmp.generated,
tmp.secondary,
tmp.namespace,
tmp.edition_name
from tmp where t.id=tmp.id)
where exists(
select * from tmp where tmp.id=t.id and not (
tmp.owner=t.owner and
tmp.object_name=t.object_name and
tmp.subobject_name=t.subobject_name and
tmp.object_id=t.object_id and
tmp.data_object_id=t.data_object_id and
tmp.object_type=t.object_type and
tmp.created=t.created and
tmp.last_ddl_time=t.last_ddl_time and
tmp.timestamp=t.timestamp and
tmp.status=t.status and
tmp.temporary=t.temporary and
tmp.generated=t.generated and
tmp.secondary=t.secondary and
tmp.namespace=t.namespace and
tmp.edition_name=t.edition_name
)
);
insert into target
select * from tmp where not exists(
select * from target t where t.id=tmp.id);
为了简单,没有进行内容变化的判断
declare
type tab is table of target%rowtype;
l_row tab;
cursor cur is select * from source@remote;
begin
open cur;
fetch cur bulk collect into l_row;
close cur;
forall i in 1..l_row.count
update target set row=l_row(i) where id=l_row(i).id;
insert into target select * from source@remote s
where not exists (select * from target t where t.id=s.id);
commit;
end;
/
4.内联视图更新type tab is table of target%rowtype;
l_row tab;
cursor cur is select * from source@remote;
begin
open cur;
fetch cur bulk collect into l_row;
close cur;
forall i in 1..l_row.count
update target set row=l_row(i) where id=l_row(i).id;
insert into target select * from source@remote s
where not exists (select * from target t where t.id=s.id);
commit;
end;
/
没有写更新后插入的步骤,插入的实现是相同的。
update(
select
s.id s1,
s.owner s2,
s.object_name s3,
s.subobject_name s4,
s.object_id s5,
s.data_object_id s6,
s.object_type s7,
s.created s8,
s.last_ddl_time s9,
s.timestamp s10,
s.status s11,
s.temporary s12,
s.generated s13,
s.secondary s14,
s.namespace s15,
s.edition_name s16,
t.id t1,
t.owner t2,
t.object_name t3,
t.subobject_name t4,
t.object_id t5,
t.data_object_id t6,
t.object_type t7,
t.created t8,
t.last_ddl_time t9,
t.timestamp t10,
t.status t11,
t.temporary t12,
t.generated t13,
t.secondary t14,
t.namespace t15,
t.edition_name t16
from target t inner join source@remote s on(s.id=t.id)
where
not
(
s.owner=t.owner and
s.object_name=t.object_name and
s.subobject_name=t.subobject_name and
s.object_id=t.object_id and
s.data_object_id=t.data_object_id and
s.object_type=t.object_type and
s.created=t.created and
s.last_ddl_time=t.last_ddl_time and
s.timestamp=t.timestamp and
s.status=t.status and
s.temporary=t.temporary and
s.generated=t.generated and
s.secondary=t.secondary and
s.namespace=t.namespace and
s.edition_name=t.edition_name
)
)
set
t1=s1,
t2=s2,
t3=s3,
t4=s4,
t5=s5,
t6=s6,
t7=s7,
t8=s8,
t9=s9,
t10=s10,
t11=s11,
t12=s12,
t13=s13,
t14=s14,
t15=s15,
t16=s16
;
5.Minusselect
s.id s1,
s.owner s2,
s.object_name s3,
s.subobject_name s4,
s.object_id s5,
s.data_object_id s6,
s.object_type s7,
s.created s8,
s.last_ddl_time s9,
s.timestamp s10,
s.status s11,
s.temporary s12,
s.generated s13,
s.secondary s14,
s.namespace s15,
s.edition_name s16,
t.id t1,
t.owner t2,
t.object_name t3,
t.subobject_name t4,
t.object_id t5,
t.data_object_id t6,
t.object_type t7,
t.created t8,
t.last_ddl_time t9,
t.timestamp t10,
t.status t11,
t.temporary t12,
t.generated t13,
t.secondary t14,
t.namespace t15,
t.edition_name t16
from target t inner join source@remote s on(s.id=t.id)
where
not
(
s.owner=t.owner and
s.object_name=t.object_name and
s.subobject_name=t.subobject_name and
s.object_id=t.object_id and
s.data_object_id=t.data_object_id and
s.object_type=t.object_type and
s.created=t.created and
s.last_ddl_time=t.last_ddl_time and
s.timestamp=t.timestamp and
s.status=t.status and
s.temporary=t.temporary and
s.generated=t.generated and
s.secondary=t.secondary and
s.namespace=t.namespace and
s.edition_name=t.edition_name
)
)
set
t1=s1,
t2=s2,
t3=s3,
t4=s4,
t5=s5,
t6=s6,
t7=s7,
t8=s8,
t9=s9,
t10=s10,
t11=s11,
t12=s12,
t13=s13,
t14=s14,
t15=s15,
t16=s16
;
先插入业务表中新增的记录,然后对比修改。
declare
type tab is table of target%rowtype;
l_row tab;
cursor cur is select * from source@remote minus select * from target;
begin
insert into target select * from source@remote s
where not exists (select * from target t where t.id=s.id);
open cur;
fetch cur bulk cllect into l_row;
close cur;
forall i in 1..l_row.count
update target set row=low(i) where id=l_row(i).id;
commit;
end;
/
type tab is table of target%rowtype;
l_row tab;
cursor cur is select * from source@remote minus select * from target;
begin
insert into target select * from source@remote s
where not exists (select * from target t where t.id=s.id);
open cur;
fetch cur bulk cllect into l_row;
close cur;
forall i in 1..l_row.count
update target set row=low(i) where id=l_row(i).id;
commit;
end;
/
来自 “ ITPUB博客 ” ,链接:http://blog.itpub.net/29254281/viewspace-777461/,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/29254281/viewspace-777461/