五一的三天假期期间,ETL逻辑上出现了一些问题,导致每天需要装入DW的增量数据没有按设计装入。所以需要在ETL后对产生的增量数据进行检查,避免出现某天的增量数据出现丢失后自己被动的处理问题。
需求:如当天ETL的增量数据出现问题,需要让两方面的人或程序(DBA与BI)知晓并及时处理。
1. 在ODS服务器上建立如下库、表:
CREATE
DATABASE
`imm`
/*
!40100 DEFAULT CHARACTER SET utf8
*/
;
-- item_info记录不同项目名称与项目内的抽查表(前提:此表每天均有数据产生),以后如有新项目,只需在此表中添加记录
CREATE TABLE `item_info` (
`id` int ( 11 ) DEFAULT NULL ,
`itemname` varchar ( 64 ) DEFAULT NULL ,
`tabNameChk` char ( 32 ) DEFAULT NULL
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
-- icc表记录每个项目的检查时间与是否成功ETL的状态标志
CREATE TABLE `icc` (
`id` int ( 11 ) NOT NULL AUTO_INCREMENT,
`entertime` datetime DEFAULT NULL ,
`itemname` varchar ( 64 ) DEFAULT NULL ,
`flag` smallint ( 6 ) DEFAULT NULL ,
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
-- item_info记录不同项目名称与项目内的抽查表(前提:此表每天均有数据产生),以后如有新项目,只需在此表中添加记录
CREATE TABLE `item_info` (
`id` int ( 11 ) DEFAULT NULL ,
`itemname` varchar ( 64 ) DEFAULT NULL ,
`tabNameChk` char ( 32 ) DEFAULT NULL
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
-- icc表记录每个项目的检查时间与是否成功ETL的状态标志
CREATE TABLE `icc` (
`id` int ( 11 ) NOT NULL AUTO_INCREMENT,
`entertime` datetime DEFAULT NULL ,
`itemname` varchar ( 64 ) DEFAULT NULL ,
`flag` smallint ( 6 ) DEFAULT NULL ,
PRIMARY KEY (`id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8;
2.建立定时检查的procedure与event:
CREATE
PROCEDURE
cc_pro()
BEGIN
DECLARE v_max smallint ;
DECLARE v_title bigint ;
DECLARE v_itemN char ( 32 );
DECLARE v_tab char ( 64 );
DECLARE i smallint ;
SET i = 1 ;
SELECT max (id) INTO v_max FROM imm.item_info;
WHILE i <= v_max DO
SELECT itemname,tabNameChk INTO v_itemN,v_tab FROM imm.item_info WHERE id = i;
SET @dbname = v_itemN;
SET @tabname = v_tab;
SET @s_name = CONCAT( @dbname , ' . ' , @tabname );
SET @tempsql = concat( ' SELECT count(db_id) INTO @v_title FROM ' , @s_name , ' ; ' );
PREPARE _stmt FROM @tempsql ;
EXECUTE _stmt;
DEALLOCATE PREPARE _stmt;
IF ( @v_title > 0 ) THEN
INSERT INTO imm.icc(entertime,itemname,flag) VALUES (now(),v_itemN, 1 );
ELSE
INSERT INTO imm.icc(entertime,itemname,flag) VALUES (now(),v_itemN, 0 );
END IF ;
SET i = i + 1 ;
END WHILE ;
END
BEGIN
DECLARE v_max smallint ;
DECLARE v_title bigint ;
DECLARE v_itemN char ( 32 );
DECLARE v_tab char ( 64 );
DECLARE i smallint ;
SET i = 1 ;
SELECT max (id) INTO v_max FROM imm.item_info;
WHILE i <= v_max DO
SELECT itemname,tabNameChk INTO v_itemN,v_tab FROM imm.item_info WHERE id = i;
SET @dbname = v_itemN;
SET @tabname = v_tab;
SET @s_name = CONCAT( @dbname , ' . ' , @tabname );
SET @tempsql = concat( ' SELECT count(db_id) INTO @v_title FROM ' , @s_name , ' ; ' );
PREPARE _stmt FROM @tempsql ;
EXECUTE _stmt;
DEALLOCATE PREPARE _stmt;
IF ( @v_title > 0 ) THEN
INSERT INTO imm.icc(entertime,itemname,flag) VALUES (now(),v_itemN, 1 );
ELSE
INSERT INTO imm.icc(entertime,itemname,flag) VALUES (now(),v_itemN, 0 );
END IF ;
SET i = i + 1 ;
END WHILE ;
END
定时调用procedure,在此之前检查事件调度器是否开启:
+
--
---------------+-------+
| Variable_name | Value |
+ -- ---------------+-------+
| event_scheduler | ON |
+ -- ---------------+-------+
| Variable_name | Value |
+ -- ---------------+-------+
| event_scheduler | ON |
+ -- ---------------+-------+
CREATE
EVENT `ccpro_exec`
ON
SCHEDULE
EVERY 1 DAY STARTS ' 2011-05-05 07:00:00 '
ON COMPLETION NOT PRESERVE
ENABLE
COMMENT ' 在ETL执行完成后进行增量数据质量检查 '
DO BEGIN
call imm.cc_pro2;
END
EVERY 1 DAY STARTS ' 2011-05-05 07:00:00 '
ON COMPLETION NOT PRESERVE
ENABLE
COMMENT ' 在ETL执行完成后进行增量数据质量检查 '
DO BEGIN
call imm.cc_pro2;
END
以上事件的作用是每天7点钟检视一次各项目的增量情况
3.接下来可以随便写个shell脚本,作用是对当天的检查结果进行邮件发送(以前我喜欢发送到移动139邮箱,不过时下有太多的邮箱提供手机绑定功能。所以象我这样偷懒的可以只发封邮件。)
#!/bin/bash
#scriptname:icc_result.sh
#author:liuming
#datetime: 2011 - 05 - 05
function send_message(){
mysql -u$dbuser -p$passwd -D$dbname -e'select * from icc where date(entertime)=date(now());' > $r_txt
mail -s " `date +%Y-%m-%d_%H:%M` ETL(increment data) status " $to_email <$r_txt
}
function main(){
r_txt = " `pwd`/icc_result.txt "
dbuser = 'xxxx'
passwd = 'xxxx'
dbname = 'imm'
tabname = 'icc'
to_email = 'xxxx@xxx .com'
send_message
}
main
#scriptname:icc_result.sh
#author:liuming
#datetime: 2011 - 05 - 05
function send_message(){
mysql -u$dbuser -p$passwd -D$dbname -e'select * from icc where date(entertime)=date(now());' > $r_txt
mail -s " `date +%Y-%m-%d_%H:%M` ETL(increment data) status " $to_email <$r_txt
}
function main(){
r_txt = " `pwd`/icc_result.txt "
dbuser = 'xxxx'
passwd = 'xxxx'
dbname = 'imm'
tabname = 'icc'
to_email = 'xxxx@xxx .com'
send_message
}
main
4.对于app方面,可提供相关表名,让其在计算之前进行判断。此处省去N个字...