kettle案例——数据清洗与校验(数据一致性处理)

本文介绍了使用Kettle进行数据清洗和校验的案例,包括从MySQL数据库获取Personnel_Information数据,通过值映射转换性别为数值,再用插入/更新控件将清洗后的数据存入新表Personnel_Information_New。同时,创建Kettle作业设置定时器,实现数据同步的自动化执行。
摘要由CSDN通过智能技术生成

1.数据准备

准备一张名为Personnel_Information的数据表,该表中主要记录了500名职员的性别、身高、体重及健康值,在MySQL中输入以下代码:

create table `personnel_information` (
	`USERID` int (11),
	`GENDER` varchar (765),
	`HEIGHT` varchar (765),
	`WEIGHT` varchar (765),
	`INDEX` varchar (765)
); 
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000001','Female','174','96','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000002','Male','189','87','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000003','Female','185','110','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000004','Female','195','104','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000005','Male','149','61','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000006','Male','189','104','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000007','Male','147','92','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000008','Male','154','111','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000009','Male','174','90','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000010','Female','169','103','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000011','Male','195','81','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000012','Female','159','80','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000013','Female','192','101','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000014','Male','155','51','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000015','Male','191','79','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000016','Female','153','107','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000017','Female','157','110','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000018','Male','140','129','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000019','Male','144','145','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000020','Male','172','139','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000021','Male','157','110','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000022','Female','153','149','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000023','Female','169','97','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000024','Male','185','139','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000025','Female','172','67','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000026','Female','151','64','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000027','Male','190','95','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000028','Male','187','62','1');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000029','Female','163','159','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000030','Male','179','152','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000031','Male','153','121','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000032','Male','178','52','1');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000033','Female','195','65','1');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000034','Female','160','131','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000035','Female','157','153','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000036','Female','189','132','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000037','Female','197','114','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000038','Male','144','80','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000039','Female','171','152','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000040','Female','185','81','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000041','Female','175','120','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000042','Female','149','108','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000043','Male','157','56','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000044','Male','161','118','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000045','Female','182','126','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000046','Male','185','76','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000047','Female','188','122','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000048','Male','181','111','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000049','Male','161','72','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000050','Male','140','152','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000051','Female','168','135','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000052','Female','176','54','1');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000053','Male','163','110','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000054','Male','172','105','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000055','Male','196','116','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000056','Female','187','89','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000057','Male','172','92','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000058','Male','178','127','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000059','Female','164','70','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000060','Male','143','88','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000061','Female','191','54','0');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000062','Female','141','143','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000063','Male','193','54','0');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000064','Male','190','83','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000065','Male','175','135','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000066','Female','179','158','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000067','Female','172','96','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000068','Female','168','59','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000069','Female','164','82','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000070','Female','194','136','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000071','Female','153','51','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000072','Male','178','117','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000073','Male','141','80','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000074','Male','180','75','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000075','Female','185','100','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000076','Female','197','154','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000077','Male','165','104','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000078','Female','168','90','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000079','Female','176','122','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000080','Male','181','51','0');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000081','Male','164','75','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000082','Female','166','140','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000083','Female','190','105','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000084','Male','186','118','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000085','Male','168','123','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000086','Male','198','50','0');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000087','Female','175','141','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000088','Male','145','117','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000089','Female','159','104','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000090','Female','185','140','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000091','Female','178','154','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000092','Female','183','96','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000093','Female','194','111','3');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000094','Male','177','61','2');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000095','Male','197','119','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000096','Female','170','156','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000097','Male','142','69','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000098','Male','160','139','5');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000099','Male','195','69','1');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000100','Female','190','50','0');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000101','Male','199','156','4');
insert into `personnel_information` (`USERID`, `GENDER`, `HEIGHT`, `WEIGHT`, `INDEX`) values('00000000102','Male','154','105',&
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值