2.启动zk
hdfs
yarn
hive
3.拆分函数
注册函数
hive>
add jar /soft/hive/lib/umeng_hive.jar ;
创建函数
drop function forkstartuplogs ;
drop function forkeventlogs ;
drop function forkerrorlogs ;
drop function forkusagelogs ;
drop function forkpagelogs ;
create function forkstartuplogs as 'com.oldboy.umeng.hive.udtf.ForkStartuplogsUDTF' ;
create function forkeventlogs as 'com.oldboy.umeng.hive.udtf.ForkEventlogsUDTF' ;
create function forkerrorlogs as 'com.oldboy.umeng.hive.udtf.ForkErrorlogsUDTF' ;
create function forkusagelogs as 'com.oldboy.umeng.hive.udtf.ForkUsagelogsUDTF' ;
create function forkpagelogs as 'com.oldboy.umeng.hive.udtf.ForkPagelogsUDTF' ;
测试函数
select forkeventlogs(servertimes , clienttimems , clientip, json) from raw_logs limit10;
注册完可在mysql中查看函数注册元数据
cmd下
mysql -h s101 -uroot -proot
mysql中注册永久函数
4.分区转储s到5个表的sql语句 hive下启动的是MR
insert into appstartuplogs partition(ym , day) select t.appchannel, t.appid, t.appplatform, t.appversion, t.brand, t.carrier, t.country , t.createdatms, t.deviceid , t.devicestyle, t.ipaddress, t.network , t.ostype , t.province, t.screensize, t.tenantid, date_format(cast(t.createdatms as timestamp) , 'yyyyMM') , date_format(cast(t.createdatms as timestamp) , 'dd') from ( select forkstartuplogs(servertimestr ,clienttimems ,clientip ,json) from raw_logs limit 50 ) t;
insert into appeventlogs partition(ym , day) select t.appchannel , t.appid , t.appplatform , t.appversion , t.brand , t.createdatms , t.deviceid , t.devicestyle , t.eventdurationsecs , t.eventid , t.ostype , t.tenantid , date_format(cast(t.createdatms as timestamp) , 'yyyyMM') , date_format(cast(t.createdatms as timestamp) , 'dd') from ( select forkeventlogs(servertimestr ,clienttimems ,clientip ,json) from raw_logs limit 50 ) t;
insert into apperrorlogs partition(ym , day) select t.appchannel , t.appid , t.appplatform , t.appversion , t.brand , t.createdatms , t.deviceid , t.devicestyle , t.errorbrief , t.errordetail , t.ostype , t.tenantid , date_format(cast(t.createdatms as timestamp) , 'yyyyMM') , date_format(cast(t.createdatms as timestamp) , 'dd') from ( select forkerrorlogs(servertimestr ,clienttimems ,clientip ,json) from raw_logs limit 50 ) t;
insert into appusagelogs partition(ym , day) select t.appchannel , t.appid , t.appplatform , t.appversion , t.brand , t.createdatms , t.deviceid , t.devicestyle , t.ostype , t.singledownloadtraffic , t.singleuploadtraffic , t.singleusedurationsecs , t.tenantid , date_format(cast(t.createdatms as timestamp) , 'yyyyMM') , date_format(cast(t.createdatms as timestamp) , 'dd') from ( select forkusagelogs (servertimestr ,clienttimems ,clientip ,json) from raw_logs limit 50 ) t;
insert into apppagelogs partition(ym , day) select t.appchannel , t.appid , t.appplatform , t.appversion , t.brand , t.createdatms , t.deviceid , t.devicestyle , t.nextpage , t.ostype , t.pageid , t.pageviewcntinsession , t.staydurationsecs , t.tenantid , t.visitindex , date_format(cast(t.createdatms as timestamp) , 'yyyyMM') , date_format(cast(t.createdatms as timestamp) , 'dd') from ( select forkpagelogs (servertimestr ,clienttimems ,clientip ,json) from raw_logs limit 50 ) t;
结果查看