1、经济商务数据的按周计算
可以借鉴之处:
当前当年第几周的获取;对应周的周一和周末的日期的获取;mysql文本数据的装载。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/ouyjxQuery.py
2、idfa与日志比对进行激活及充值数据计算
可以借鉴之处:
特定目录下文件名的获取;Hive自动建表及分区的自动添加;根据特定目录下的文件自动装载数据到Hive分区;装载文本数据到Mysql并设定装载时的字符集。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/idfaQuery.py
可以借鉴之处:
当前当年第几周的获取;对应周的周一和周末的日期的获取;mysql文本数据的装载。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/ouyjxQuery.py
# -*- coding=utf-8 -*-
import datetime
import os
import warnings
import sys
# Python 2-only idiom: reload() is a builtin there, and sys.setdefaultencoding
# becomes visible only after reload(sys); this forces utf8 as the implicit
# str/unicode conversion codec for the whole process.
reload(sys)
sys.setdefaultencoding('utf8')
# Suppress all warnings globally (e.g. from the MySQL / Hive client shims).
warnings.filterwarnings("ignore")
def getNowYearLastWeek():
    """Return last week's ISO year/week tag, e.g. ``'2017#33'``.

    Looks at "now minus 7 days", so the result always names the calendar
    week immediately before the current one (ISO-8601 week numbering).
    """
    a_week_ago = datetime.datetime.now() - datetime.timedelta(days=7)
    iso_year, iso_week, _ = a_week_ago.isocalendar()
    return "{0}#{1}".format(iso_year, iso_week)
def getWeekFristLastDay(weekflag):
    """Translate a ``YYYY#WW`` tag into that ISO week's Monday and Sunday.

    weekflag: string such as ``'2017#33'`` (year, ``'#'``, ISO week number).
    Returns a ``(monday, sunday)`` pair of ``'%Y-%m-%d'`` date strings.
    """
    year_txt = weekflag[0:4]   # calendar year portion of the tag
    week_txt = weekflag[5:7]   # ISO week number portion of the tag
    # Anchor everything on January 1st of the requested year.
    jan_first = datetime.datetime.strptime(year_txt + '0101', '%Y%m%d')
    iso_year, _, iso_weekday = jan_first.isocalendar()
    # When Jan 1 still belongs to the previous ISO year (e.g. 2017-01-01 is
    # in 2016-W52), week 1 of `year_txt` starts AFTER Jan 1; otherwise Jan 1
    # already sits inside week 1 and the offset must reach back one week less.
    if iso_year < int(year_txt):
        offset = (8 - int(iso_weekday)) + (int(week_txt) - 1) * 7
    else:
        offset = (8 - int(iso_weekday)) + (int(week_txt) - 2) * 7
    monday = (jan_first + datetime.timedelta(days=offset)).date().strftime('%Y-%m-%d')
    sunday = (jan_first + datetime.timedelta(days=offset + 6)).date().strftime('%Y-%m-%d')
    return monday, sunday
def dataQuery():
    """Build last week's subscriber add/cancel result tables in Hive.

    Derives last ISO week's Monday/Sunday window plus yesterday's partition
    day, then runs a single Hive CLI session that (re)creates a chain of
    temp tables and the two final result tables
    (xx0809_ouy_result_addsubscriber / xx0809_ouy_result_cancelsubscriber).
    No return value; all output lives in Hive.
    """
    # One helper call yields both boundary dates; the original recomputed
    # getWeekFristLastDay(getNowYearLastWeek()) twice for the same result.
    lastWeekMonday, lastWeekSunday = getWeekFristLastDay(weekflag=getNowYearLastWeek())
    yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    # Compute the related recharge/subscription data and write the results
    # into Hive result tables.
    os.system("""source /etc/profile; \
/usr/lib/hive-current/bin/hive -e "drop table if exists xxx_temp_tab_user_info; \
create table xxx_temp_tab_user_info as \
select a2.nickname,a1.id room_id,a2.uid \
from oss_room_v2 a1 \
left join oss_chushou_user_profile a2 on a1.creator_uid=a2.uid \
where a1.pt_day='{yesterday}' and a2.pt_day='{yesterday}' \
and a1.id in(21785,39849,3755657,54321,66629,18920,1180100,54001,55003,55001,41621,20185,55888,66601,52065) \
; \
drop table if exists xxx_temp_tab_addsubscriber; \
create table xxx_temp_tab_addsubscriber as \
select a.room_id,a.uid,a.state,substr(a.created_time,1,10) subscriber_day,case when substr(a.created_time,1,10)=substr(b.created_time,1,10) and a.state=0 then 1 else 0 end newuser_addsubscriber_flag \
from oss_room_subscriber_roomid a \
left join oss_chushou_user_profile b on a.uid=b.uid \
where a.pt_day='{yesterday}' and b.pt_day='{yesterday}' and a.state=0 and substr(a.created_time,1,10) between '{lastWeekMonday}' and '{lastWeekSunday}'; \
drop table if exists xxx_temp_tab_cancelsubscriber; \
create table xxx_temp_tab_cancelsubscriber as \
select a.room_id,a.uid,a.state,substr(a.updated_time,1,10) cancel_subscriber_day,case when substr(a.updated_time,1,10)=substr(b.created_time,1,10) and a.state=-1 then 1 else 0 end newuser_cancel_subscriber_flag \
from oss_room_subscriber_roomid a \
left join oss_chushou_user_profile b on a.uid=b.uid \
where a.pt_day='{yesterday}' and b.pt_day='{yesterday}' and a.state=-1 and substr(a.updated_time,1,10) between '{lastWeekMonday}' and '{lastWeekSunday}'; \
drop table if exists xxx_temp_tab_frist_subscriber; \
create table xxx_temp_tab_frist_subscriber as \
select room_id,uid,state,created_time \
from (select room_id,uid,state,created_time,row_number()over(partition by uid order by created_time) rk \
from (select room_id,uid,state,created_time from oss_room_subscriber_roomid where pt_day='{yesterday}') x) xx \
where rk=1; \
drop table if exists xx0809_newadd_user_addsubscriber; \
create table xx0809_newadd_user_addsubscriber as \
select a1.nickname,a1.room_id,a1.uid, \
a2.subscriber_day,a2.newuser_addsubscriber_flag,case when a3.room_id is not null then 1 else 0 end newuser_addsubscriber_frist_flag, \
a2.uid addsubscriber_uid \
from xxx_temp_tab_user_info a1 \
left join xxx_temp_tab_addsubscriber a2 on a1.room_id=a2.room_id \
left join xxx_temp_tab_frist_subscriber a3 on a2.room_id=a3.room_id and a2.uid=a3.uid \
; \
drop table if exists xx0809_newadd_user_cancelsubscriber; \
create table xx0809_newadd_user_cancelsubscriber as \
select a1.nickname,a1.room_id,a1.uid, \
a3.cancel_subscriber_day,a3.newuser_cancel_subscriber_flag, \
a3.uid cancel_subscriber_uid \
from xxx_temp_tab_user_info a1 \
left join xxx_temp_tab_cancelsubscriber a3 on a1.room_id=a3.room_id \
; \
drop table if exists xx0809_ouy_result_addsubscriber; \
create table xx0809_ouy_result_addsubscriber as \
select a1.nickname,a1.room_id,a1.uid,a1.subscriber_day, \
count(newuser_addsubscriber_flag) total_addsubscriber_flag, \
sum(newuser_addsubscriber_flag) newuser_addsubscriber_cnt, \
sum(newuser_addsubscriber_frist_flag) newuser_addsubscriber_frist_flag, \
sum(case when a1.newuser_addsubscriber_flag=1 then a2.message_cnt else null end) message_send_in_thisRoom, \
sum(case when a1.newuser_addsubscriber_flag=1 then amount else 0 end) pay_amount, \
a4.rk \
from xx0809_newadd_user_addsubscriber a1 \
left join (select roomid,uid,pt_day,count(*) message_cnt from oss_chushou_message_send where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by roomid,uid,pt_day) a2 on a1.room_id=int(a2.roomid) and a1.addsubscriber_uid=int(a2.uid) and a1.subscriber_day=a2.pt_day \
left join (select uid,pt_day,sum(amount) amount from data_chushou_pay_info where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by uid,pt_day) a3 on a1.addsubscriber_uid=int(a3.uid) and a1.subscriber_day=a3.pt_day \
left join (select room_id,last_time pt_day,rank rk from data_chushou_room_category_rank where category_id=0) a4 on a1.room_id=a4.room_id and a1.subscriber_day=a4.pt_day \
group by a1.nickname,a1.room_id,a1.uid,a1.subscriber_day,a4.rk; \
drop table if exists xx0809_ouy_result_cancelsubscriber; \
create table xx0809_ouy_result_cancelsubscriber as \
select a1.nickname,a1.room_id,a1.uid,a1.cancel_subscriber_day, \
count(newuser_cancel_subscriber_flag) total_cancelsubscriber_flag, \
sum(newuser_cancel_subscriber_flag) newuser_cancelsubscriber_cnt, \
sum(case when a1.newuser_cancel_subscriber_flag=1 then a2.message_cnt else null end) message_send_in_thisRoom, \
sum(case when a1.newuser_cancel_subscriber_flag=1 then amount else 0 end) pay_amount, \
a4.rk \
from xx0809_newadd_user_cancelsubscriber a1 \
left join (select roomid,uid,pt_day,count(*) message_cnt from oss_chushou_message_send where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by roomid,uid,pt_day) a2 on a1.room_id=int(a2.roomid) and a1.cancel_subscriber_uid=int(a2.uid) and a1.cancel_subscriber_day=a2.pt_day \
left join (select uid,pt_day,sum(amount) amount from data_chushou_pay_info where pt_day between '{lastWeekMonday}' and '{lastWeekSunday}' group by uid,pt_day) a3 on a1.cancel_subscriber_uid=int(a3.uid) and a1.cancel_subscriber_day=a3.pt_day \
left join (select room_id,last_time pt_day,rank rk from data_chushou_room_category_rank where category_id=0) a4 on a1.room_id=a4.room_id and a1.cancel_subscriber_day=a4.pt_day \
group by a1.nickname,a1.room_id,a1.uid,a1.cancel_subscriber_day,a4.rk; \
" """.format(lastWeekMonday=lastWeekMonday, lastWeekSunday=lastWeekSunday, yesterday=yesterday))
def resultLoadMysql():
    """Export the weekly Hive results to a text file and load it into MySQL.

    Step 1: a Hive query joins the add/cancel result tables and spools the
    rows into result.txt. Step 2: the MySQL client deletes last week's rows
    from funnyai_data.static_ouy_byday_everweek and bulk-loads the file with
    an explicit utf8 charset. No return value.
    """
    # One helper call yields both boundary dates; the original recomputed
    # getWeekFristLastDay(getNowYearLastWeek()) twice for the same result.
    lastWeekMonday, lastWeekSunday = getWeekFristLastDay(weekflag=getNowYearLastWeek())
    os.system("""source /etc/profile; \
/usr/lib/hive-current/bin/hive -e " \
select a1.nickname,a1.room_id,a1.uid,a1.subscriber_day data_day,a1.total_addsubscriber_flag total_addsubscriber_cnt,a1.newuser_addsubscriber_cnt,newuser_addsubscriber_frist_flag newuser_addsubscriber_frist_cnt,a1.message_send_in_thisRoom,a1.pay_amount,a2.total_cancelsubscriber_flag total_cancelsubscriber_cnt,newuser_cancelsubscriber_cnt,a1.rk \
from xx0809_ouy_result_addsubscriber a1 \
left join xx0809_ouy_result_cancelsubscriber a2 on a1.room_id=a2.room_id and a1.uid=a2.uid and a1.subscriber_day=a2.cancel_subscriber_day \
order by a1.room_id,data_day;" > /home/mysqlUser/nisj/automationDemand/ouyjx/resultTmp/result.txt""")
    # SECURITY NOTE(review): MySQL host/user/password are hard-coded on the
    # command line below; move them to a protected option file (--defaults-file)
    # so they don't leak via shell history / process listings.
    os.system("""source /etc/profile; \
/usr/bin/mysql -h199.199.199.199 -P6603 -umysqlUser -pmysqlPass -e "use funnyai_data; \
delete from funnyai_data.static_ouy_byday_everweek where data_day between '{lastWeekMonday}' and '{lastWeekSunday}'; \
load data local infile '/home/mysqlUser/nisj/automationDemand/ouyjx/resultTmp/result.txt' ignore into table funnyai_data.static_ouy_byday_everweek character set utf8 (nickname,room_id,uid,data_day,total_addsubscriber_cnt,newuser_addsubscriber_cnt,newuser_addsubscriber_frist_cnt,message_send_in_thisRoom,pay_amount,total_cancelsubscriber_cnt,newuser_cancelsubscriber_cnt,rk); \
" """.format(lastWeekMonday=lastWeekMonday, lastWeekSunday=lastWeekSunday))
# fields terminated by '\\t' enclosed by '"' lines terminated by '\\n'
# Batch Test
# Guarded so that importing this module no longer fires the Hive/MySQL jobs
# as a side effect; running the file as a script behaves exactly as before.
if __name__ == '__main__':
    dataQuery()
    resultLoadMysql()
2、idfa与日志比对进行激活及充值数据计算
可以借鉴之处:
特定目录下文件名的获取;Hive自动建表及分区的自动添加;根据特定目录下的文件自动装载数据到Hive分区;装载文本数据到Mysql并设定装载时的字符集。
/Users/nisj/PycharmProjects/BiDataProc/IdfaQuery/idfaQuery.py
# -*- coding=utf-8 -*-
import os
import warnings
import datetime
import sys
# Python 2-only idiom: reload() is a builtin there, and sys.setdefaultencoding
# becomes visible only after reload(sys); this forces utf8 as the implicit
# str/unicode conversion codec for the whole process.
reload(sys)
sys.setdefaultencoding('utf8')
# Suppress all warnings globally.
warnings.filterwarnings("ignore")
def file_path(file_dir):
    """Return the names of the files directly inside *file_dir* (non-recursive).

    os.walk yields one (root, dirs, files) tuple per directory, top-down;
    returning on the first iteration keeps only the top level's file list.
    Returns [] when the directory does not exist or walk yields nothing —
    the original fell through and returned None, which made the caller's
    ``for srcFile in file_path(...)`` loop crash.
    """
    for _root, _dirs, files in os.walk(file_dir):
        return files
    return []
def partitionAdd_DataLoad():
    """Recreate the partitioned Hive table ``xxxauto_idfa`` and bulk-load it.

    One partition is added per CSV file found in the source directory (the
    partition key ``class`` is the file name minus its ``.csv`` suffix), and
    that file is loaded into its partition. Everything is sent to the Hive
    CLI as a single statement batch. No return value.
    """
    src_dir = '/home/mysqlUser/nisj/automationDemand/idfaQuery/srcCsvData'
    # NOTE(review): the '\n' below is a literal newline byte inside the DDL
    # (this is not a raw string) — confirm Hive treats it as the intended
    # line terminator.
    sql_parts = [" \
drop table if exists xxxauto_idfa; \
CREATE TABLE xxxauto_idfa (active_time string, \
campaign_id string, \
tdid string, \
idfa string, \
android_id string, \
advertiser_id string, \
active_ip string, \
active_type string, \
device_type string, \
click_ip string, \
click_time string, \
creative_id string, \
keyword_id string, \
imei string) \
PARTITIONED BY (class string) \
ROW FORMAT DELIMITED \
FIELDS TERMINATED BY ',' \
LINES TERMINATED BY '\n' \
STORED AS TEXTFILE;"]
    # Append one "add partition + load" pair per CSV file.
    for csv_name in file_path(src_dir):
        partition_key = csv_name.replace('.csv', '')
        sql_parts.append("alter table xxxauto_idfa add partition (class='{srcFileKey}'); \
load data local inpath '/home/mysqlUser/nisj/automationDemand/idfaQuery/srcCsvData/{srcFileKey}.csv' overwrite into table xxxauto_idfa partition(class='{srcFileKey}');".format(srcFileKey=partition_key))
    # Load the data in one Hive session.
    os.system("""source /etc/profile; \
/usr/lib/hive-current/bin/hive -e " \
{partitionAddDataLoadAllSql}
" \
""".format(partitionAddDataLoadAllSql=''.join(sql_parts)))
def dataQuery(dateStart, dateEnd, batchFlag):
    """Join imported IDFA data against access logs and payment records.

    dateStart/dateEnd: 'YYYY-MM-DD' strings bounding the log partitions.
    batchFlag: human-readable batch tag; today's date is appended to it.
    Spools two result files (idfaPayInfo.txt, idfaRelationInfo.txt) for the
    MySQL load step. No return value.
    """
    # NOTE(review): the '' separator is a no-op, so the tag and date are
    # concatenated directly — confirm a delimiter wasn't intended. Must stay
    # consistent with resultLoadMysql(), which builds the same tag.
    batchFlag = batchFlag + '' + datetime.datetime.today().strftime('%Y-%m-%d')
    # Compute the matching recharge data and write the result rows into the
    # target directory (via a RadixChange UDF that converts hex uids to decimal).
    os.system("""source /etc/profile; \
/usr/lib/hive-current/bin/hive -e " \
add jar /home/mysqlUser/nisj/udf-jar/mysqlUser_udf_radixChange.jar; \
create temporary function RadixChange as 'com.kascend.mysqlUser.RadixChange'; \
with tab_idfa_uid as (select distinct a1.idfa,RadixChange(lower(a1.uid),16,10) uid,a2.class,a1.pt_day \
from (select distinct x1.uid,x2.idfa,x1.pt_day \
from (select distinct identifier,uid,pt_day from bi_all_access_log where pt_day between '{dateStart}' and '{dateEnd}') x1 \
inner join (select distinct identifier,idfa,pt_day from bi_all_identifier_idfa_log where pt_day between '{dateStart}' and '{dateEnd}') x2 on x1.identifier=x2.identifier and x1.pt_day=x2.pt_day) a1 \
inner join xxxauto_idfa a2 on a1.idfa=a2.idfa and a1.pt_day=substr(a2.active_time,1,10)) \
select '{batchFlag}' batchFlag,a1.pt_day,a2.class,sum(a1.amount) pay_amount,count(distinct a1.uid) pay_cnt \
from data_chushou_pay_info a1 \
inner join tab_idfa_uid a2 on a1.uid=a2.uid \
where a1.state=0 and a1.pt_day between '{dateStart}' and '{dateEnd}' and a2.idfa<>'00000000-0000-0000-0000-000000000000' \
and a1.pt_day>=a2.pt_day \
group by a1.pt_day,a2.class;" > /home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaPayInfo.txt """.format(dateStart=dateStart, dateEnd=dateEnd, batchFlag=batchFlag));
    # Compare against the log files: per class, count valid IDFAs from the
    # spreadsheet and how many matched an IDFA/uid/identifier in the logs.
    os.system("""source /etc/profile; \
/usr/lib/hive-current/bin/hive -e " \
with tab_idfa_uid as (select distinct x1.uid,x2.idfa,x1.identifier \
from (select distinct identifier,uid from bi_all_access_log where pt_day between '{dateStart}' and '{dateEnd}') x1 \
inner join (select distinct identifier,idfa from bi_all_identifier_idfa_log where pt_day between '{dateStart}' and '{dateEnd}') x2 on x1.identifier=x2.identifier \
) \
select '{batchFlag}' batchFlag,a2.class,count(distinct a2.idfa) valid_idfa_inexcel_cnt,count(distinct a1.idfa) relation_idfa_inlog_cnt,count(distinct a1.uid) relation_uid_inlog_cnt,count(distinct a1.identifier) relation_identifier_inlog_cnt \
from xxxauto_idfa a2 \
left join tab_idfa_uid a1 on a1.idfa=a2.idfa \
where a2.idfa<>'00000000-0000-0000-0000-000000000000' \
group by a2.class;" > /home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaRelationInfo.txt """.format(dateStart=dateStart, dateEnd=dateEnd, batchFlag=batchFlag));
def resultLoadMysql(batchFlag):
    """Load the two spooled result files into MySQL, replacing this batch.

    batchFlag: the same base tag passed to dataQuery(); today's date is
    appended with the identical (no-op '') concatenation so the delete keys
    match the rows the Hive step produced. No return value.
    """
    # NOTE(review): '' separator is a no-op (tag + date concatenated
    # directly); kept deliberately consistent with dataQuery().
    batchFlag = batchFlag + '' + datetime.datetime.today().strftime('%Y-%m-%d')
    # NOTE(review): MySQL credentials are hard-coded on the command line;
    # consider a protected option file. Charset is forced to utf8 both for
    # the client connection and for the LOAD DATA statements.
    os.system("""source /etc/profile; \
/usr/bin/mysql -h199.199.199.199 -P6603 -umysqlUser -pmysqlPass --default-character-set=utf8 -e "use funnyai_data; \
delete from funnyai_data.static_idfa_pay_info where batchFlag = '{batchFlag}'; \
delete from funnyai_data.static_idfa_relation_info where batchFlag = '{batchFlag}'; \
load data local infile '/home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaPayInfo.txt' ignore into table funnyai_data.static_idfa_pay_info character set utf8 (batchFlag,pt_day,class,pay_amount,pay_uid_cnt); \
load data local infile '/home/mysqlUser/nisj/automationDemand/idfaQuery/targetXlsData/idfaRelationInfo.txt' ignore into table funnyai_data.static_idfa_relation_info character set utf8 (batchFlag,class,valid_idfa_inexcel_cnt,relation_idfa_inlog_cnt,relation_uid_inlog_cnt,relation_identifier_inlog_cnt); \
" """.format(batchFlag=batchFlag))
# Job parameters: the comparison window and a human-readable batch tag.
dateStart = '2017-08-14'
dateEnd = '2017-08-20'
batchFlag = 'ChenM比对(2017-08-14~2017-08-20)'
# Guarded so that importing this module no longer fires the Hive/MySQL jobs
# as a side effect; running the file as a script behaves exactly as before.
if __name__ == '__main__':
    partitionAdd_DataLoad()
    dataQuery(dateStart, dateEnd, batchFlag)
    resultLoadMysql(batchFlag)