移动APP渠道质量评价体系

最新推荐文章于 2022-04-15 15:21:55 发布

DVD_D

最新推荐文章于 2022-04-15 15:21:55 发布

阅读量471

点赞数

文章标签： python 大数据

本文链接：https://blog.csdn.net/weixin_43287260/article/details/112185069

版权

一、为什么要做渠道质量评价体系：

结合推广运营目的，评估用户渠道的好坏，好的渠道要拓量，较差的渠道要控制；

二、如何做：

1.根据项目所处阶段构建渠道质量评价指标体系：

新项目-拉新：此阶段尽可能多的获取新用户，新增80%，活跃20%，注意砍掉大量作弊用户，可允许少部分；
下载量、登录量、注册量、登录率、注册率。。。

发展中项目-活跃：新增40%+活跃40%+营收20%；
日活、月活、次留、7留、30留、人均使用时长。。。

成熟项目-营收：新增20%-活跃40%+营收40%；
成本、收入、ROI、创收人数占比。。。

2.数据准备：

1）搜集第三方数据（主要是下载量及部分收入等）及自己数据库数据；

2）计算指标并归一化（Min-Max归一化）；

3）按权重计算总得分；

三、效果：
市场部快速决策，T+3（可T+1）确定优劣渠道

"""
date：2020-11-03
@author: zhuweisj
theme：Browser_app_channels_evaluation
coding：utf-8
"""

# 导入相关包
import numpy as np
from numpy import float64
import pandas as pd
import lightgbm as lgbm
from pyspark.sql import SparkSession
from pyspark.sql.types import *
from datetime import timedelta
import os
import time
from datetime import datetime
import warnings

# 忽略warning
warnings.filterwarnings('ignore')  
pd.options.display.float_format = '{:.2f}'.format

#更改当前工作目录路径
# os.chdir(r'your path')

def channel_evaluation_algo(project,calc_date):
    # temp_result_tb = 'dp_dm.dw_channel_evaluation_{}'.format(project)
    # result_tb = 'dp_dm.dw_channel_evaluation_{}_all'.format(project)
    date = (calc_date - timedelta(days=4)).strftime(f"%Y-%m-%d")
    print("channel_evaluation_algo开始计算：项目：{}, 日期：{}".format(project, date))

    spark = SparkSession.builder.master("yarn").enableHiveSupport().getOrCreate()

    sql = '''select s1.p_dt as p_dt
            ,s1.attribution_channel_id as attribution_channel_id
            ,attribution_new_uv
            ,retain_1d
            ,retain_2d
            ,retain_3d
            ,income
            ,renjunshichang
            ,avg_launch_time
            ,attrib_single_cost
            ,cast(sousuocishu/sousuo as decimal(5,2)) as sousuocishu
            ,cast(yueducishu/yuedu as decimal(5,2)) as yueducishu
            ,cast(xiaoshipincishu/xiaoshipin as decimal(5,2)) as xiaoshipincishu
            ,cast(xiaoshuocishu/xiaoshuo as decimal(5,2)) as xiaoshuocishu
            ,cast(lingxianjincishu/lingxianjin as decimal(5,2)) as lingxianjincishu
            ,cast(duanshipincishu/duanshipin as decimal(5,2)) as duanshipincishu
            from
            (select p_dt,attribution_channel_id,attribution_new_uv
            ,cast(retain_1d_uv/attribution_new_uv as decimal(5,2)) as retain_1d
            ,cast(retain_2d_uv/attribution_new_uv as decimal(5,2)) as retain_2d
            ,cast(retain_3d_uv/attribution_new_uv as decimal(5,2)) as retain_3d
            from
            (select p_dt,attribution_channel_id
            ,sum((case when is_black_attribution_channel=0 then attribution_new_uv end )) as attribution_new_uv 
            ,sum(retain_1d_uv) as retain_1d_uv
            ,sum(retain_2d_uv) as retain_2d_uv
            ,sum(retain_3d_uv) as retain_3d_uv
            from dm_db.dm_common_mobile_device_retains_attribution_aggr_d
            where p_product = 'browser_app'
            and p_dt = ''' + "'" + date + "'" + '''
            and attribution_new_uv is not NULL 
            and attribution_new_uv <> 0
            and attribution_channel_id not in
            (select distinct channel from dw_db.dw_browser_app_mb_brush_cheat_blacklist
            where p_dt = '2020-09-21')
            group by p_dt,attribution_channel_id)t)s1
            left join
            (select t3.p_dt,t3.attribution_channel_id,count(t3.device_id) as renshu,sum(t3.use_time) as shichang
            ,cast(sum(t3.use_time)/count(t3.device_id)as decimal(10,2)) as renjunshichang
            from
            (select t2.p_dt,t1.attribution_channel_id,t1.device_id,t2.use_time
            from
            (select p_dt,attribution_channel_id,device_id
            from dw_db.dw_common_mobile_device_user_mapping
            where p_product = 'browser_app'
            and first_date = p_dt 
            and p_dt = ''' + "'" + date + "'" + '''
            group by p_dt,attribution_channel_id,device_id)t1
            left join
            (select
            a.p_dt,
            a.device_id,
            sum(cast(coalesce(get_json_object(a.json, '$.duration'), 0) as bigint))as use_time
            from dw_db.dw_common_mobile_use_log as a
            where a.p_product='browser_app'
            and a.p_dt  = ''' + "'" + date + "'" + '''
            --and cast(coalesce(get_json_object(a.json, '$.duration'), 0) as bigint)>900 
            and cast(coalesce(get_json_object(a.json, '$.duration'), 0) as bigint)<7200
            and a.device_id<>''
            group by a.p_dt,a.device_id)t2
            on t1.device_id = t2.device_id and t1.p_dt = t2.p_dt
            where t2.device_id is not null)t3
            group by t3.p_dt,t3.attribution_channel_id)s2
            on s1.p_dt = s2.p_dt and s1.attribution_channel_id = s2.attribution_channel_id
            left join
            (select 
            t1.p_dt
            ,t1.attribution_channel_id
            ,count(case when t2.event_name = 'toptitlebar_search' then t1.device_id else null end) sousuo
            ,count(case when t2.event_name in ('news_item_click','newssdk_all_news_item_click') then t1.device_id else null end) yuedu
            ,count(case when t2.event_name = 'navbar_click_smallvideo' then t1.device_id else null end) xiaoshipin
            ,count(case when t2.event_name = 'navbar_click_h5_http://t.mokayd.com/s/it8ym6y0qo' then t1.device_id else null end) xiaoshuo
            ,count(case when t2.event_name = 'navbar_click_taskpage' then t1.device_id else null end) lingxianjin
            ,count(case when t2.event_name = 'navbar_click_briefvideo' then t1.device_id else null end) duanshipin
            ,sum(case when t2.event_name = 'toptitlebar_search' then t2.event_counts else 0 end) sousuocishu
            ,sum(case when t2.event_name in ('news_item_click','newssdk_all_news_item_click') then t2.event_counts else 0 end) yueducishu
            ,sum(case when t2.event_name = 'navbar_click_smallvideo' then t2.event_counts else 0 end) xiaoshipincishu
            ,sum(case when t2.event_name = 'navbar_click_h5_http://t.mokayd.com/s/it8ym6y0qo' then t2.event_counts else 0 end) xiaoshuocishu
            ,sum(case when t2.event_name = 'navbar_click_taskpage' then t2.event_counts else 0 end) lingxianjincishu
            ,sum(case when t2.event_name = 'navbar_click_briefvideo' then t2.event_counts else 0 end) duanshipincishu
            from
            (select p_dt,device_id,attribution_channel_id
            from dw_db.dw_common_mobile_device_user_mapping 
            where p_product = 'browser_app'
            and p_dt = first_date
            and p_dt = ''' + "'" + date + "'" + ''')t1
            left join
            (select p_dt,device_id,event_name,sum(event_counts) event_counts
            from dw_db.dw_common_mobile_click_log
            where p_product = 'browser_app'
            and p_dt = ''' + "'" + date + "'" + '''
            and event_name in(
            'toptitlebar_search'
            ,'newssdk_all_news_item_click'
            ,'news_item_click'
            ,'navbar_click_smallvideo'
            ,'navbar_click_h5_http://t.mokayd.com/s/it8ym6y0qo'
            ,'navbar_click_taskpage'
            ,'navbar_click_briefvideo'
            )
            and event_counts > 0
            group by p_dt,device_id,event_name)t2
            on t1.device_id = t2.device_id and t1.p_dt = t2.p_dt
            group by t1.p_dt,t1.attribution_channel_id)s3
            on s1.p_dt = s3.p_dt and s1.attribution_channel_id = s3.attribution_channel_id
            left join
            (select attribution_channel_id
            ,attribution_channel_type
            ,avg(attrib_single_cost) as attrib_single_cost
            ,p_dt
            from dm_db.dm_common_appunion_info_export2ck
            where p_product = 'browser_app'
            and attribution_channel_cooperate_status_name in ('正式','测试')
            and p_dt = ''' + "'" + date + "'" + '''
            group by attribution_channel_id
            ,attribution_channel_type
            ,p_dt)s4
            on s1.p_dt = s4.p_dt and s1.attribution_channel_id = s4.attribution_channel_id
            left join
            (select p_dt
            ,attribution_channel_id
            ,count(distinct device_id) as num
            ,sum(launch_num) as launch_num
            ,cast(sum(launch_num)/count(distinct device_id) as decimal(5,1)) as avg_launch_time
            from dw_db.dw_m_common_mobile_device_devices
            where p_product = 'browser_app'
            and is_new = 1
            and p_dt = ''' + "'" + date + "'" + '''
            group by p_dt,attribution_channel_id)s5
            on s1.p_dt = s5.p_dt and s1.attribution_channel_id = s5.attribution_channel_id
            left join
            (select p_dt,attribution_channel_id,income
            from dm_db.dm_common_roi_attribute_roi_export2ck 
            where p_product = 'browser_app' 
            and p_dt = ''' + "'" + date + "'" + ''')s6
            on s1.p_dt = s6.p_dt and s1.attribution_channel_id = s6.attribution_channel_id
            '''

    predict_data = spark.sql(sql).toPandas()
    
    new_uv_data = pd.read_table("all_data_right20201106.txt",index_col=[0],parse_dates=[0], delimiter=',')
    # predict_data = pd.read_csv("1010~1109.csv",index_col=[0],parse_dates=[0])
    
    # 筛选渠道日均新增50以上
    channel_select = pd.DataFrame(new_uv_data.groupby('attribution_channel_id')['attribution_new_uv'].mean())
    select_channel_01 = list(channel_select.iloc[(np.where(channel_select.attribution_new_uv >= 50))].index)
    
    # 筛选渠道时间窗在2020-09-01~2020-09-21之间
    temp_date_min =  pd.DataFrame(new_uv_data.reset_index().groupby('attribution_channel_id')['p_dt'].min()).rename(columns={'p_dt':'min_p_dt'})
    temp_date_max = pd.DataFrame(new_uv_data.reset_index().groupby('attribution_channel_id')['p_dt'].max()).rename(columns={'p_dt':'max_p_dt'})
    temp_date_min_max = pd.concat([temp_date_min, temp_date_max], join='inner', axis = 1)
    select_channel_02 = []
    for channel_id in select_channel_01:
        if (temp_date_min_max.loc[channel_id]['min_p_dt'] == datetime(2020,9,1)) & (temp_date_min_max.loc[channel_id]['max_p_dt'] == datetime(2020,9,21)):
            select_channel_02.append(channel_id)
            
    channel_data = pd.DataFrame(select_channel_02,columns = ['channel_id'])
    
    # 根据选定的渠道，筛选相关渠道指标数据
    all_data = new_uv_data[new_uv_data['attribution_channel_id'].isin(x for x in select_channel_02)]
    all_data.fillna(0, inplace=True)
    all_data_select = all_data[['attribution_channel_id','attribution_new_uv', 'retain_1d',
           'retain_2d', 'retain_3d', 'income', 'renjunshichang', 'avg_launch_time',
           'attrib_single_cost', 'sousuocishu', 'yueducishu', 'xiaoshipincishu',
           'xiaoshuocishu', 'lingxianjincishu', 'duanshipincishu', 'score']]
    predict_data = predict_data.fillna(0)
    predict_data1 = predict_data.iloc[(np.where(predict_data.attribution_new_uv >= 50))].sort_values(['p_dt'])
    predict_data1 = pd.DataFrame(predict_data1)
    predict_data2 = predict_data1.iloc[:, 2:15]
    predict_data2= pd.DataFrame(predict_data2,dtype=np.float64)
    predict_data1.set_index(['attribution_channel_id'],inplace = True)

    errors = []
    # best_params = pd.DataFrame()
    result = pd.DataFrame({'project': list(), 'channel_id': list(), 'score': list(),
                         'p_dt': list()}).set_index('p_dt', inplace=True)
    pred_score = []
    channel_id = []
    p_dt = list(predict_data1['p_dt'])
    
    for i in range(len(predict_data2)):
        channel_data_select = all_data_select
        X_train_valid = channel_data_select.iloc[:, 1:14]
        y_train_valid = channel_data_select['score']     
        X_test=X_train_valid
        y_test = y_train_valid
        x_pre = predict_data2.iloc[[i]]
 
        lgb_train = lgbm.Dataset(X_train_valid, y_train_valid)
        lgb_eval = lgbm.Dataset(X_test, y_test, reference=lgb_train)
        
        real_param = {
            'boosting_type': 'gbdt',
            'objective': 'regression',
            'metric': {'l2', 'l1'},
            'num_leaves': 31,
            'learning_rate': 0.05,
            'feature_fraction': 0.9,
            'bagging_fraction': 0.8,
            'bagging_freq': 5,
            'verbose': 0
            }
        gbm = lgbm.train(real_param,
                        lgb_train,
                        num_boost_round=20,
                        valid_sets=lgb_eval,
                        early_stopping_rounds=5)
        print("gbm_train has been done!!!")
        
        y_pred = gbm.predict(x_pre, num_iteration=gbm.best_iteration)[0]
        pred_score.append(y_pred)
        print("00001")
        print(predict_data1.index[i])
        channel_id.append(predict_data1.index[i])

    
    result=pd.DataFrame({'project':'browser_app','channel_id': channel_id, 'score':pred_score,
                            'p_dt':p_dt})
    
    # 将数据转成spark dataframe格式
    schema = StructType([
        StructField("project", StringType(), True),
        StructField("channel_id", StringType(), True),
        StructField("score", DoubleType(), True),
        StructField("p_dt", StringType(), True)])
    df = spark.createDataFrame(result, schema=schema)
    first_run = "2"
    if first_run != "1":
        df.write.format("orc").mode("overwrite").saveAsTable('dp_dm.dw_channel_evaluation_browser_app')
        df.write.format("orc").mode("append").partitionBy("p_dt").saveAsTable('dp_dm.dw_channel_evaluation_browser_app_all')
    else:
        df.write.format("orc").mode("overwrite").saveAsTable('dp_dm.dw_channel_evaluation_browser_app')
        df.write.format("orc").mode("overwrite").partitionBy("p_dt").saveAsTable('dp_dm.dw_channel_evaluation_browser_app_all')


if __name__ == '__main__':
    # 读取渠道新增及留存数据文件
    time_now = datetime.now()
    project = 'browser_app'
    channel_evaluation_algo(project,time_now)

DVD_D

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
移动APP渠道质量评价体系

一、为什么要做渠道质量评价体系：结合推广运营目的，评估用户渠道的好坏，好的渠道要拓量，较差的渠道要控制；二、如何做：1.根据项目所处阶段构建渠道质量评价指标体系：新项目-拉新：此阶段尽可能多的获取新用户，新增80%，活跃20%，注意砍掉大量作弊用户，可允许少部分；下载量、登录量、注册量、登录率、注册率。。。发展中项目-活跃：新增40%+活跃40%+营收20%；日活、月活、次留、7留、30留、人均使用时长。。。成熟项目-营收：新增20%-活跃40%+营收40%；成本、收入、ROI、创收人数占
复制链接

扫一扫