-- Preview of the raw source table that the later statements read from.
-- NOTE(review): "select *" is kept because the column list of raw_henan is
-- not visible in this file; replace it with explicit columns if this query
-- is ever used outside ad-hoc inspection.
select *
from raw_henan;
1、客户打分表
-- Build henan_score: every row of raw_henan plus three per-row scores.
-- Each score is the mean of "target" across all rows sharing the same value
-- of one attribute ("data_type", "sex", "age").
--
-- The windows deliberately have no ORDER BY. Ordering a window by its own
-- partition key makes every row in the partition a peer, so the default
-- frame already covers the whole partition; the ORDER BY only added a sort
-- and made the expression look like a running average.
create table henan_score as
select
    t.*,  -- NOTE(review): raw_henan's columns are not visible here, so the wildcard is kept
    avg(t."target") over (partition by t."data_type") as data_type_score,
    avg(t."target") over (partition by t."sex")       as gender_score,
    avg(t."target") over (partition by t."age")       as age_score
from raw_henan t;
2、根据得分表进行分组,生成分组表
-- Build deal_data_type_group: bucket henan_score rows by their three scores
-- and summarise each bucket.
--
-- Bucketing: ceil(score * 100) / 100 rounds each score UP to the next 0.01,
-- so rows whose scores fall in the same 0.01-wide interval share a bucket.
-- Per bucket the table stores the bucket value and mean score for each of
-- the three attributes, the row count (amount) and the mean of "target"
-- (deal_pp).
--
-- Clause order follows the SQL standard (GROUP BY before HAVING); the
-- reversed order is rejected by most engines.
create table deal_data_type_group as
select
    ceil(data_type_score * 100) / 100 as stat_type_group,
    avg(data_type_score)              as stat_type_avg_score,
    ceil(gender_score * 100) / 100    as gender_score_group,
    avg(gender_score)                 as gender_avg_score,
    ceil(age_score * 100) / 100       as cus_age_group,
    avg(age_score)                    as cus_age_avg_score,
    count(*)                          as amount,
    avg("target")                     as deal_pp
from henan_score
group by
    ceil(data_type_score * 100) / 100,
    ceil(gender_score * 100) / 100,
    ceil(age_score * 100) / 100
-- Keep only buckets with at least 100 rows.
-- NOTE(review): presumably a minimum sample size for a stable deal_pp -- confirm the threshold.
having count(*) >= 100
-- Largest buckets first. This only influences insertion order; readers of
-- the table must still ORDER BY if they need a guaranteed order.
order by amount desc;
得到的分组表如下:
真实购买率!(根据信用卡类型分类)
用来训练模型
建模过程略过
测试集为全体数据集
模型结果为带预测值的个体数据集
最后验证一下客户分级后预测值与实际值的对比情况
-- Build pre_deal_result: compare predicted and actual outcomes per
-- prediction level.
--
-- level_num buckets "pre_deal" by rounding it UP to the next 0.01.
-- Per level the table stores the row count (amount), the mean of "pre_deal"
-- (avg_pre_deal), the sum of "target" (deal_amount) and the mean of
-- "target" (deal_pp).
-- NOTE(review): deal_amount / deal_pp read as "number of deals" / "deal
-- rate" only if "target" is a 0/1 flag -- confirm against pre_deal_compare.
create table pre_deal_result as
select
    ceil("pre_deal" * 100) / 100 as level_num,
    count(*)                     as amount,
    avg("pre_deal")              as avg_pre_deal,
    sum("target")                as deal_amount,
    avg("target")                as deal_pp
from pre_deal_compare
group by ceil("pre_deal" * 100) / 100
-- Ordered by name rather than by position, so reordering the select list
-- cannot silently change the sort key.
order by level_num;
结果挺完美的,后续可以进一步去做修正!