7.阿里云 大数据开发实践odpscmdO2O优惠券使用预测

代码有一定错误!!!

1.odps客户端,上传较大文件创建表结构o2o_online_train:

create table if not exists o2o_online_train(
user_id string comment '用户ID',
merchant_id string comment '商户ID',
action string comment '行为',
coupon_id string comment '优惠券ID',
discount_rate string comment '优惠率',
date_received string comment '领取日期',
date string comment '消费日期'
);

(1)上传文件命令

tunnel upload F:\o2o\01_o2o_online_train.csv (替换为你自己的文件路径) o2o_online_train;

创建表结构上传o2o_offline_train

create table if not exists o2o_offline_train (
user_id string comment'用户ID',
merchant_id string comment'商户ID',
coupon_id string comment'优惠券ID',
discount_rate string comment '优惠率',
distance string comment'距离',
date_received string comment'领取日期',
date string comment'消费日期'
);

(2)数据划分

--一.数据划分
create table if not exists dataset3
as
select*from o2o_predict;

create table if not exists feature3
as
select*from o2o_offline_train
where('20160315'<=date
and date<='20160630')
or(date='null'
and'20160315'<=date_received
and date_received <='20160630');

create table if not exists online_feature3
as
select
*from o2o_online_train 
where('20160315'<=date
and date<='20160630')
or(date='null'
and'20160315'<=date_received
and date_received <='20160630');

create table if not exists dataset2
as
select*from o2o_offline_train
where'20160515'<=date_received
and date_received <='20160615';

create table if not exists feature2
as
select*from o2o_offline_train
where('20160201'<=date
and date<='20160514')
or(date='null'and'20160201'<=date_received
and date_received <='20160514');

create table if not exists online_feature2
as
select*from o2o_online_train where('20160201'<=date
and date<='20160514')or(date='null'and'20160201'<=date_received
and date_received <='20160514');

create table if not exists dataset1 
as select*from o2o_offline_train
where'20160414'<=date_received 
and date_received <='20160514';

create table if not exists feature1
as
select*from o2o_offline_train
where('20160101'<=date
and date<='20160413')or(date='null'and'20160101'<=date_received
and date_received <='20160413');

create table if not exists online_feature1
as
select*from o2o_online_train
where('20160101'<=date
and date<='20160413')or(date='null'and'20160101'<=date_received
and date_received <='20160413');

2.提取特征需要用的数据列

--二.提取特征需要用的数据列
create table merchant3 as select merchant_id,user_id,coupon_id,distance,date_received,date from feature3;

create table d3_f1_t1 as 
select merchant_id,sum(cnt) as total_sales from
(
	select merchant_id,1 as cnt from merchant3 where date!="null"
)t 
group by merchant_id;

create table d3_f1_t2 as 
select merchant_id,sum(cnt) as sales_use_coupon from
(
	select merchant_id,1 as cnt from merchant3 where date!="null" and coupon_id!="null"
)t 
group by merchant_id;

create table d3_f1_t3 as 
select merchant_id,sum(cnt) as total_coupon from
(
	select merchant_id,1 as cnt from merchant3 where coupon_id!="null"
)t 
group by merchant_id;

create table d3_f1_t4 as 
select merchant_id,count(*) as distinct_coupon_count from
(
	select distinct merchant_id,coupon_id from merchant3 where coupon_id!="null"
)t 
group by merchant_id;

create table d3_f1_t5 as 
select merchant_id,avg(distance) as merchant_avg_distance,median(distance) as merchant_median_distance,
       max(distance) as merchant_max_distance,min(distance) as merchant_min_distance from
(
	select merchant_id,distance from merchant3 where date!="null" and coupon_id!="null" and distance!="null"
)t
group by merchant_id;


create table d3_f1_t6 as 
select merchant_id,count(*) as merchant_user_buy_count from
(
	  select distinct merchant_id,user_id from merchant3 where date!="null"
)t 
group by merchant_id;



create table d3_f1 as
select merchant_id,distinct_coupon_count,merchant_avg_distance,merchant_median_distance,cast(merchant_max_distance as bigint) as merchant_max_distance,cast(merchant_min_distance as bigint ) as merchant_min_distance,
	  merchant_user_buy_count,sales_use_coupon,transform_rate,coupon_rate,case when total_coupon is null then 0.0 else total_coupon end as total_coupon,case when total_sales is null then 0.0 else total_sales end as total_sales
from
(
	select tt.*,tt.sales_use_coupon/tt.total_coupon as transform_rate,tt.sales_use_coupon/tt.total_sales as coupon_rate from
	(
	  select merchant_id,total_sales,total_coupon,distinct_coupon_count,merchant_avg_distance,merchant_median_distance,merchant_max_distance,merchant_min_distance,merchant_user_buy_count,
			 case when sales_use_coupon is null then 0.0 else sales_use_coupon end as sales_use_coupon
	  from
	  (
	      select k.*,l.merchant_user_buy_count from
		  (
			select i.*,j.merchant_avg_distance,j.merchant_median_distance,j.merchant_max_distance,j.merchant_min_distance from
			(
			  select g.*,h.distinct_coupon_count from
			  (
				select e.*,f.total_coupon from
				(
				  select c.*,d.sales_use_coupon from
				  (
					select a.*,b.total_sales from
					(select distinct merchant_id from merchant3) a left outer join d3_f1_t1 b 
					on a.merchant_id=b.merchant_id
				  )c left outer join d3_f1_t2 d 
				  on c.merchant_id=d.merchant_id
				)e left outer join d3_f1_t3 f 
				on e.merchant_id=f.merchant_id
			  )g left outer join d3_f1_t4 h 
			  on g.merchant_id=h.merchant_id
			)i left outer join d3_f1_t5 j 
			on i.merchant_id=j.merchant_id
		  )k left outer join d3_f1_t6 l 
		  on k.merchant_id=l.merchant_id
	  )t
	)tt
)ttt;





-- ##############  for dataset2  ################### 
create table merchant2 as select merchant_id,user_id,coupon_id,distance,date_received,date from feature2;

create table d2_f1_t1 as 
select merchant_id,sum(cnt) as total_sales from
(
	select merchant_id,1 as cnt from merchant2 where date!="null"
)t 
group by merchant_id;

create table d2_f1_t2 as 
select merchant_id,sum(cnt) as sales_use_coupon from
(
	select merchant_id,1 as cnt from merchant2 where date!="null" and coupon_id!="null"
)t 
group by merchant_id;

create table d2_f1_t3 as 
select merchant_id,sum(cnt) as total_coupon from
(
	select merchant_id,1 as cnt from merchant2 where coupon_id!="null"
)t 
group by merchant_id;

create table d2_f1_t4 as 
select merchant_id,count(*) as distinct_coupon_count from
(
	select distinct merchant_id,coupon_id from merchant2 where coupon_id!="null"
)t 
group by merchant_id;

create table d2_f1_t5 as 
select merchant_id,avg(distance) as merchant_avg_distance,median(distance) as merchant_median_distance,
       max(distance) as merchant_max_distance,min(distance) as merchant_min_distance from
(
	select merchant_id,distance from merchant2 where date!="null" and coupon_id!="null" and distance!="null"
)t
group by merchant_id;


create table d2_f1_t6 as 
select merchant_id,count(*) as merchant_user_buy_count from
(
	  select distinct merchant_id,user_id from merchant2 where date!="null"
)t 
group by merchant_id;


create table d2_f1 as
select merchant_id,distinct_coupon_count,merchant_avg_distance,merchant_median_distance,cast(merchant_max_distance as bigint) as merchant_max_distance,cast(merchant_min_distance as bigint ) as merchant_min_distance,
	  merchant_user_buy_count,sales_use_coupon,transform_rate,coupon_rate,case when total_coupon is null then 0.0 else total_coupon end as total_coupon,case when total_sales is null then 0.0 else total_sales end as total_sales
from
(
	select tt.*,tt.sales_use_coupon/tt.total_coupon as transform_rate,tt.sales_use_coupon/tt.total_sales as coupon_rate from
	(
	  select merchant_id,total_sales,total_coupon,distinct_coupon_count,merchant_avg_distance,merchant_median_distance,merchant_max_distance,merchant_min_distance,merchant_user_buy_count,
			 case when sales_use_coupon is null then 0.0 else sales_use_coupon end as sales_use_coupon
	  from
	  (
	      select k.*,l.merchant_user_buy_count from
		  (
			select i.*,j.merchant_avg_distance,j.merchant_median_distance,j.merchant_max_distance,j.merchant_min_distance from
			(
			  select g.*,h.distinct_coupon_count from
			  (
				select e.*,f.total_coupon from
				(
				  select c.*,d.sales_use_coupon from
				  (
					select a.*,b.total_sales from
					(select distinct merchant_id from merchant2) a left outer join d2_f1_t1 b 
					on a.merchant_id=b.merchant_id
				  )c left outer join d2_f1_t2 d 
				  on c.merchant_id=d.merchant_id
				)e left outer join d2_f1_t3 f 
				on e.merchant_id=f.merchant_id
			  )g left outer join d2_f1_t4 h 
			  on g.merchant_id=h.merchant_id
			)i left outer join d2_f1_t5 j 
			on i.merchant_id=j.merchant_id
		  )k left outer join d2_f1_t6 l 
		  on k.merchant_id=l.merchant_id
	  )t
	)tt
)ttt;


-- ##############  for dataset1  ################### 
create table merchant1 as select merchant_id,user_id,coupon_id,distance,date_received,date from feature1;

create table d1_f1_t1 as 
select merchant_id,sum(cnt) as total_sales from
(
	select merchant_id,1 as cnt from merchant1 where date!="null"
)t 
group by merchant_id;

create table d1_f1_t2 as 
select merchant_id,sum(cnt) as sales_use_coupon from
(
	select merchant_id,1 as cnt from merchant1 where date!="null" and coupon_id!="null"
)t 
group by merchant_id;

create table d1_f1_t3 as 
select merchant_id,sum(cnt) as total_coupon from
(
	select merchant_id,1 as cnt from merchant1 where coupon_id!="null"
)t 
group by merchant_id;

create table d1_f1_t4 as 
select merchant_id,count(*) as distinct_coupon_count from
(
	select distinct merchant_id,coupon_id from merchant1 where coupon_id!="null"
)t 
group by merchant_id;

create table d1_f1_t5 as 
select merchant_id,avg(distance) as merchant_avg_distance,median(distance) as merchant_median_distance,
       max(distance) as merchant_max_distance,min(distance) as merchant_min_distance from
(
	select merchant_id,distance from merchant1 where date!="null" and coupon_id!="null" and distance!="null"
)t
group by merchant_id;


create table d1_f1_t6 as 
select merchant_id,count(*) as merchant_user_buy_count from
(
	  select distinct merchant_id,user_id from merchant1 where date!="null"
)t 
group by merchant_id;


create table d1_f1 as
select merchant_id,distinct_coupon_count,merchant_avg_distance,merchant_median_distance,cast(merchant_max_distance as bigint) as merchant_max_distance,cast(merchant_min_distance as bigint ) as merchant_min_distance,
	  merchant_user_buy_count,sales_use_coupon,transform_rate,coupon_rate,case when total_coupon is null then 0.0 else total_coupon end as total_coupon,case when total_sales is null then 0.0 else total_sales end as total_sales
from
(
	select tt.*,tt.sales_use_coupon/tt.total_coupon as transform_rate,tt.sales_use_coupon/tt.total_sales as coupon_rate from
	(
	  select merchant_id,total_sales,total_coupon,distinct_coupon_count,merchant_avg_distance,merchant_median_distance,merchant_max_distance,merchant_min_distance,merchant_user_buy_count,
			 case when sales_use_coupon is null then 0.0 else sales_use_coupon end as sales_use_coupon
	  from
	  (
	      select k.*,l.merchant_user_buy_count from
		  (
			select i.*,j.merchant_avg_distance,j.merchant_median_distance,j.merchant_max_distance,j.merchant_min_distance from
			(
			  select g.*,h.distinct_coupon_count from
			  (
				select e.*,f.total_coupon from
				(
				  select c.*,d.sales_use_coupon from
				  (
					select a.*,b.total_sales from
					(select distinct merchant_id from merchant1) a left outer join d1_f1_t1 b 
					on a.merchant_id=b.merchant_id
				  )c left outer join d1_f1_t2 d 
				  on c.merchant_id=d.merchant_id
				)e left outer join d1_f1_t3 f 
				on e.merchant_id=f.merchant_id
			  )g left outer join d1_f1_t4 h 
			  on g.merchant_id=h.merchant_id
			)i left outer join d1_f1_t5 j 
			on i.merchant_id=j.merchant_id
		  )k left outer join d1_f1_t6 l 
		  on k.merchant_id=l.merchant_id
	  )t
	)tt
)ttt;

3.优惠券相关特征

--三.优惠券相关特征
create table d3_f2_t1 as
select t.user_id,t.coupon_id,t.merchant_id,t.date_received,t.days_distance,t.day_of_week,t.day_of_month,t.is_man_jian,t.discount_man,t.discount_jian,t.distance,
	  case when is_man_jian=1 then 1.0 - discount_jian/discount_man else discount_rate end as discount_rate
from
(
  select user_id,coupon_id,merchant_id,date_received,discount_rate,
          case when distance="null" then -1 else cast(distance as bigint) end as distance,
		  datediff(to_date(date_received,"yyyymmdd"),to_date("20160630","yyyymmdd"),"dd") as days_distance,
		  weekday(to_date(date_received,"yyyymmdd")) as day_of_week,
		  cast(substr(date_received,7,2) as bigint) as day_of_month,
		  case when instr(discount_rate,":")=0 then 0 else 1 end as is_man_jian,
		  case when instr(discount_rate,":")=0 then -1 else split_part(discount_rate,":",1) end as discount_man,
		  case when instr(discount_rate,":")=0 then -1 else split_part(discount_rate,":",2) end as discount_jian
  from dataset3
)t;

create table d3_f2_t2 as
select coupon_id,sum(cnt) as coupon_count
from (select coupon_id,1 as cnt from dataset3)t 
group by coupon_id;


create table d3_f2_t3 as
select coupon_id,sum(cnt) as label_coupon_feature_receive_count from
(
  select a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct coupon_id from dataset3)a  left outer join (select coupon_id,1 as cnt from feature3 )b 
  on a.coupon_id=b.coupon_id
)t
group by coupon_id;

create table d3_f2_t4 as
select coupon_id,sum(cnt) as label_coupon_feature_buy_count from
(
  select a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct coupon_id from dataset3)a  left outer join (select coupon_id,1 as cnt from feature3 where coupon_id!="null" and date!="null")b 
  on a.coupon_id=b.coupon_id
)t
group by coupon_id;


create table d3_f2 as 
select e.*,f.label_coupon_feature_buy_count,
		  case when e.label_coupon_feature_receive_count=0 then -1 else f.label_coupon_feature_buy_count/e.label_coupon_feature_receive_count end as label_coupon_feature_rate
from
(
  select c.*,d.label_coupon_feature_receive_count from
  (
	select a.user_id,a.coupon_id,a.merchant_id,a.date_received,a.days_distance,a.day_of_week,a.day_of_month,a.is_man_jian,a.distance,
		  cast(a.discount_man as double) as discount_man,cast(a.discount_jian as double ) as discount_jian,cast(a.discount_rate as double) as discount_rate,b.coupon_count 
	from d3_f2_t1 a join d3_f2_t2 b 
	on a.coupon_id=b.coupon_id
  )c left outer join d3_f2_t3 d 
  on c.coupon_id=d.coupon_id
)e left outer join d3_f2_t4 f 
on e.coupon_id=f.coupon_id;

-- ###################   for dataset2  ################### 
create table d2_f2_t1 as
select t.user_id,t.coupon_id,t.merchant_id,t.date_received,t.date,t.days_distance,t.day_of_week,t.day_of_month,t.is_man_jian,t.discount_man,t.discount_jian,t.distance,
	  case when is_man_jian=1 then 1.0 - discount_jian/discount_man else discount_rate end as discount_rate
from
(
  select user_id,coupon_id,merchant_id,date_received,date,discount_rate,
  	      case when distance="null" then -1 else cast(distance as bigint) end as distance,
		  datediff(to_date(date_received,"yyyymmdd"),to_date("20160514","yyyymmdd"),"dd") as days_distance,
		  weekday(to_date(date_received,"yyyymmdd")) as day_of_week,
		  cast(substr(date_received,7,2) as bigint) as day_of_month,
		  case when instr(discount_rate,":")=0 then 0 else 1 end as is_man_jian,
		  case when instr(discount_rate,":")=0 then -1 else split_part(discount_rate,":",1) end as discount_man,
		  case when instr(discount_rate,":")=0 then -1 else split_part(discount_rate,":",2) end as discount_jian
  from dataset2
)t;

create table d2_f2_t2 as
select coupon_id,sum(cnt) as coupon_count
from (select coupon_id,1 as cnt from dataset2)t 
group by coupon_id;


create table d2_f2_t3 as
select coupon_id,sum(cnt) as label_coupon_feature_receive_count from
(
  select a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct coupon_id from dataset2)a  left outer join (select coupon_id,1 as cnt from feature2 )b 
  on a.coupon_id=b.coupon_id
)t
group by coupon_id;

create table d2_f2_t4 as
select coupon_id,sum(cnt) as label_coupon_feature_buy_count from
(
  select a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct coupon_id from dataset2)a  left outer join (select coupon_id,1 as cnt from feature2 where coupon_id!="null" and date!="null")b 
  on a.coupon_id=b.coupon_id
)t
group by coupon_id;


create table d2_f2 as 
select e.*,f.label_coupon_feature_buy_count,
		  case when e.label_coupon_feature_receive_count=0 then -1 else f.label_coupon_feature_buy_count/e.label_coupon_feature_receive_count end as label_coupon_feature_rate
from
(
  select c.*,d.label_coupon_feature_receive_count from
  (
	select a.user_id,a.coupon_id,a.merchant_id,a.date_received,a.date,a.days_distance,a.day_of_week,a.day_of_month,a.is_man_jian,a.distance,
		  cast(a.discount_man as double) as discount_man,cast(a.discount_jian as double ) as discount_jian,cast(a.discount_rate as double) as discount_rate,b.coupon_count 
	from d2_f2_t1 a join d2_f2_t2 b 
	on a.coupon_id=b.coupon_id
  )c left outer join d2_f2_t3 d 
  on c.coupon_id=d.coupon_id
)e left outer join d2_f2_t4 f 
on e.coupon_id=f.coupon_id;


-- ###################   for dataset1  ################### 
create table d1_f2_t1 as
select t.user_id,t.coupon_id,t.merchant_id,t.date_received,t.date,t.days_distance,t.day_of_week,t.day_of_month,t.is_man_jian,t.discount_man,t.discount_jian,t.distance,
	  case when is_man_jian=1 then 1.0 - discount_jian/discount_man else discount_rate end as discount_rate
from
(
  select user_id,coupon_id,merchant_id,date_received,date,discount_rate,
          case when distance="null" then -1 else cast(distance as bigint) end as distance,
		  datediff(to_date(date_received,"yyyymmdd"),to_date("20160413","yyyymmdd"),"dd") as days_distance,
		  weekday(to_date(date_received,"yyyymmdd")) as day_of_week,
		  cast(substr(date_received,7,2) as bigint) as day_of_month,
		  case when instr(discount_rate,":")=0 then 0 else 1 end as is_man_jian,
		  case when instr(discount_rate,":")=0 then -1 else split_part(discount_rate,":",1) end as discount_man,
		  case when instr(discount_rate,":")=0 then -1 else split_part(discount_rate,":",2) end as discount_jian
  from dataset1
)t;

create table d1_f2_t2 as
select coupon_id,sum(cnt) as coupon_count
from (select coupon_id,1 as cnt from dataset1)t 
group by coupon_id;


create table d1_f2_t3 as
select coupon_id,sum(cnt) as label_coupon_feature_receive_count from
(
  select a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct coupon_id from dataset1)a  left outer join (select coupon_id,1 as cnt from feature1 )b 
  on a.coupon_id=b.coupon_id
)t
group by coupon_id;

create table d1_f2_t4 as
select coupon_id,sum(cnt) as label_coupon_feature_buy_count from
(
  select a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct coupon_id from dataset1)a  left outer join (select coupon_id,1 as cnt from feature1 where coupon_id!="null" and date!="null")b 
  on a.coupon_id=b.coupon_id
)t
group by coupon_id;


create table d1_f2 as 
select e.*,f.label_coupon_feature_buy_count,
		  case when e.label_coupon_feature_receive_count=0 then -1 else f.label_coupon_feature_buy_count/e.label_coupon_feature_receive_count end as label_coupon_feature_rate
from
(
  select c.*,d.label_coupon_feature_receive_count from
  (
	select a.user_id,a.coupon_id,a.merchant_id,a.date_received,a.date,a.days_distance,a.day_of_week,a.day_of_month,a.is_man_jian,a.distance,
		  cast(a.discount_man as double) as discount_man,cast(a.discount_jian as double ) as discount_jian,cast(a.discount_rate as double) as discount_rate,b.coupon_count 
	from d1_f2_t1 a join d1_f2_t2 b 
	on a.coupon_id=b.coupon_id
  )c left outer join d1_f2_t3 d 
  on c.coupon_id=d.coupon_id
)e left outer join d1_f2_t4 f 
on e.coupon_id=f.coupon_id;

4.用户相关特征

create table user3 as select user_id,merchant_id,coupon_id,discount_rate,distance,date_received,date from feature3;

create table d3_f3_t1 as 
select user_id,count(*) as count_merchant from
(
	select distinct user_id,merchant_id from user3 where date!="null"
)t 
group by user_id;

create table d3_f3_t2 as
select user_id,avg(distance) as user_avg_distance,min(distance) as user_min_distance,max(distance) as user_max_distance,median(distance) as user_median_distance from
(
	select user_id,distance from user3 where date!="null" and coupon_id!="null" and distance!="null"
)t 
group by user_id;

create table d3_f3_t3 as
select user_id,count(*) as buy_use_coupon from
(
	select user_id from user3 where date!="null" and coupon_id!="null"
)t 
group by user_id;

create table d3_f3_t4 as
select user_id,count(*) as buy_total from
(
	select user_id from user3 where date!="null"
)t 
group by user_id;

create table d3_f3_t5 as
select user_id,count(*) as coupon_received from
(
	select user_id from user3 where coupon_id!="null"
)t 
group by user_id;

create table d3_f3_t6 as 
select user_id,avg(day_gap) as avg_diff_date_datereceived,min(day_gap) as min_diff_date_datereceived,max(day_gap) as max_diff_date_datereceived from
(
  select user_id,datediff(to_date(date_received,"yyyymmdd"),to_date(date,"yyyymmdd"),"dd") as day_gap 
  from user3 
  where date!="null" and date_received!="null"
)t 
group by user_id;


create table d3_f3 as
select user_id,user_avg_distance,cast(user_min_distance as double) as user_min_distance,cast(user_max_distance as double) as user_max_distance,user_median_distance,
       abs(avg_diff_date_datereceived) as avg_diff_date_datereceived,abs(min_diff_date_datereceived) as min_diff_date_datereceived,abs(max_diff_date_datereceived) max_diff_date_datereceived,
       buy_use_coupon,buy_use_coupon_rate,user_coupon_transform_rate,
	   case when count_merchant is null then 0.0 else count_merchant end as count_merchant,
	   case when buy_total is null then 0.0 else buy_total end as buy_total,
	   case when coupon_received is null then 0.0 else coupon_received end as coupon_received
from
(
	select tt.*,tt.buy_use_coupon/tt.buy_total as buy_use_coupon_rate,tt.buy_use_coupon/tt.coupon_received as user_coupon_transform_rate from

	(
	  select user_id,count_merchant,user_avg_distance,user_min_distance,user_max_distance,user_median_distance,buy_total,coupon_received,avg_diff_date_datereceived,min_diff_date_datereceived,
			 max_diff_date_datereceived,case when buy_use_coupon is null then 0.0 else buy_use_coupon end as buy_use_coupon
	  from
	  (
		  select k.*,l.avg_diff_date_datereceived,l.min_diff_date_datereceived,l.max_diff_date_datereceived from
		  (
			select i.*,j.coupon_received from
			(
			  select g.*,h.buy_total from
			  (
				select e.*,f.buy_use_coupon from
				(
				  select c.*,d.user_avg_distance,d.user_min_distance,d.user_max_distance,d.user_median_distance from
				  (
					select a.*,b.count_merchant from
					(select distinct user_id from user3) a left outer join d3_f3_t1 b
					on a.user_id=b.user_id
				  )c left outer join d3_f3_t2 d
				  on c.user_id=d.user_id
				)e left outer join d3_f3_t3 f
				on e.user_id=f.user_id
			  )g left outer join d3_f3_t4 h
			  on g.user_id=h.user_id
			)i left outer join d3_f3_t5 j 
			on i.user_id=j.user_id
		  )k left outer join d3_f3_t6 l
		  on k.user_id=l.user_id
	  )t
	)tt
)ttt;





-- ###################   for dataset2  ################### 
create table user2 as select user_id,merchant_id,coupon_id,discount_rate,distance,date_received,date from feature2;

create table d2_f3_t1 as 
select user_id,count(*) as count_merchant from
(
	select distinct user_id,merchant_id from user2 where date!="null"
)t 
group by user_id;

create table d2_f3_t2 as
select user_id,avg(distance) as user_avg_distance,min(distance) as user_min_distance,max(distance) as user_max_distance,median(distance) as user_median_distance from
(
	select user_id,distance from user2 where date!="null" and coupon_id!="null" and distance!="null"
)t 
group by user_id;

create table d2_f3_t3 as
select user_id,count(*) as buy_use_coupon from
(
	select user_id from user2 where date!="null" and coupon_id!="null"
)t 
group by user_id;

create table d2_f3_t4 as
select user_id,count(*) as buy_total from
(
	select user_id from user2 where date!="null"
)t 
group by user_id;

create table d2_f3_t5 as
select user_id,count(*) as coupon_received from
(
	select user_id from user2 where coupon_id!="null"
)t 
group by user_id;

create table d2_f3_t6 as 
select user_id,avg(day_gap) as avg_diff_date_datereceived,min(day_gap) as min_diff_date_datereceived,max(day_gap) as max_diff_date_datereceived from
(
  select user_id,datediff(to_date(date_received,"yyyymmdd"),to_date(date,"yyyymmdd"),"dd") as day_gap 
  from user2
  where date!="null" and date_received!="null"
)t 
group by user_id;


create table d2_f3 as
select user_id,user_avg_distance,cast(user_min_distance as double) as user_min_distance,cast(user_max_distance as double) as user_max_distance,user_median_distance,
       abs(avg_diff_date_datereceived) as avg_diff_date_datereceived,abs(min_diff_date_datereceived) as min_diff_date_datereceived,abs(max_diff_date_datereceived) max_diff_date_datereceived,
       buy_use_coupon,buy_use_coupon_rate,user_coupon_transform_rate,
	   case when count_merchant is null then 0.0 else count_merchant end as count_merchant,
	   case when buy_total is null then 0.0 else buy_total end as buy_total,
	   case when coupon_received is null then 0.0 else coupon_received end as coupon_received
from
(
	select tt.*,tt.buy_use_coupon/tt.buy_total as buy_use_coupon_rate,tt.buy_use_coupon/tt.coupon_received as user_coupon_transform_rate from

	(
	  select user_id,count_merchant,user_avg_distance,user_min_distance,user_max_distance,user_median_distance,buy_total,coupon_received,avg_diff_date_datereceived,min_diff_date_datereceived,
			 max_diff_date_datereceived,case when buy_use_coupon is null then 0.0 else buy_use_coupon end as buy_use_coupon
	  from
	  (
		  select k.*,l.avg_diff_date_datereceived,l.min_diff_date_datereceived,l.max_diff_date_datereceived from
		  (
			select i.*,j.coupon_received from
			(
			  select g.*,h.buy_total from
			  (
				select e.*,f.buy_use_coupon from
				(
				  select c.*,d.user_avg_distance,d.user_min_distance,d.user_max_distance,d.user_median_distance from
				  (
					select a.*,b.count_merchant from
					(select distinct user_id from user2) a left outer join d2_f3_t1 b
					on a.user_id=b.user_id
				  )c left outer join d2_f3_t2 d
				  on c.user_id=d.user_id
				)e left outer join d2_f3_t3 f
				on e.user_id=f.user_id
			  )g left outer join d2_f3_t4 h
			  on g.user_id=h.user_id
			)i left outer join d2_f3_t5 j 
			on i.user_id=j.user_id
		  )k left outer join d2_f3_t6 l
		  on k.user_id=l.user_id
	  )t
	)tt
)ttt;


-- ###################   for dataset1  ################### 
create table user1 as select user_id,merchant_id,coupon_id,discount_rate,distance,date_received,date from feature1;

create table d1_f3_t1 as 
select user_id,count(*) as count_merchant from
(
	select distinct user_id,merchant_id from user1 where date!="null"
)t 
group by user_id;

create table d1_f3_t2 as
select user_id,avg(distance) as user_avg_distance,min(distance) as user_min_distance,max(distance) as user_max_distance,median(distance) as user_median_distance from
(
	select user_id,distance from user1 where date!="null" and coupon_id!="null" and distance!="null"
)t 
group by user_id;

create table d1_f3_t3 as
select user_id,count(*) as buy_use_coupon from
(
	select user_id from user1 where date!="null" and coupon_id!="null"
)t 
group by user_id;

create table d1_f3_t4 as
select user_id,count(*) as buy_total from
(
	select user_id from user1 where date!="null"
)t 
group by user_id;

create table d1_f3_t5 as
select user_id,count(*) as coupon_received from
(
	select user_id from user1 where coupon_id!="null"
)t 
group by user_id;

create table d1_f3_t6 as 
select user_id,avg(day_gap) as avg_diff_date_datereceived,min(day_gap) as min_diff_date_datereceived,max(day_gap) as max_diff_date_datereceived from
(
  select user_id,datediff(to_date(date_received,"yyyymmdd"),to_date(date,"yyyymmdd"),"dd") as day_gap 
  from user1
  where date!="null" and date_received!="null"
)t 
group by user_id;


create table d1_f3 as
select user_id,user_avg_distance,cast(user_min_distance as double) as user_min_distance,cast(user_max_distance as double) as user_max_distance,user_median_distance,
       abs(avg_diff_date_datereceived) as avg_diff_date_datereceived,abs(min_diff_date_datereceived) as min_diff_date_datereceived,abs(max_diff_date_datereceived) max_diff_date_datereceived,
       buy_use_coupon,buy_use_coupon_rate,user_coupon_transform_rate,
	   case when count_merchant is null then 0.0 else count_merchant end as count_merchant,
	   case when buy_total is null then 0.0 else buy_total end as buy_total,
	   case when coupon_received is null then 0.0 else coupon_received end as coupon_received
from
(
	select tt.*,tt.buy_use_coupon/tt.buy_total as buy_use_coupon_rate,tt.buy_use_coupon/tt.coupon_received as user_coupon_transform_rate from

	(
	  select user_id,count_merchant,user_avg_distance,user_min_distance,user_max_distance,user_median_distance,buy_total,coupon_received,avg_diff_date_datereceived,min_diff_date_datereceived,
			 max_diff_date_datereceived,case when buy_use_coupon is null then 0.0 else buy_use_coupon end as buy_use_coupon
	  from
	  (
		  select k.*,l.avg_diff_date_datereceived,l.min_diff_date_datereceived,l.max_diff_date_datereceived from
		  (
			select i.*,j.coupon_received from
			(
			  select g.*,h.buy_total from
			  (
				select e.*,f.buy_use_coupon from
				(
				  select c.*,d.user_avg_distance,d.user_min_distance,d.user_max_distance,d.user_median_distance from
				  (
					select a.*,b.count_merchant from
					(select distinct user_id from user1) a left outer join d1_f3_t1 b
					on a.user_id=b.user_id
				  )c left outer join d1_f3_t2 d
				  on c.user_id=d.user_id
				)e left outer join d1_f3_t3 f
				on e.user_id=f.user_id
			  )g left outer join d1_f3_t4 h
			  on g.user_id=h.user_id
			)i left outer join d1_f3_t5 j 
			on i.user_id=j.user_id
		  )k left outer join d1_f3_t6 l
		  on k.user_id=l.user_id
	  )t
	)tt
)ttt;

5.用户商家交互特征

create table d3_f4_t1 as 
select user_id,merchant_id,count(*) as user_merchant_buy_total from
( select user_id,merchant_id from feature3 where date!="null" )t
group by user_id,merchant_id;

create table d3_f4_t2 as 
select user_id,merchant_id,count(*) as user_merchant_received from
( select user_id,merchant_id from feature3 where coupon_id!="null" )t
group by user_id,merchant_id;

create table d3_f4_t3 as 
select user_id,merchant_id,count(*) as user_merchant_buy_use_coupon from
( select user_id,merchant_id from feature3 where date!="null" and date_received!="null")t
group by user_id,merchant_id;

create table d3_f4_t4 as 
select user_id,merchant_id,count(*) as user_merchant_any from
( select user_id,merchant_id from feature3 )t
group by user_id,merchant_id;


create table d3_f4_t5 as 
select user_id,merchant_id,count(*) as user_merchant_buy_common from
( select user_id,merchant_id from feature3 where date!="null" and coupon_id=="null" )t
group by user_id,merchant_id;


create table d3_f4 as 
select user_id,merchant_id,user_merchant_buy_use_coupon,user_merchant_buy_common,
       user_merchant_coupon_transform_rate,user_merchant_coupon_buy_rate,user_merchant_common_buy_rate,user_merchant_rate,
	   case when user_merchant_buy_total is null then 0.0 else user_merchant_buy_total end as user_merchant_buy_total,
	   case when user_merchant_received is null then 0.0 else user_merchant_received end as user_merchant_received,
	   case when user_merchant_any is null then 0.0 else user_merchant_any end as user_merchant_any
from
(
  select tt.*,tt.user_merchant_buy_use_coupon/tt.user_merchant_received as user_merchant_coupon_transform_rate,
			  tt.user_merchant_buy_use_coupon/tt.user_merchant_buy_total as user_merchant_coupon_buy_rate,
			  tt.user_merchant_buy_common/tt.user_merchant_buy_total as user_merchant_common_buy_rate,
			  tt.user_merchant_buy_total/tt.user_merchant_any as user_merchant_rate
  from
  (
	  select user_id,merchant_id,user_merchant_buy_total,user_merchant_received,user_merchant_any,
			 case when user_merchant_buy_use_coupon is null then 0.0 else user_merchant_buy_use_coupon end as user_merchant_buy_use_coupon,
			 case when user_merchant_buy_common is null then 0.0 else user_merchant_buy_common end as user_merchant_buy_common
	  from
	  (
		  select i.*,j.user_merchant_buy_common from
		  (
			select g.*,h.user_merchant_any from
			(
			  select e.*,f.user_merchant_buy_use_coupon from
			  (
				select c.*,d.user_merchant_received from
				(
				  select a.*,b.user_merchant_buy_total from
				  (select distinct user_id,merchant_id from feature3) a left outer join d3_f4_t1 b 
				  on a.user_id=b.user_id and a.merchant_id=b.merchant_id
				)c left outer join d3_f4_t2 d 
				on c.user_id=d.user_id and c.merchant_id=d.merchant_id
			  )e left outer join d3_f4_t3 f 
			  on e.user_id=f.user_id and e.merchant_id=f.merchant_id
			)g  left outer join d3_f4_t4 h 
			on g.user_id=h.user_id and g.merchant_id=h.merchant_id
		  )i left outer join d3_f4_t5 j
		  on i.user_id=j.user_id and i.merchant_id=j.merchant_id
	  )t
  )tt
)ttt;



-- ###################   for dataset2  ################### 
create table d2_f4_t1 as 
select user_id,merchant_id,count(*) as user_merchant_buy_total from
( select user_id,merchant_id from feature2 where date!="null" )t
group by user_id,merchant_id;

create table d2_f4_t2 as 
select user_id,merchant_id,count(*) as user_merchant_received from
( select user_id,merchant_id from feature2 where coupon_id!="null" )t
group by user_id,merchant_id;

create table d2_f4_t3 as 
select user_id,merchant_id,count(*) as user_merchant_buy_use_coupon from
( select user_id,merchant_id from feature2 where date!="null" and date_received!="null")t
group by user_id,merchant_id;

create table d2_f4_t4 as 
select user_id,merchant_id,count(*) as user_merchant_any from
( select user_id,merchant_id from feature2 )t
group by user_id,merchant_id;


create table d2_f4_t5 as 
select user_id,merchant_id,count(*) as user_merchant_buy_common from
( select user_id,merchant_id from feature2 where date!="null" and coupon_id=="null" )t
group by user_id,merchant_id;


create table d2_f4 as 
select user_id,merchant_id,user_merchant_buy_use_coupon,user_merchant_buy_common,
       user_merchant_coupon_transform_rate,user_merchant_coupon_buy_rate,user_merchant_common_buy_rate,user_merchant_rate,
	   case when user_merchant_buy_total is null then 0.0 else user_merchant_buy_total end as user_merchant_buy_total,
	   case when user_merchant_received is null then 0.0 else user_merchant_received end as user_merchant_received,
	   case when user_merchant_any is null then 0.0 else user_merchant_any end as user_merchant_any
from
(
  select tt.*,tt.user_merchant_buy_use_coupon/tt.user_merchant_received as user_merchant_coupon_transform_rate,
			  tt.user_merchant_buy_use_coupon/tt.user_merchant_buy_total as user_merchant_coupon_buy_rate,
			  tt.user_merchant_buy_common/tt.user_merchant_buy_total as user_merchant_common_buy_rate,
			  tt.user_merchant_buy_total/tt.user_merchant_any as user_merchant_rate
  from
  (
	  select user_id,merchant_id,user_merchant_buy_total,user_merchant_received,user_merchant_any,
			 case when user_merchant_buy_use_coupon is null then 0.0 else user_merchant_buy_use_coupon end as user_merchant_buy_use_coupon,
			 case when user_merchant_buy_common is null then 0.0 else user_merchant_buy_common end as user_merchant_buy_common
	  from
	  (
		  select i.*,j.user_merchant_buy_common from
		  (
			select g.*,h.user_merchant_any from
			(
			  select e.*,f.user_merchant_buy_use_coupon from
			  (
				select c.*,d.user_merchant_received from
				(
				  select a.*,b.user_merchant_buy_total from
				  (select distinct user_id,merchant_id from feature2) a left outer join d2_f4_t1 b 
				  on a.user_id=b.user_id and a.merchant_id=b.merchant_id
				)c left outer join d2_f4_t2 d 
				on c.user_id=d.user_id and c.merchant_id=d.merchant_id
			  )e left outer join d2_f4_t3 f 
			  on e.user_id=f.user_id and e.merchant_id=f.merchant_id
			)g  left outer join d2_f4_t4 h 
			on g.user_id=h.user_id and g.merchant_id=h.merchant_id
		  )i left outer join d2_f4_t5 j
		  on i.user_id=j.user_id and i.merchant_id=j.merchant_id
	  )t
  )tt
)ttt;



-- ###################   for dataset1  ################### 
create table d1_f4_t1 as 
select user_id,merchant_id,count(*) as user_merchant_buy_total from
( select user_id,merchant_id from feature1 where date!="null" )t
group by user_id,merchant_id;

create table d1_f4_t2 as 
select user_id,merchant_id,count(*) as user_merchant_received from
( select user_id,merchant_id from feature1 where coupon_id!="null" )t
group by user_id,merchant_id;

create table d1_f4_t3 as 
select user_id,merchant_id,count(*) as user_merchant_buy_use_coupon from
( select user_id,merchant_id from feature1 where date!="null" and date_received!="null")t
group by user_id,merchant_id;

create table d1_f4_t4 as 
select user_id,merchant_id,count(*) as user_merchant_any from
( select user_id,merchant_id from feature1 )t
group by user_id,merchant_id;


create table d1_f4_t5 as 
select user_id,merchant_id,count(*) as user_merchant_buy_common from
( select user_id,merchant_id from feature1 where date!="null" and coupon_id=="null" )t
group by user_id,merchant_id;


create table d1_f4 as 
select user_id,merchant_id,user_merchant_buy_use_coupon,user_merchant_buy_common,
       user_merchant_coupon_transform_rate,user_merchant_coupon_buy_rate,user_merchant_common_buy_rate,user_merchant_rate,
	   case when user_merchant_buy_total is null then 0.0 else user_merchant_buy_total end as user_merchant_buy_total,
	   case when user_merchant_received is null then 0.0 else user_merchant_received end as user_merchant_received,
	   case when user_merchant_any is null then 0.0 else user_merchant_any end as user_merchant_any
from
(
  select tt.*,tt.user_merchant_buy_use_coupon/tt.user_merchant_received as user_merchant_coupon_transform_rate,
			  tt.user_merchant_buy_use_coupon/tt.user_merchant_buy_total as user_merchant_coupon_buy_rate,
			  tt.user_merchant_buy_common/tt.user_merchant_buy_total as user_merchant_common_buy_rate,
			  tt.user_merchant_buy_total/tt.user_merchant_any as user_merchant_rate
  from
  (
	  select user_id,merchant_id,user_merchant_buy_total,user_merchant_received,user_merchant_any,
			 case when user_merchant_buy_use_coupon is null then 0.0 else user_merchant_buy_use_coupon end as user_merchant_buy_use_coupon,
			 case when user_merchant_buy_common is null then 0.0 else user_merchant_buy_common end as user_merchant_buy_common
	  from
	  (
		  select i.*,j.user_merchant_buy_common from
		  (
			select g.*,h.user_merchant_any from
			(
			  select e.*,f.user_merchant_buy_use_coupon from
			  (
				select c.*,d.user_merchant_received from
				(
				  select a.*,b.user_merchant_buy_total from
				  (select distinct user_id,merchant_id from feature1) a left outer join d1_f4_t1 b 
				  on a.user_id=b.user_id and a.merchant_id=b.merchant_id
				)c left outer join d1_f4_t2 d 
				on c.user_id=d.user_id and c.merchant_id=d.merchant_id
			  )e left outer join d1_f4_t3 f 
			  on e.user_id=f.user_id and e.merchant_id=f.merchant_id
			)g  left outer join d1_f4_t4 h 
			on g.user_id=h.user_id and g.merchant_id=h.merchant_id
		  )i left outer join d1_f4_t5 j
		  on i.user_id=j.user_id and i.merchant_id=j.merchant_id
	  )t
  )tt
)ttt;

6.用户优惠券交互特征

create table d3_f6_t1 as
select user_id,coupon_id,sum(cnt) as label_user_coupon_feature_receive_count from
(
  select a.user_id,a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct user_id,coupon_id from dataset3)a  left outer join (select user_id,coupon_id,1 as cnt from feature3 )b 
  on a.user_id=b.user_id and a.coupon_id=b.coupon_id
)t
group by user_id,coupon_id;

create table d3_f6_t2 as
select user_id,coupon_id,sum(cnt) as label_user_coupon_feature_buy_count from
(
  select a.user_id,a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct user_id,coupon_id from dataset3)a  left outer join (select user_id,coupon_id,1 as cnt from feature3 where date!="null" and coupon_id!="null")b 
  on a.user_id=b.user_id and a.coupon_id=b.coupon_id
)t
group by user_id,coupon_id;

create table d3_f6 as 
select a.*,b.label_user_coupon_feature_buy_count,
       case when a.label_user_coupon_feature_receive_count=0 then -1 else b.label_user_coupon_feature_buy_count/a.label_user_coupon_feature_receive_count end as label_user_coupon_feature_rate
from d3_f6_t1 a left outer join d3_f6_t2 b 
on a.user_id=b.user_id and a.coupon_id=b.coupon_id;


-- ###################   for dataset2  ################### 
create table d2_f6_t1 as
select user_id,coupon_id,sum(cnt) as label_user_coupon_feature_receive_count from
(
  select a.user_id,a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct user_id,coupon_id from dataset2)a  left outer join (select user_id,coupon_id,1 as cnt from feature2 )b 
  on a.user_id=b.user_id and a.coupon_id=b.coupon_id
)t
group by user_id,coupon_id;

create table d2_f6_t2 as
select user_id,coupon_id,sum(cnt) as label_user_coupon_feature_buy_count from
(
  select a.user_id,a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct user_id,coupon_id from dataset2)a  left outer join (select user_id,coupon_id,1 as cnt from feature2 where date!="null" and coupon_id!="null")b 
  on a.user_id=b.user_id and a.coupon_id=b.coupon_id
)t
group by user_id,coupon_id;

create table d2_f6 as 
select a.*,b.label_user_coupon_feature_buy_count,
       case when a.label_user_coupon_feature_receive_count=0 then -1 else b.label_user_coupon_feature_buy_count/a.label_user_coupon_feature_receive_count end as label_user_coupon_feature_rate
from d2_f6_t1 a left outer join d2_f6_t2 b 
on a.user_id=b.user_id and a.coupon_id=b.coupon_id;


-- ###################   for dataset1  ################### 
create table d1_f6_t1 as
select user_id,coupon_id,sum(cnt) as label_user_coupon_feature_receive_count from
(
  select a.user_id,a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct user_id,coupon_id from dataset1)a  left outer join (select user_id,coupon_id,1 as cnt from feature1 )b 
  on a.user_id=b.user_id and a.coupon_id=b.coupon_id
)t
group by user_id,coupon_id;

create table d1_f6_t2 as
select user_id,coupon_id,sum(cnt) as label_user_coupon_feature_buy_count from
(
  select a.user_id,a.coupon_id, case when b.cnt is null then 0 else 1 end as cnt from
  (select distinct user_id,coupon_id from dataset1)a  left outer join (select user_id,coupon_id,1 as cnt from feature1 where date!="null" and coupon_id!="null")b 
  on a.user_id=b.user_id and a.coupon_id=b.coupon_id
)t
group by user_id,coupon_id;

create table d1_f6 as 
select a.*,b.label_user_coupon_feature_buy_count,
       case when a.label_user_coupon_feature_receive_count=0 then -1 else b.label_user_coupon_feature_buy_count/a.label_user_coupon_feature_receive_count end as label_user_coupon_feature_rate
from d1_f6_t1 a left outer join d1_f6_t2 b 
on a.user_id=b.user_id and a.coupon_id=b.coupon_id;

7.用户线上特征

create table d3_f7_t1 as
select user_id,count(*) as online_buy_total from
(
	select user_id from online_feature3 where action=1
)t 
group by user_id;


create table d3_f7_t2 as
select user_id,count(*) as online_buy_use_coupon from
(
	select user_id from online_feature3 where action=1 and coupon_id!="null" and coupon_id!="fixed"
)t 
group by user_id;

create table d3_f7_t3 as
select user_id,count(*) as online_buy_use_fixed from
(
	select user_id from online_feature3 where action=1 and coupon_id="fixed"
)t 
group by user_id;


create table d3_f7_t4 as
select user_id,count(*) as online_coupon_received from
(
	select user_id from online_feature3 where coupon_id!="null" and coupon_id!="fixed"
)t 
group by user_id;


create table d3_f7_t5 as
select user_id,count(*) as online_buy_merchant_count from
(
	select distinct user_id,merchant_id from online_feature3 where action=1
)t 
group by user_id;

create table d3_f7_t6 as
select user_id,count(*) as online_action_merchant_count from
(
	select distinct user_id,merchant_id from online_feature3
)t 
group by user_id;

create table d3_f7 as
select t.*,t.online_buy_use_coupon+t.online_buy_use_fixed as online_buy_use_coupon_fixed,
           case when t.online_buy_total=0 then -1 else t.online_buy_use_coupon/t.online_buy_total end as online_buy_use_coupon_rate,
		   case when t.online_buy_total=0 then -1 else t.online_buy_use_fixed/t.online_buy_total  end as online_buy_use_fixed_rate,
		   case when t.online_buy_total=0 then -1 else (t.online_buy_use_coupon+t.online_buy_use_fixed)/t.online_buy_total end as  online_buy_use_coupon_fixed_rate,
		   case when t.online_coupon_received=0 then -1 else t.online_buy_use_coupon/t.online_coupon_received end as online_coupon_transform_rate
from		   
(
	select a.user_id,case when b.online_buy_total is null then 0 else b.online_buy_total end as online_buy_total,
					case when c.online_buy_use_coupon is null then 0 else c.online_buy_use_coupon end as online_buy_use_coupon,
					case when d.online_buy_use_fixed is null then 0 else d.online_buy_use_fixed end as online_buy_use_fixed,
					case when e.online_coupon_received is null then 0 else e.online_coupon_received end as online_coupon_received,
					case when f.online_buy_merchant_count is null then 0 else f.online_buy_merchant_count end as online_buy_merchant_count,
					case when g.online_action_merchant_count is null then 0 else g.online_action_merchant_count end as online_action_merchant_count
	from
		(select distinct user_id from online_feature3) a
	left outer join
		d3_f7_t1 b  
		on a.user_id=b.user_id
	left outer join
		d3_f7_t2 c  
		on a.user_id=c.user_id
	left outer join
		d3_f7_t3 d 
		on a.user_id=d.user_id
	left outer join
		d3_f7_t4 e
		on a.user_id=e.user_id
	left outer join
		d3_f7_t5 f
		on a.user_id=f.user_id
	left outer join
		d3_f7_t6 g
		on a.user_id=g.user_id
)t;


-- ##############  for dataset2  ################### 
create table d2_f7_t1 as
select user_id,count(*) as online_buy_total from
(
	select user_id from online_feature2 where action=1
)t 
group by user_id;


create table d2_f7_t2 as
select user_id,count(*) as online_buy_use_coupon from
(
	select user_id from online_feature2 where action=1 and coupon_id!="null" and coupon_id!="fixed"
)t 
group by user_id;

create table d2_f7_t3 as
select user_id,count(*) as online_buy_use_fixed from
(
	select user_id from online_feature2 where action=1 and coupon_id="fixed"
)t 
group by user_id;


create table d2_f7_t4 as
select user_id,count(*) as online_coupon_received from
(
	select user_id from online_feature2 where coupon_id!="null" and coupon_id!="fixed"
)t 
group by user_id;


create table d2_f7_t5 as
select user_id,count(*) as online_buy_merchant_count from
(
	select distinct user_id,merchant_id from online_feature2 where action=1
)t 
group by user_id;

create table d2_f7_t6 as
select user_id,count(*) as online_action_merchant_count from
(
	select distinct user_id,merchant_id from online_feature2
)t 
group by user_id;

create table d2_f7 as
select t.*,t.online_buy_use_coupon+t.online_buy_use_fixed as online_buy_use_coupon_fixed,
           case when t.online_buy_total=0 then -1 else t.online_buy_use_coupon/t.online_buy_total end as online_buy_use_coupon_rate,
		   case when t.online_buy_total=0 then -1 else t.online_buy_use_fixed/t.online_buy_total  end as online_buy_use_fixed_rate,
		   case when t.online_buy_total=0 then -1 else (t.online_buy_use_coupon+t.online_buy_use_fixed)/t.online_buy_total end as  online_buy_use_coupon_fixed_rate,
		   case when t.online_coupon_received=0 then -1 else t.online_buy_use_coupon/t.online_coupon_received end as online_coupon_transform_rate
from		   
(
	select a.user_id,case when b.online_buy_total is null then 0 else b.online_buy_total end as online_buy_total,
					case when c.online_buy_use_coupon is null then 0 else c.online_buy_use_coupon end as online_buy_use_coupon,
					case when d.online_buy_use_fixed is null then 0 else d.online_buy_use_fixed end as online_buy_use_fixed,
					case when e.online_coupon_received is null then 0 else e.online_coupon_received end as online_coupon_received,
					case when f.online_buy_merchant_count is null then 0 else f.online_buy_merchant_count end as online_buy_merchant_count,
					case when g.online_action_merchant_count is null then 0 else g.online_action_merchant_count end as online_action_merchant_count
	from
		(select distinct user_id from online_feature2) a
	left outer join
		d2_f7_t1 b  
		on a.user_id=b.user_id
	left outer join
		d2_f7_t2 c  
		on a.user_id=c.user_id
	left outer join
		d2_f7_t3 d 
		on a.user_id=d.user_id
	left outer join
		d2_f7_t4 e
		on a.user_id=e.user_id
	left outer join
		d2_f7_t5 f
		on a.user_id=f.user_id
	left outer join
		d2_f7_t6 g
		on a.user_id=g.user_id
)t;


-- ##############  for dataset1  ################### 
create table d1_f7_t1 as
select user_id,count(*) as online_buy_total from
(
	select user_id from online_feature1 where action=1
)t 
group by user_id;


create table d1_f7_t2 as
select user_id,count(*) as online_buy_use_coupon from
(
	select user_id from online_feature1 where action=1 and coupon_id!="null" and coupon_id!="fixed"
)t 
group by user_id;

create table d1_f7_t3 as
select user_id,count(*) as online_buy_use_fixed from
(
	select user_id from online_feature1 where action=1 and coupon_id="fixed"
)t 
group by user_id;


create table d1_f7_t4 as
select user_id,count(*) as online_coupon_received from
(
	select user_id from online_feature1 where coupon_id!="null" and coupon_id!="fixed"
)t 
group by user_id;


create table d1_f7_t5 as
select user_id,count(*) as online_buy_merchant_count from
(
	select distinct user_id,merchant_id from online_feature1 where action=1
)t 
group by user_id;

create table d1_f7_t6 as
select user_id,count(*) as online_action_merchant_count from
(
	select distinct user_id,merchant_id from online_feature1
)t 
group by user_id;

create table d1_f7 as
select t.*,t.online_buy_use_coupon+t.online_buy_use_fixed as online_buy_use_coupon_fixed,
           case when t.online_buy_total=0 then -1 else t.online_buy_use_coupon/t.online_buy_total end as online_buy_use_coupon_rate,
		   case when t.online_buy_total=0 then -1 else t.online_buy_use_fixed/t.online_buy_total  end as online_buy_use_fixed_rate,
		   case when t.online_buy_total=0 then -1 else (t.online_buy_use_coupon+t.online_buy_use_fixed)/t.online_buy_total end as  online_buy_use_coupon_fixed_rate,
		   case when t.online_coupon_received=0 then -1 else t.online_buy_use_coupon/t.online_coupon_received end as online_coupon_transform_rate
from		   
(
	select a.user_id,case when b.online_buy_total is null then 0 else b.online_buy_total end as online_buy_total,
					case when c.online_buy_use_coupon is null then 0 else c.online_buy_use_coupon end as online_buy_use_coupon,
					case when d.online_buy_use_fixed is null then 0 else d.online_buy_use_fixed end as online_buy_use_fixed,
					case when e.online_coupon_received is null then 0 else e.online_coupon_received end as online_coupon_received,
					case when f.online_buy_merchant_count is null then 0 else f.online_buy_merchant_count end as online_buy_merchant_count,
					case when g.online_action_merchant_count is null then 0 else g.online_action_merchant_count end as online_action_merchant_count
	from
		(select distinct user_id from online_feature1) a
	left outer join
		d1_f7_t1 b  
		on a.user_id=b.user_id
	left outer join
		d1_f7_t2 c  
		on a.user_id=c.user_id
	left outer join
		d1_f7_t3 d 
		on a.user_id=d.user_id
	left outer join
		d1_f7_t4 e
		on a.user_id=e.user_id
	left outer join
		d1_f7_t5 f
		on a.user_id=f.user_id
	left outer join
		d1_f7_t6 g
		on a.user_id=g.user_id
)t;

8.合并所有特征

create table d3 as
select t.*, case day_of_week when 0 then 1 else 0 end as weekday1,case day_of_week when 1 then 1 else 0 end as weekday2,
            case day_of_week when 2 then 1 else 0 end as weekday3,case day_of_week when 3 then 1 else 0 end as weekday4,
			case day_of_week when 4 then 1 else 0 end as weekday5,case day_of_week when 5 then 1 else 0 end as weekday6,
			case day_of_week when 6 then 1 else 0 end as weekday7 
from
(
    select k.*,l.online_buy_total,l.online_buy_use_coupon ,l.online_buy_use_fixed ,l.online_coupon_received ,l.online_buy_merchant_count ,l.online_action_merchant_count ,
				l.online_buy_use_coupon_fixed ,l.online_buy_use_coupon_rate ,l.online_buy_use_fixed_rate ,l.online_buy_use_coupon_fixed_rate ,l.online_coupon_transform_rate 
	from
	(
		select g.*,j.label_user_coupon_feature_receive_count ,j.label_user_coupon_feature_buy_count ,j.label_user_coupon_feature_rate  from
		(
			  select e.*,f.user_merchant_buy_total,f.user_merchant_received,f.user_merchant_any,f.user_merchant_buy_use_coupon,f.user_merchant_buy_common,
						 f.user_merchant_coupon_transform_rate,f.user_merchant_coupon_buy_rate,f.user_merchant_common_buy_rate,f.user_merchant_rate from
			  (
				select c.*,d.user_avg_distance,d.user_min_distance,d.user_max_distance,d.user_median_distance,d.avg_diff_date_datereceived,d.min_diff_date_datereceived,
					   d.max_diff_date_datereceived,d.buy_use_coupon,d.buy_use_coupon_rate,d.user_coupon_transform_rate,d.count_merchant,d.buy_total,d.coupon_received from
				(
				  select a.*,b.distinct_coupon_count, b.merchant_avg_distance, b.merchant_median_distance, b.merchant_max_distance,b.merchant_user_buy_count,
						 b.merchant_min_distance, b.sales_use_coupon, b.transform_rate,b.coupon_rate,b.total_coupon,b.total_sales from
				  d3_f2 a left outer join d3_f1 b 
				  on a.merchant_id=b.merchant_id
				)c left outer join d3_f3 d
				on c.user_id=d.user_id
			  )e left outer join d3_f4 f
			  on e.user_id=f.user_id and e.merchant_id=f.merchant_id
		  )g left outer join d3_f6 j 
		on g.user_id=j.user_id and g.coupon_id=j.coupon_id
	)k left outer join d3_f7 l 
	on k.user_id=l.user_id
)t;




create table d2 as
select t.*, case day_of_week when 0 then 1 else 0 end as weekday1,case day_of_week when 1 then 1 else 0 end as weekday2,
            case day_of_week when 2 then 1 else 0 end as weekday3,case day_of_week when 3 then 1 else 0 end as weekday4,
			case day_of_week when 4 then 1 else 0 end as weekday5,case day_of_week when 5 then 1 else 0 end as weekday6,
			case day_of_week when 6 then 1 else 0 end as weekday7 
from
(
	select k.*,l.online_buy_total,l.online_buy_use_coupon ,l.online_buy_use_fixed ,l.online_coupon_received ,l.online_buy_merchant_count ,l.online_action_merchant_count ,
				l.online_buy_use_coupon_fixed ,l.online_buy_use_coupon_rate ,l.online_buy_use_fixed_rate ,l.online_buy_use_coupon_fixed_rate ,l.online_coupon_transform_rate 
	from
	(
		select g.*,j.label_user_coupon_feature_receive_count ,j.label_user_coupon_feature_buy_count ,j.label_user_coupon_feature_rate  from
		(
			  select e.*,f.user_merchant_buy_total,f.user_merchant_received,f.user_merchant_any,f.user_merchant_buy_use_coupon,f.user_merchant_buy_common,
						 f.user_merchant_coupon_transform_rate,f.user_merchant_coupon_buy_rate,f.user_merchant_common_buy_rate,f.user_merchant_rate from
			  (
				select c.*,d.user_avg_distance,d.user_min_distance,d.user_max_distance,d.user_median_distance,d.avg_diff_date_datereceived,d.min_diff_date_datereceived,
					   d.max_diff_date_datereceived,d.buy_use_coupon,d.buy_use_coupon_rate,d.user_coupon_transform_rate,d.count_merchant,d.buy_total,d.coupon_received from
				(
				  select a.*,b.distinct_coupon_count, b.merchant_avg_distance, b.merchant_median_distance, b.merchant_max_distance,b.merchant_user_buy_count,
						 b.merchant_min_distance, b.sales_use_coupon, b.transform_rate,b.coupon_rate,b.total_coupon,b.total_sales from
				  d2_f2 a left outer join d2_f1 b 
				  on a.merchant_id=b.merchant_id
				)c left outer join d2_f3 d
				on c.user_id=d.user_id
			  )e left outer join d2_f4 f
			  on e.user_id=f.user_id and e.merchant_id=f.merchant_id
		  )g left outer join d2_f6 j 
		on g.user_id=j.user_id and g.coupon_id=j.coupon_id
	)k left outer join d2_f7 l 
	on k.user_id=l.user_id
)t;



create table d1 as
select t.*, case day_of_week when 0 then 1 else 0 end as weekday1,case day_of_week when 1 then 1 else 0 end as weekday2,
            case day_of_week when 2 then 1 else 0 end as weekday3,case day_of_week when 3 then 1 else 0 end as weekday4,
			case day_of_week when 4 then 1 else 0 end as weekday5,case day_of_week when 5 then 1 else 0 end as weekday6,
			case day_of_week when 6 then 1 else 0 end as weekday7 
from
(
	select k.*,l.online_buy_total,l.online_buy_use_coupon ,l.online_buy_use_fixed ,l.online_coupon_received ,l.online_buy_merchant_count ,l.online_action_merchant_count ,
				l.online_buy_use_coupon_fixed ,l.online_buy_use_coupon_rate ,l.online_buy_use_fixed_rate ,l.online_buy_use_coupon_fixed_rate ,l.online_coupon_transform_rate 
	from	
	(
		select g.*,j.label_user_coupon_feature_receive_count ,j.label_user_coupon_feature_buy_count ,j.label_user_coupon_feature_rate  from
		(
			  select e.*,f.user_merchant_buy_total,f.user_merchant_received,f.user_merchant_any,f.user_merchant_buy_use_coupon,f.user_merchant_buy_common,
						 f.user_merchant_coupon_transform_rate,f.user_merchant_coupon_buy_rate,f.user_merchant_common_buy_rate,f.user_merchant_rate from
			  (
				select c.*,d.user_avg_distance,d.user_min_distance,d.user_max_distance,d.user_median_distance,d.avg_diff_date_datereceived,d.min_diff_date_datereceived,
					   d.max_diff_date_datereceived,d.buy_use_coupon,d.buy_use_coupon_rate,d.user_coupon_transform_rate,d.count_merchant,d.buy_total,d.coupon_received from
				(
				  select a.*,b.distinct_coupon_count, b.merchant_avg_distance, b.merchant_median_distance, b.merchant_max_distance,b.merchant_user_buy_count,
						 b.merchant_min_distance, b.sales_use_coupon, b.transform_rate,b.coupon_rate,b.total_coupon,b.total_sales from
				  d1_f2 a left outer join d1_f1 b 
				  on a.merchant_id=b.merchant_id
				)c left outer join d1_f3 d
				on c.user_id=d.user_id
			  )e left outer join d1_f4 f
			  on e.user_id=f.user_id and e.merchant_id=f.merchant_id
		  )g  left outer join d1_f6 j 
		on g.user_id=j.user_id and g.coupon_id=j.coupon_id
	)k left outer join d1_f7 l 
	on k.user_id=l.user_id
) t

9.为d1和d2添加label

create table d1_train
as
select *
	, case 
		when date != 'null'
		and date_received != 'null'
		and datediff(to_date(date, 'yyyymmdd'), to_date(date_received, 'yyyymmdd'), 'dd') <= 15 then 1
		else 0
	end as label
from d1;

create table d2_train
as
select *
	, case 
		when date != 'null'
		and date_received != 'null'
		and datediff(to_date(date, 'yyyymmdd'), to_date(date_received, 'yyyymmdd'), 'dd') <= 15 then 1
		else 0
	end as label
from d2;
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

_胡杨_

感谢打赏,会多多更新的。

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值