求用户最先购买的两种子类别产品信息(cpzl_zw),要求拼接成 “用户号-产品1-产品2” 的形式
数据表ods_sales_orders的前十行
第一步:求出每位客户消费的下一个产品
SELECT
customer_key,
cpzl_zw,
row_number() over(partition by customer_key order by create_date) as order_num,
lag(cpzl_zw,1) over(partition by customer_key order by create_date) as cpzl_zw1
from ods_sales_orders;
第二步:选出相邻两个产品不一致的记录
create table temp1 as
SELECT
customer_key,
cpzl_zw,
order_num,
cpzl_zw1
from (
SELECT
customer_key,
cpzl_zw,
row_number() over(partition by customer_key order by create_date) as order_num,
lag(cpzl_zw,1) over(partition by customer_key order by create_date) as cpzl_zw1
from ods_sales_orders) a
where cpzl_zw != cpzl_zw1;
第三步:将相邻两个产品不一致的记录的表中的order_num重新排序,以便选出前两个产品
select customer_key,
cpzl_zw,
order_num,
cpzl_zw1,
row_number() over(partition by customer_key order by order_num) as new_order_num
from temp1
第四步:选出前两个产品,并按照要求输出结果
知识点:with as …
with
temp1 as (
SELECT
customer_key,
cpzl_zw,
order_num,
cpzl_zw1
from (
SELECT
customer_key,
cpzl_zw,
row_number() over(partition by customer_key order by create_date) as order_num,
lag(cpzl_zw,1) over(partition by customer_key order by create_date) as cpzl_zw1
from ods_sales_orders) a
where cpzl_zw != cpzl_zw1),
temp2 as (
select customer_key,
cpzl_zw,
order_num,
cpzl_zw1,
row_number() over(partition by customer_key order by order_num) as new_order_num
from temp1)
select
concat(customer_key,'_',concat_ws('_',collect_set(cpzl_zw))) as z1
from temp2
where new_order_num <3
group by customer_key;
结果局部截图: