准备
CREATE TABLE db.test (
`m_id` STRING COMMENT '商品id',
`s_id` STRING COMMENT '商品挂id',
`s_name` STRING COMMENT '名称',
`s_number` BIGINT COMMENT '编号'
)
insert into db.test (m_id, s_id, s_name, s_number) values
('1', '1002', '尺码1002', 1001), ('1', '1002', '尺码1002', 1002), ('1', '1003', '尺码1003', 1003), ('1', '1004', '尺码1004', 1004)
, ('2', '2001', '尺码2001', 2001)
, ('2', '2002', '尺码2002', 2002)
, ('3', '3001', '尺码3001', 3001), ('3', '3002', '尺码3002', 3002), ('3', '3003', '尺码3003', 3003), ('1', '3004', '尺码3004', 3004)
, ('4', '4001', '尺码4001', 4001), ('4', '4002', '尺码4002', 4002);
查看数据
数据收拢
-- mid收拢
select m_id, collect_list(s_struct) from (
select m_id, struct(s_id, s_name, s_number) as s_struct from db.tmp_test_all_20210804
) a group by m_id;
分组收拢
上面收到m_id维度,继续以两条数据一组,收拢成两条
由上面的代码可知,需要寻找group by 共同点, 因此需要引入另一个参数,组别
思路:
以N条为一组
count(*) as 总数
ceil(count(**) / N) as 组别
ceil (rand() * 组别) as 组编号
grop by 组编号
create temporary view vw_future_goods_detail as
select m_id, collect_list(s_struct) as s_item_list from (
select m_id, struct(s_id, s_name, s_number) as s_struct from db.tmp_test_all_20210804
) a group by m_id;
-- 统计总量
CREATE temporary view vw_pms_group_size as
select ceil(count(*) / 2) AS group_size
from vw_future_goods_detail;
-- 将merchandise_no分组
CREATE temporary view vw_pms_goods_hash as
select /*+ BROADCAST(b) */
struct(a.m_id, a.s_item_list) as goods_struct
, ceil(rand() * b.group_size) AS hash
from vw_future_goods_detail a
join vw_pms_group_size b;
-- 按分组合并
CREATE temporary view vw_pms_goods_list as
select a.hash, collect_list(a.goods_struct) as goods_list
from vw_pms_goods_hash a group by hash;
-- 创建临时表
drop table if exists db.temp_test_20210804;
create table db.temp_test_20210804 as
select hash, to_json(goods_list) as goods_list from vw_pms_goods_list;
最终效果:
解开JSON数组对象
explode_outer 将数据展开
-- 解开第一层 数组
select explode_outer(list) from (
select from_json(goods_list, 'ARRAY<STRING>') as list
from db.temp_test_20210804
) a
mid 和 s_item分开
select json_tuple(item, 'm_id', 's_item_list') from (
select explode_outer(list) as item from (
select from_json(goods_list, 'ARRAY<STRING>') as list
from db.temp_test_20210804
) a
) b
c1 字符串 变成 json数组对象
select bigint(c0) as m_id, from_json(c1, 'ARRAY<STRUCT<s_id:STRING, s_name:STRING, s_number:BIGINT>>') as item from (
select json_tuple(item, 'm_id', 's_item_list') from (
select explode_outer(list) as item from (
select from_json(goods_list, 'ARRAY<STRING>') as list
from db.temp_test_20210804
) a
) b
) c
explode_outer 再次将数据展开
select explode_outer (item), m_id from (
select bigint(c0) as m_id, from_json(c1, 'ARRAY<STRUCT<s_id:STRING, s_name:STRING, s_number:BIGINT>>') as item from (
select json_tuple(item, 'm_id', 's_item_list') from (
select explode_outer(list) as item from (
select from_json(goods_list, 'ARRAY<STRING>') as list
from db.temp_test_20210804
) a
) b
) c
) d
解析对象
select m_id, item_a.s_id, item_a.s_name, item_a.s_number from (
select explode_outer (item) as item_a , m_id from (
select bigint(c0) as m_id, from_json(c1, 'ARRAY<STRUCT<s_id:STRING, s_name:STRING, s_number:BIGINT>>') as item from (
select json_tuple(item, 'm_id', 's_item_list') from (
select explode_outer(list) as item from (
select from_json(goods_list, 'ARRAY<STRING>') as list
from db.temp_test_20210804
) a
) b
) c
) d
) f
至
此
,
从
多
条
数
据
聚
合
,
到
聚
合
数
据
解
析
成
多
条
数
据
\color{red}至此,从多条数据聚合,到聚合数据解析成多条数据
至此,从多条数据聚合,到聚合数据解析成多条数据
第二种解法,使用from_json复杂结构
select hash, m_id, item.s_id, item.s_name, item.s_number from (
select hash, m_list.m_id, explode_outer(m_list.s_item_list) as item from (
select explode_outer(list) as m_list, hash from (
select from_json(goods_list, 'ARRAY<STRUCT<m_id:STRING, s_item_list:ARRAY<STRUCT<s_id:STRING, s_name:STRING, s_number:BIGINT>>>>') as list, hash
from db.temp_test_20210804
) a
) b
) c
总结
知 道 的 越 多 , 不 知 道 的 越 多 , 希 望 对 你 有 帮 助 ! \color{red}知道的越多,不知道的越多,希望对你有帮助! 知道的越多,不知道的越多,希望对你有帮助!