mongodb中有下面一组数据,业务场景就不介绍了,看数据应该很好理解
[
{
"_id":"606fdd0805f5710008a82223",
"brand_id":"1",
"brand_name":"APPLE",
"series_id":"11",
"series_name":"iPhone",
"product_name":"iPhone12",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
]
},
{
"_id":"606d7f741492340008497daf",
"brand_id":"1",
"brand_name":"APPLE",
"series_id":"11",
"series_name":"MacBook",
"product_name":"MacBook Pro",
"cover":[
{
"oss_name":"56f42ec6aa7b87010c3b2b09310d37d6.png"
}
]
},
{
"_id":"6080d04d2cc7f20008f22670",
"brand_id":"2",
"brand_name":"SONY",
"series_id":"21",
"series_name":"PlayStation",
"product_name":"PS5",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
]
}
]
我们希望对品牌、系列进行分组聚合变为如下数据
[
{
"brand_id":"1",
"brand_name":"APPLE",
"series_list":[
{
"series_name":"iPhone",
"series_id":"11",
"products_list":[
{
"_id":"606fdd0805f5710008a82223",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
],
"product_name":"iPhone12"
}
]
},
{
"series_name":"MacBook",
"series_id":"12",
"products_list":[
{
"_id":"606fdd0805f5710008a82223",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
],
"product_name":"MacBook Pro"
}
]
}
]
},
{
"brand_id":"2",
"brand_name":"SONY",
"series_list":[
{
"series_name":"PlayStation",
"series_id":"21",
"products_list":[
{
"_id":"6080d04d2cc7f20008f22670",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
],
"product_name":"PS5"
}
]
}
]
}
]
相同品牌的系列分组到同一品牌下的series_list集合中,相同品牌和系列的产品分组到同一品牌下的同一series_list中的products_list集合中
使用mongodb的aggregate框架理论上能实现,但应该比较复杂,如果有高手了解还望不吝赐教。
由于有一定的pandas基础,我尝试使用pandas处理上面的数据
预处理
在分组聚合前,先对数据进行预处理,改一下数据结构,变为如下结构
[
{
"brand_id":"1",
"brand_name":"APPLE",
"series_list":{
"series_name":"iPhone",
"series_id":"11",
"products_list":{
"_id":"606fdd0805f5710008a82223",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
],
"product_name":"iPhone12.0"
}
}
},
{
"brand_id":"1",
"brand_name":"APPLE",
"series_list":{
"series_name":"MacBook",
"series_id":"11",
"products_list":{
"_id":"606d7f741492340008497daf",
"cover":[
{
"oss_name":"56f42ec6aa7b87010c3b2b09310d37d6.png"
}
],
"product_name":"MacBook Pro"
}
}
},
{
"brand_id":"2",
"brand_name":"SONY",
"series_list":{
"series_name":"PlayStation",
"series_id":"21",
"products_list":{
"_id":"6080d04d2cc7f20008f22670",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
],
"product_name":"PS5"
}
}
}
]
for product in prodocts_list:
product['series_list'] = {}
product['series_list']['series_name'] = product.pop('series_name')
product['series_list']['series_id'] = product.pop('series_id')
product['series_list']['products_list'] = {}
product['series_list']['products_list']['_id'] = product.pop('_id')
product['series_list']['products_list']['cover'] = product.pop('cover')
product['series_list']['products_list']['product_name'] = product.pop('product_name')
分组聚合
进行两次groupby操作,第一次groupby将所有相同品牌的产品分组到同一series_list中
>>> brands = pd.DataFrame(prodocts_list).groupby(['brand_id', 'brand_name']).agg(list).reset_index().to_dict('records')
[
{
"brand_id":"1",
"brand_name":"APPLE",
"series_list":[
{
"series_name":"iPhone",
"series_id":"11",
"products_list":{
"_id":"606fdd0805f5710008a82223",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
],
"product_name":"iPhone12"
}
},
{
"series_name":"MacBook",
"series_id":"11",
"products_list":{
"_id":"606d7f741492340008497daf",
"cover":[
{
"oss_name":"56f42ec6aa7b87010c3b2b09310d37d6.png"
}
],
"product_name":"MacBook Pro"
}
}
]
},
{
"brand_id":"2",
"brand_name":"SONY",
"series_list":[
{
"series_name":"PlayStation",
"series_id":"21",
"products_list":{
"_id":"6080d04d2cc7f20008f22670",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
],
"product_name":"PS5"
}
}
]
}
]
第二次groupby,则对每个品牌中的series_list进行groupby,将相同系列的产品分组到同一products_list中
>>> for brand in brands:
brand['series_list'] = pd.DataFrame(brand['series_list']).groupby(['series_name', 'series_id']).agg(list).reset_index().to_dict('records')
[
{
"brand_id":"1",
"brand_name":"APPLE",
"series_list":[
{
"series_name":"MacBook",
"series_id":"11",
"products_list":[
{
"_id":"606d7f741492340008497daf",
"cover":[
{
"oss_name":"56f42ec6aa7b87010c3b2b09310d37d6.png"
}
],
"product_name":"MacBook Pro"
}
]
},
{
"series_name":"iPhone",
"series_id":"11",
"products_list":[
{
"_id":"606fdd0805f5710008a82223",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
],
"product_name":"iPhone12"
}
]
}
]
},
{
"brand_id":"2",
"brand_name":"SONY",
"series_list":[
{
"series_name":"PlayStation",
"series_id":"21",
"products_list":[
{
"_id":"6080d04d2cc7f20008f22670",
"cover":[
{
"oss_name":"0e4ebf0ac613a757fff916a90b2224c9.jpg"
}
],
"product_name":"PS5"
}
]
}
]
}
]