python pandas分组聚合时字符串相加

最新推荐文章于 2024-05-06 14:07:24 发布

weixin_45903952

最新推荐文章于 2024-05-06 14:07:24 发布

阅读量2.4k

点赞数

本文链接：https://blog.csdn.net/weixin_45903952/article/details/105334082

版权

python pandas分组聚合时字符串相加

import pandas as pd


# Demo: summing a string column inside a groupby concatenates the strings.
# Each record is [userid, movid, timestamp, 时, 地].
chengji = [
    [2, 600100, 95, 100, "fe1"],
    [2, 600100, 98, 99, "fe2"],
    [2, 600100, 95, 98, "fe3"],
    [3, 89774, 98, 97, "fe3"],
    [2, 89774, 90, 96, "fe3"],
    [3, 89774, 94, 93, "fe4"],
]
data = pd.DataFrame(chengji, columns=['userid', 'movid', 'timestamp', '时', '地'])
print(data.index, data.columns)  # row index, then column labels
print("*"*50, "原始数据")
print(data)

# Group on two key columns; a single column name would work the same way.
group_keys = ['userid', 'movid']
data1 = data.groupby(group_keys)

# Sum only the non-key columns. Calling the groupby reduction directly
# (instead of apply(sum)) avoids summing the key columns and then dropping
# them, and does not depend on pandas swapping the builtin `sum` for its
# own implementation. For the string column the group sum is the
# concatenation of the group's strings in row order.
value_columns = [col for col in data.columns if col not in group_keys]
data2 = data1[value_columns].sum()
data2 = data2.reset_index()  # turn the group keys back into ordinary columns
print(data2)

# Show how the rows were split: the key -> row-label mapping, then each group.
print(data1.groups)
for name, group in data1:
    print(name)
    print(group)

RangeIndex(start=0, stop=6, step=1) Index(['userid', 'movid', 'timestamp', '时', '地'], dtype='object')
************************************************** 原始数据
userid movid timestamp 时 地
0 2 600100 95 100 fe1
1 2 600100 98 99 fe2
2 2 600100 95 98 fe3
3 3 89774 98 97 fe3
4 2 89774 90 96 fe3
5 3 89774 94 93 fe4
userid movid timestamp 时 地
0 2 89774 90 96 fe3
1 2 600100 288 297 fe1fe2fe3
2 3 89774 192 190 fe3fe4

也就是说，分组聚合时对字符串列求和（sum），效果是把同一组内的字符串按行顺序拼接起来，这是一个意外的收获。

下面是略有变化的三种方法：三种都先用 groupby 分组，方法一、二对分组结果做聚合（每组得到一行），方法三改用 transform（结果保持原数据的行数，可以直接与原数据拼接），与前两种略有不同。

import pandas as pd

# Demo: three variations on concatenating strings via a grouped sum.
# Each record is [userid, movid, timestamp, 时, tag].
chengji = [
    [2, 600100, 95, 100, "fe1"],
    [2, 600100, 98, 99, "fe2"],
    [2, 600100, 95, 98, "fe3"],
    [3, 89774, 98, 97, "fe3"],
    [2, 89774, 90, 96, "fe3"],
    [3, 89774, 94, 93, "fe4"],
]
data = pd.DataFrame(chengji, columns=['userid', 'movid', 'timestamp', '时', 'tag'])
print(data.index, data.columns)  # row index, then column labels
print("*"*50, "原始数据")
print(data)

print("*"*50, "方法一")
# Group on two key columns; a single column name would work the same way.
group_keys = ['userid', 'movid']
data1 = data.groupby(group_keys)
# Sum every non-key column per group; the string column `tag` is
# concatenated. Using the groupby reduction directly (instead of
# apply(sum)) avoids summing the key columns and then dropping them.
value_columns = [col for col in data.columns if col not in group_keys]
data2 = data1[value_columns].sum()
data2 = data2.reset_index()  # turn the group keys back into ordinary columns
print(data2)

print("*"*50, "方法二")
# Same aggregation restricted to two columns. The columns are selected with
# a list: selecting with a tuple (data1['时','tag']) raises ValueError on
# pandas 2.0 and later.
data2 = data1[['时', 'tag']].sum()
data2 = data2.reset_index()
print(data2)

print("-"*25, "二")
# Aggregate only `tag`, then keep the rows belonging to userid 2.
data2 = data1['tag'].sum()
data2 = data2.reset_index()
data2 = data2[data2['userid'] == 2]
print(data2)

print("*"*50, "方法三")
# Aggregation and transform compute the same per-group values but shape the
# result differently: aggregation returns one row per group, indexed by the
# group keys; transform returns one row per original row, so the result
# lines up with the source frame and can be joined to it with concat().
data2 = data1[['时', 'tag']].transform('sum').add_prefix('sum111')  # prefix marks the summed columns
data2 = data2.reset_index()  # keeps the original row labels as an `index` column
print(data2)
data3 = pd.concat([data, data2], axis=1)
print(data3)


print("*"*50, "group的显示方法")
# Show how the rows were split: the key -> row-label mapping, then each group.
print(data1.groups)
for name, group in data1:
    print(name)
    print(group)

运行结果：

RangeIndex(start=0, stop=6, step=1) Index(['userid', 'movid', 'timestamp', '时', 'tag'], dtype='object')
************************************************** 原始数据
userid movid timestamp 时 tag
0 2 600100 95 100 fe1
1 2 600100 98 99 fe2
2 2 600100 95 98 fe3
3 3 89774 98 97 fe3
4 2 89774 90 96 fe3
5 3 89774 94 93 fe4
************************************************** 方法一
userid movid timestamp 时 tag
0 2 89774 90 96 fe3
1 2 600100 288 297 fe1fe2fe3
2 3 89774 192 190 fe3fe4
************************************************** 方法二
userid movid 时 tag
0 2 89774 96 fe3
1 2 600100 297 fe1fe2fe3
2 3 89774 190 fe3fe4
------------------------- 二
userid movid tag
0 2 89774 fe3
1 2 600100 fe1fe2fe3
************************************************** 方法三
index sum111时 sum111tag
0 0 297 fe1fe2fe3
1 1 297 fe1fe2fe3
2 2 297 fe1fe2fe3
3 3 190 fe3fe4
4 4 96 fe3
5 5 190 fe3fe4
userid movid timestamp 时 tag index sum111时 sum111tag
0 2 600100 95 100 fe1 0 297 fe1fe2fe3
1 2 600100 98 99 fe2 1 297 fe1fe2fe3
2 2 600100 95 98 fe3 2 297 fe1fe2fe3
3 3 89774 98 97 fe3 3 190 fe3fe4
4 2 89774 90 96 fe3 4 96 fe3
5 3 89774 94 93 fe4 5 190 fe3fe4
************************************************** group的显示方法
{(2, 89774): Int64Index([4], dtype='int64'), (2, 600100): Int64Index([0, 1, 2], dtype='int64'), (3, 89774): Int64Index([3, 5], dtype='int64')}
(2, 89774)
userid movid timestamp 时 tag
4 2 89774 90 96 fe3
(2, 600100)
userid movid timestamp 时 tag
0 2 600100 95 100 fe1
1 2 600100 98 99 fe2
2 2 600100 95 98 fe3
(3, 89774)
userid movid timestamp 时 tag
3 3 89774 98 97 fe3
5 3 89774 94 93 fe4

weixin_45903952

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python pandas分组聚合时字符串相加

python pandas分组聚合时字符串相加import pandas as pdchengji=[[2,600100,95,100,"fe1"],[2,600100,98,99,"fe2"],[2,600100,95,98,"fe3"],[3,89774,98,97,"fe3"],[2,89774,90,96,"fe3"],[3,89774,94,93,"fe4"]]data=pd....
复制链接

扫一扫