def _show(banner, result):
    """Print a separator line naming the expression, then the result itself."""
    print(banner)
    print(result)


# Demo frame: "Val" contains NaN entries so that count() and size() can be
# compared on the same groups.
df = pd.DataFrame(
    {
        "Name": ["Alice", "Bob", "Mallory", "Mallory", "Bob", "Mallory"],
        "City": ["Seattle", "Seattle", "Portland", "Seattle", "Seattle", "Portland"],
        "Val": [4, 3, 3, np.nan, np.nan, 4],
    }
)
print(df)

# Grouping keys shared by the examples below.
_name_city = ["Name", "City"]
_city = ["City"]

# count() restricted to the "Val" column of each (Name, City) group.
df1 = df.groupby(_name_city, as_index=False)["Val"].count()
_show(
    '--------------------------groupby(["Name", "City"], as_index=False)[Val].count()',
    df1,
)

# count() reports, for every remaining column, how many values occur per group.
df1 = df.groupby(_city, as_index=False).count()
_show('--------------------------groupby([City], as_index=False).count()', df1)

# size() only reports how many times each grouping key occurs.
# NOTE(review): the shape of this result (Series vs. DataFrame) with
# as_index=False appears to depend on the pandas version -- confirm.
df1 = df.groupby(_city, as_index=False).size()
_show('--------------------------.groupby(["City"], as_index=False).size()', df1)

# count() over every non-key column, grouped by (Name, City).
df2 = df.groupby(_name_city, as_index=False).count()
_show(
    '--------------------------df.groupby(["Name", "City"], as_index=False).count()',
    df2,
)

# size() of each (Name, City) group, flattened into a frame with a "Size" column.
_val_sizes = df.groupby(_name_city)["Val"].size()
df3 = _val_sizes.reset_index(name="Size")
_show(
    '--------------------------groupby(["Name", "City"])[Val].size().reset_index(name=Size)',
    df3,
)

# size() of each (Name, City) group, left indexed by the grouping keys.
df4 = df.groupby(_name_city).size()
_show('--------------------------groupby(["Name", "City"]).size()', df4)
结果如下:
Name City Val
0 Alice Seattle 4.0
1 Bob Seattle 3.0
2 Mallory Portland 3.0
3 Mallory Seattle NaN
4 Bob Seattle NaN
5 Mallory Portland 4.0
--------------------------groupby(["Name", "City"], as_index=False)[Val].count()
Name City Val
0 Alice Seattle 1
1 Bob Seattle 1
2 Mallory Portland 2
3 Mallory Seattle 0
--------------------------groupby([City], as_index=False).count()
City Name Val
0 Portland 2 2
1 Seattle 4 2
--------------------------.groupby(["City"], as_index=False).size()
City
Portland 2
Seattle 4
dtype: int64
--------------------------df.groupby(["Name", "City"], as_index=False).count()
Name City Val
0 Alice Seattle 1
1 Bob Seattle 1
2 Mallory Portland 2
3 Mallory Seattle 0
--------------------------groupby(["Name", "City"])[Val].size().reset_index(name=Size)
Name City Size
0 Alice Seattle 1
1 Bob Seattle 2
2 Mallory Portland 2
3 Mallory Seattle 1
--------------------------groupby(["Name", "City"]).size()
Name     City
Alice    Seattle     1
Bob      Seattle     2
Mallory  Portland    2
         Seattle     1
dtype: int64