groupby

import pandas as pd
import numpy as np
# Demo frame used by the first groupby examples: two string key columns
# and two columns of random integers drawn from [1, 10).
df = pd.DataFrame(
    {
        "key1": ["a", "a", "b", "b", "a"],
        "key2": ["one", "two", "one", "two", "one"],
        "data1": np.random.randint(1, 10, 5),
        "data2": np.random.randint(1, 10, 5),
    }
)
df
data1data2key1key2
033aone
114atwo
294bone
344btwo
474aone
df["data1"].groupby(df["key1"]).mean()
key1
a    3.666667
b    6.500000
Name: data1, dtype: float64
key = [1,2,1,2,3]
df["data1"].groupby(key).mean()
1    6.0
2    2.5
3    7.0
Name: data1, dtype: float64
df["data1"].groupby([df["key1"],df["key2"]]).sum()
key1  key2
a     one     10
      two      1
b     one      9
      two      4
Name: data1, dtype: int32
df["data1"].groupby([df["key1"],df["key2"]]).size()
key1  key2
a     one     2
      two     1
b     one     1
      two     1
dtype: int64
# Sum only the numeric columns per key1 group. Without numeric_only=True,
# current pandas would also "sum" the string column key2 by concatenating it,
# which is not what the table printed below shows.
df.groupby("key1").sum(numeric_only=True)
data1data2
key1
a1111
b138
# Pick the "data1" column before aggregating so only that column is summed,
# instead of summing every column and discarding the rest afterwards.
# NOTE: despite its name, `mean` holds per-group sums; the name is kept
# because the following `mean.unstack()` cell refers to it.
mean = df.groupby(["key1", "key2"])["data1"].sum()
mean
key1  key2
a     one     10
      two      1
b     one      9
      two      4
Name: data1, dtype: int32
mean.unstack()
key2onetwo
key1
a101
b94
for name,group in df.groupby("key1"):
    print(name)
    print(group)
a
   data1  data2 key1 key2
0      3      3    a  one
1      1      4    a  two
4      7      4    a  one
b
   data1  data2 key1 key2
2      9      4    b  one
3      4      4    b  two
# Group the *columns* by their dtype and add them up row by row.
# `groupby(..., axis=1)` is deprecated, so transpose, group the (former)
# columns along the index, aggregate, and transpose back.
# The result is object dtype because the transposed frame mixes strings
# and integers; string columns are concatenated, integer columns are added.
df.T.groupby(df.dtypes).sum().T
int32object
06aone
15atwo
213bone
38btwo
411aone
# 5x5 frame of random integers in [1, 10) with named rows and columns.
df = pd.DataFrame(np.random.randint(1, 10, (5, 5)),
                  columns=["a", "b", "c", "d", "e"],
                  index=["Alice", "Bob", "Candy", "Dark", "Emily"])
# NaN cannot live in an integer column, so make the two target columns float
# explicitly instead of relying on implicit upcasting during assignment.
df = df.astype({"b": float, "c": float})
# Blank out row 1 ("Bob"), columns 1..2 ("b", "c") by position.
# `.iloc` replaces the removed `.ix` indexer; `np.nan` replaces the removed
# `np.NaN` alias.
df.iloc[1, 1:3] = np.nan
df
abcde
Alice44.05.046
Bob6NaNNaN81
Candy79.06.088
Dark36.01.039
Emily23.06.091
# Map every column label to a colour bucket; columns that share a colour
# ("a"/"b" -> red, "c"/"e" -> blue, "d" -> orange) end up in the same group.
mapping = {"a":"red","b":"red","c":"blue","d":"orange","e":"blue"}
# Group along the column axis using the mapping above. `grouped` is reused
# by the later `grouped.count()` cell.
# NOTE(review): `axis=1` in DataFrame.groupby is deprecated in recent pandas
# releases (transpose-then-groupby is the suggested replacement) — confirm
# against the pandas version this is meant to run on.
grouped = df.groupby(mapping, axis=1)
# Per-row sum of the columns inside each colour bucket.
grouped.sum()
blueorangered
Alice11.04.08.0
Bob1.08.06.0
Candy14.08.016.0
Dark10.03.09.0
Emily7.09.05.0
grouped.count()
blueorangered
Alice212
Bob111
Candy212
Dark212
Emily212
# Fresh 5x5 frame of random integers in [1, 10); rows are labelled with
# names of different lengths, which the function-based groupby cells use.
df = pd.DataFrame(
    np.random.randint(1, 10, (5, 5)),
    columns=["a", "b", "c", "d", "e"],
    index=["Alice", "Bob", "Candy", "Dark", "Emily"],
)
df
abcde
Alice79898
Bob84291
Candy96111
Dark66811
Emily35886
def _group_key(idx):
    """Print *idx* and return it unchanged.

    Passed to ``df.groupby`` in the next cell, where it is called with the
    index labels, so every label becomes its own group key.
    """
    print(idx)
    return idx
df.groupby(_group_key).size()
Alice
Bob
Candy
Dark
Emily





Alice    1
Bob      1
Candy    1
Dark     1
Emily    1
dtype: int64
def _group_key(idx):
    """Print *idx* and return its length.

    Passed to ``df.groupby`` in the next cell, where it is called with the
    index labels, so labels of equal length fall into the same group.
    """
    print(idx)
    label_length = len(idx)
    return label_length
df.groupby(_group_key).size()
Alice
Bob
Candy
Dark
Emily





3    1
4    1
5    3
dtype: int64
df.groupby(len).size()
3    1
4    1
5    3
dtype: int64
df.groupby(len).sum()
abcde
384291
466811
51920171815
# Two-level column index: the outer level is named "country", the inner
# level is named "index".
columns = pd.MultiIndex.from_arrays(
    [
        ["china", "usa", "china", "usa", "china"],
        ["A", "A", "B", "C", "B"],
    ],
    names=["country", "index"],
)
# 5x5 frame of random integers in [1, 10) carrying those hierarchical columns.
df = pd.DataFrame(np.random.randint(1, 10, (5, 5)), columns=columns)
df
countrychinausachinausachina
indexAABCB
065559
195292
216592
389133
422796
# Sum the columns that share the same "country" level, row by row.
# `groupby(..., axis=1)` is deprecated, so transpose, group on the index
# level, aggregate, and transpose back.
df.T.groupby(level="country").sum().T
countrychinausa
02010
11314
2815
31212
41511
# Sum the columns that share the same "index" level, row by row.
# `groupby(..., axis=1)` is deprecated, so transpose, group on the index
# level, aggregate, and transpose back.
df.T.groupby(level="index").sum().T
indexABC
011145
11449
2779
31743
44139
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值