pandas之汇总和计算描述统计到层次化索引

# --- Summarizing and computing descriptive statistics ---
# Imports are repeated per section so each section runs on its own.
import numpy as np
from pandas import DataFrame, Series

df = DataFrame(
    [[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
    index=['a', 'b', 'c', 'd'],
    columns=['one', 'two'],
)
df

df.sum()                        # per-column sums
df.sum(axis=1)                  # per-row sums
df.sum(axis=1, skipna=False)    # a row containing any NaN sums to NaN
df.idxmax()                     # index label of each column's maximum
df.cumsum()                     # running totals down each column
df.describe()                   # summary statistics for numeric columns

# describe() on non-numeric data reports count/unique/top/freq instead.
obj = Series(['a', 'a', 'b', 'c'] * 4)
obj
obj.describe()
# --- Downloading price data per ticker ---
# NOTE(review): pandas.io.data has been removed from pandas; its
# functionality moved to the separate pandas-datareader package. This import
# only works on old pandas releases -- confirm the target environment.
import pandas.io.data as web

tickers = ['AAPL', 'IBM', 'MSFT', 'GOOG']

# Fetch the 2000-2010 range for every ticker, keyed by ticker symbol.
all_data = {}
for ticker in tickers:
    all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2000', '1/1/2010')

all_data

# Re-fetch with a later date range; this overwrites the entries above.
for ticker in tickers:
    all_data[ticker] = web.get_data_yahoo(ticker, '1/1/2016', '1/1/2017')

# --- Unique values, value counts and membership ---
# Imports are repeated per section so each section runs on its own.
from pandas import DataFrame, Series

obj = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])

uniques = obj.unique()          # distinct values, in order of first appearance
uniques

obj.value_counts()              # frequencies, most common first
obj.value_counts(sort=False)    # frequencies without sorting by count

mask = obj.isin(['b', 'c'])     # boolean mask of membership in the given set
mask
obj[mask]

data = DataFrame({
    'Qu1': [1, 3, 4, 3, 4],
    'Qu2': [2, 3, 1, 2, 3],
    'Qu3': [1, 5, 2, 4, 4],
})
data

# Pass the function itself to apply(); calling it with no argument first
# raises TypeError. Values missing from a column yield NaN, filled with 0.
data.apply(Series.value_counts)
data.apply(Series.value_counts).fillna(0)
# --- Detecting and dropping missing data ---
# Imports are repeated per section so each section runs on its own.
import numpy as np
from numpy import nan as NA
from pandas import DataFrame, Series

string_data = Series(['aardvark', 'artichoke', np.nan, 'avocado'])
string_data
string_data.isnull()

# None is treated as missing as well.
string_data[0] = None
string_data.isnull()

data = Series([1, NA, 3.5, NA, 7])
data
data.dropna()
data[data.notnull()]            # boolean-indexing form of dropna()

data = DataFrame([[1, 6.5, 3], [1., NA, NA], [NA, NA, NA], [NA, 6.5, 3.]])
data

cleaned = data.dropna()         # drop rows containing any NaN
cleaned
cleaned = data.dropna(how='all')    # drop only rows that are entirely NaN
cleaned

# Add an all-NaN column, then drop columns that are entirely NaN.
# (The earlier data.append(<scalar>) attempts were removed: append does not
# accept scalars and raises.)
data[4] = NA
data
cleaned = data.dropna(how='all', axis=1)
cleaned
# --- Filling in missing data ---
# Imports are repeated per section so each section runs on its own.
import numpy as np
from numpy import nan as NA
from pandas import DataFrame, Series

df = DataFrame(np.random.randn(7, 3))
df

# .loc replaces the removed .ix indexer; label slices include the end label.
df.loc[:4, 1] = NA
df.loc[:2, 2] = NA
df

df.dropna(thresh=3)             # keep rows with at least 3 non-NaN values
df.fillna(0)                    # returns a new frame; df is unchanged
df.fillna({1: 0.5, 3: -1})      # per-column fill values keyed by column label
df

# inplace=True modifies df itself and returns None.
_ = df.fillna(0, inplace=True)
df

df = DataFrame(np.random.randn(6, 3))
df.loc[2:, 1] = NA
df.loc[4:, 2] = NA
df

# ffill() replaces the deprecated fillna(method='ffill').
df.ffill()
df.ffill(limit=2)               # forward-fill at most 2 consecutive NaNs

data = Series([1., NA, 3.5, NA, 7])
data
data.fillna(data.mean())        # fill gaps with the mean of the present values
# --- Hierarchical (multi-level) indexing ---
# Imports are repeated per section so each section runs on its own.
import numpy as np
from pandas import DataFrame, MultiIndex, Series

# A Series indexed by two levels: an outer letter and an inner integer.
data = Series(
    np.random.randn(10),
    index=[
        ['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
        [1, 2, 3, 1, 2, 3, 1, 2, 2, 3],
    ],
)
data
data.index

data['b']                       # partial indexing on the outer level
data['b':'c']                   # slice of outer-level labels
data.loc[['b', 'd']]            # .loc replaces the removed .ix indexer
data.loc[:, 2]                  # select on the inner level across all outer labels

data.unstack()                  # pivot the inner level into columns
data.unstack().stack()          # and back again

# Both axes of a DataFrame can carry a hierarchical index.
frame = DataFrame(
    np.arange(12).reshape((4, 3)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
)
frame

frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame

frame['Ohio']                   # partial indexing on the outer column level

# The same column index built explicitly.
MultiIndex.from_arrays(
    [['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
    names=['state', 'color'],
)
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值