pandas__11

%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Six consecutive daily timestamps starting 2016-03-01, used as the row index.
dates = pd.date_range(start="20160301", periods=6)
# 6x4 frame of standard-normal samples with columns labelled A through D.
df = pd.DataFrame(
    np.random.randn(6, 4),
    columns=["A", "B", "C", "D"],
    index=dates,
)
df
ABCD
2016-03-01-0.2485980.0485890.4938320.379403
2016-03-02-0.677500-1.319923-1.7302550.011082
2016-03-031.041719-1.2487690.287455-0.243821
2016-03-04-1.351725-0.948413-0.168241-0.206230
2016-03-05-0.712360-0.676127-0.218639-1.713820
2016-03-06-2.107682-1.155953-0.982534-1.005173
# Keep only the first four dates and add a new, initially empty (NaN) column "E".
df1 = df.reindex(index=dates[:4], columns=[*df.columns, "E"])
df1
ABCDE
2016-03-01-0.2485980.0485890.4938320.379403NaN
2016-03-02-0.677500-1.319923-1.7302550.011082NaN
2016-03-031.041719-1.2487690.287455-0.243821NaN
2016-03-04-1.351725-0.948413-0.168241-0.206230NaN
# Fill column "E" for the second and third dates only; the other rows keep NaN.
df1.loc[dates[1:3], "E"] = 2
df1
ABCDE
2016-03-01-0.2485980.0485890.4938320.379403NaN
2016-03-02-0.677500-1.319923-1.7302550.0110822.0
2016-03-031.041719-1.2487690.287455-0.2438212.0
2016-03-04-1.351725-0.948413-0.168241-0.206230NaN
# Drop every row that contains at least one missing value (returns a new frame).
df1.dropna(axis=0, how="any")
ABCDE
2016-03-02-0.677500-1.319923-1.7302550.0110822.0
2016-03-031.041719-1.2487690.287455-0.2438212.0
# Replace every missing value with 5 (returns a new frame; df1 is unchanged).
df1.fillna(5)
ABCDE
2016-03-01-0.2485980.0485890.4938320.3794035.0
2016-03-02-0.677500-1.319923-1.7302550.0110822.0
2016-03-031.041719-1.2487690.287455-0.2438212.0
2016-03-04-1.351725-0.948413-0.168241-0.2062305.0
# True if any cell anywhere in df1 is missing: reduce per column, then overall.
df1.isnull().any().any()
True
# Per-column mean; missing values are skipped by default.
df1.mean(axis=0)
A   -0.309026
B   -0.867129
C   -0.279302
D   -0.014891
E    2.000000
dtype: float64
# Per-row mean, computed across the columns of each row.
df1.mean(axis="columns")
2016-03-01    0.168306
2016-03-02   -0.343319
2016-03-03    0.367317
2016-03-04   -0.668652
Freq: D, dtype: float64
# Running total down each column.
df1.cumsum(axis=0)
ABCDE
2016-03-01-0.2485980.0485890.4938320.379403NaN
2016-03-02-0.926098-1.271334-1.2364230.3904852.0
2016-03-030.115621-2.520104-0.9489680.1466644.0
2016-03-04-1.236104-3.468517-1.117209-0.059566NaN
# Series aligned on `dates`, then shifted down two positions: the first two
# slots become NaN and the last two original values fall off the end.
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(periods=2)
s
2016-03-01    NaN
2016-03-02    NaN
2016-03-03    1.0
2016-03-04    3.0
2016-03-05    5.0
2016-03-06    NaN
Freq: D, dtype: float64
# Re-display df for reference before the next operation.
df
ABCD
2016-03-01-0.2485980.0485890.4938320.379403
2016-03-02-0.677500-1.319923-1.7302550.011082
2016-03-031.041719-1.2487690.287455-0.243821
2016-03-04-1.351725-0.948413-0.168241-0.206230
2016-03-05-0.712360-0.676127-0.218639-1.713820
2016-03-06-2.107682-1.155953-0.982534-1.005173
# Subtract s from every column, matching values by row label; rows where s is
# NaN come out as NaN.
df.sub(s, axis=0)
ABCD
2016-03-01NaNNaNNaNNaN
2016-03-02NaNNaNNaNNaN
2016-03-030.041719-2.248769-0.712545-1.243821
2016-03-04-4.351725-3.948413-3.168241-3.206230
2016-03-05-5.712360-5.676127-5.218639-6.713820
2016-03-06NaNNaNNaNNaN
# Re-display df for reference before the next operation.
df
ABCD
2016-03-01-0.2485980.0485890.4938320.379403
2016-03-02-0.677500-1.319923-1.7302550.011082
2016-03-031.041719-1.2487690.287455-0.243821
2016-03-04-1.351725-0.948413-0.168241-0.206230
2016-03-05-0.712360-0.676127-0.218639-1.713820
2016-03-06-2.107682-1.155953-0.982534-1.005173
# Apply NumPy's cumulative sum to each column in turn.
df.apply(np.cumsum, axis=0)
ABCD
2016-03-01-0.2485980.0485890.4938320.379403
2016-03-02-0.926098-1.271334-1.2364230.390485
2016-03-030.115621-2.520104-0.9489680.146664
2016-03-04-1.236104-3.468517-1.117209-0.059566
2016-03-05-1.948464-4.144644-1.335848-1.773385
2016-03-06-4.056147-5.300596-2.318382-2.778558
# Spread (max minus min) of each column.
df.apply(lambda col: col.max() - col.min())
A    3.149402
B    1.368512
C    2.224087
D    2.093223
dtype: float64
def _sum(x):
    """Print the type of ``x`` and return ``x.sum()``.

    Used with ``DataFrame.apply`` to show what kind of object is handed to
    the callback.
    """
    received_type = type(x)
    print(received_type)
    total = x.sum()
    return total
# Call _sum once per column; each call prints the type of the object it receives.
df.apply(_sum, axis=0)
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>





A   -4.056147
B   -5.300596
C   -2.318382
D   -2.778558
dtype: float64
# Twenty random integers drawn from 10..19 (the upper bound 20 is exclusive).
s = pd.Series(np.random.randint(low=10, high=20, size=20))
s
0     14
1     19
2     15
3     16
4     15
5     13
6     17
7     11
8     16
9     13
10    14
11    12
12    11
13    12
14    14
15    11
16    14
17    13
18    15
19    18
dtype: int32
# Frequency of each distinct value, most common first.
s.value_counts(sort=True, ascending=False)
14    4
15    3
13    3
11    3
16    2
12    2
19    1
18    1
17    1
dtype: int64
# Most frequent value(s) in s, returned as a Series because ties are possible.
s.mode()
0    14
dtype: int32
# 10x4 frame of standard-normal samples with the default 0..9 integer index.
df = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"])
df
ABCD
0-0.2585110.839909-1.242957-1.542059
1-0.0459731.434317-0.701617-1.780006
2-0.188364-0.1870670.725562-0.706046
30.9991750.7497670.834201-0.545250
40.1673200.444925-1.1007630.588002
50.135248-1.586037-0.638731-0.240570
6-0.457926-1.206899-1.017282-1.336446
70.4362072.1924520.845839-1.458328
81.4850750.189602-0.232063-0.873814
91.0821261.4915390.2907440.336791
# First three rows, selected by integer position.
df.iloc[:3]
ABCD
0-0.2585110.839909-1.242957-1.542059
1-0.0459731.434317-0.701617-1.780006
2-0.188364-0.1870670.725562-0.706046
# Rows at positions 3 through 6 (the stop position 7 is excluded).
df.iloc[3:7]
ABCD
30.9991750.7497670.834201-0.545250
40.1673200.444925-1.1007630.588002
50.135248-1.586037-0.638731-0.240570
6-0.457926-1.206899-1.017282-1.336446
# Rows from position 7 through the end of the frame.
df.iloc[7:]
ABCD
70.4362072.1924520.845839-1.458328
81.4850750.189602-0.232063-0.873814
91.0821261.4915390.2907440.336791
# Stitch the three positional slices (rows 0-2, 3-6, 7-end) back together in
# order, rebuilding a frame with the same rows as df.
df1 = pd.concat(
    [df.iloc[start:stop] for start, stop in ((0, 3), (3, 7), (7, None))]
)
df1
ABCD
0-0.2585110.839909-1.242957-1.542059
1-0.0459731.434317-0.701617-1.780006
2-0.188364-0.1870670.725562-0.706046
30.9991750.7497670.834201-0.545250
40.1673200.444925-1.1007630.588002
50.135248-1.586037-0.638731-0.240570
6-0.457926-1.206899-1.017282-1.336446
70.4362072.1924520.845839-1.458328
81.4850750.189602-0.232063-0.873814
91.0821261.4915390.2907440.336791
# Element-wise comparison reduced twice: True only if every cell matches.
df.eq(df1).all().all()
True
# Two small frames that share a "key" column; both rows of each carry the same
# key, so a join on "key" yields every left/right pairing.
left = pd.DataFrame(
    {
        "key": ["foo", "foo"],
        "lval": [1, 2],
    }
)
right = pd.DataFrame(
    {
        "key": ["foo", "foo"],
        "rval": [4, 5],
    }
)
left
keylval
0foo1
1foo2
# Display the right-hand frame of the upcoming merge.
right
keyrval
0foo4
1foo5
# SQL analogue: SELECT * FROM left INNER JOIN right ON left.key = right.key
left.merge(right, how="inner", on="key")
keylvalrval
0foo14
1foo15
2foo24
3foo25
# Five random integers from 1..4 (upper bound 5 is exclusive), labelled A-E so
# the Series can later be added to a frame as a row keyed by column name.
s = pd.Series(
    np.random.randint(low=1, high=5, size=5),
    index=["A", "B", "C", "D", "E"],
)
s
A    1
B    1
C    4
D    4
E    2
dtype: int32
# Add s to df as one extra row. DataFrame.append was deprecated in pandas 1.4
# and removed in 2.0, so pd.concat is used instead. s.to_frame().T turns the
# Series into a one-row frame whose columns are the Series labels (A-E);
# column "E", which df lacks, is filled with NaN for the existing rows, and
# ignore_index=True renumbers the result 0..n.
pd.concat([df, s.to_frame().T], ignore_index=True)
ABCDE
0-0.2585110.839909-1.242957-1.542059NaN
1-0.0459731.434317-0.701617-1.780006NaN
2-0.188364-0.1870670.725562-0.706046NaN
30.9991750.7497670.834201-0.545250NaN
40.1673200.444925-1.1007630.588002NaN
50.135248-1.586037-0.638731-0.240570NaN
6-0.457926-1.206899-1.017282-1.336446NaN
70.4362072.1924520.845839-1.458328NaN
81.4850750.189602-0.232063-0.873814NaN
91.0821261.4915390.2907440.336791NaN
101.0000001.0000004.0000004.0000002.0
# Demo frame for groupby: two categorical key columns (A, B) and two columns of
# standard-normal samples (C, D).
df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    }
)
df
ABCD
0fooone-0.8835211.259531
1barone-0.3534760.054437
2footwo-0.2570920.574759
3barthree0.678201-1.091763
4footwo-1.6255640.475802
5bartwo1.418208-0.240999
6fooone0.535367-0.925612
7foothree-0.4012020.009708
# Sum the numeric columns (C, D) within each group of A. Column B holds
# strings; older pandas silently dropped such columns from the result, but
# pandas 2.x no longer does, so numeric_only=True is passed explicitly to keep
# the result limited to C and D on every version.
df.groupby("A").sum(numeric_only=True)
CD
A
bar1.742932-1.278326
foo-2.6320121.394188
# Group on A then B (giving a two-level index) and sum the remaining columns.
df.groupby(by=["A", "B"]).sum()
CD
AB
barone-0.3534760.054437
three0.678201-1.091763
two1.418208-0.240999
fooone-0.3481540.333919
three-0.4012020.009708
two-1.8826561.050561
# Same grouping with the key order swapped: B is the outer index level, A the inner.
df.groupby(by=["B", "A"]).sum()
CD
BA
onebar-0.3534760.054437
foo-0.3481540.333919
threebar0.678201-1.091763
foo-0.4012020.009708
twobar1.418208-0.240999
foo-1.8826561.050561
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值