import numpy as np
import pandas as pd
dates = pd.date_range('1/1/2000', periods=8)
dfa = pd.DataFrame(np.random.randn(8, 4),index=dates, columns=['A', 'B', 'C', 'D'])
dfa
| A | B | C | D |
---|
2000-01-01 | 0.080673 | -0.692551 | -0.383200 | -0.218372 |
---|
2000-01-02 | 0.727506 | 0.996998 | 1.167529 | 1.647872 |
---|
2000-01-03 | 1.045695 | -1.170663 | -0.187076 | -0.547368 |
---|
2000-01-04 | -0.638513 | 1.175356 | 1.066186 | 0.098747 |
---|
2000-01-05 | -0.213762 | 0.877078 | -0.929841 | 3.416403 |
---|
2000-01-06 | 0.164324 | -0.501147 | 0.923230 | 0.104105 |
---|
2000-01-07 | 0.544615 | -0.569205 | -0.204508 | 1.895929 |
---|
2000-01-08 | -1.511657 | 0.422340 | 0.093648 | 0.093373 |
---|
dfa.A = list(range(len(dfa.index)))
dfa.A
2000-01-01 0
2000-01-02 1
2000-01-03 2
2000-01-04 3
2000-01-05 4
2000-01-06 5
2000-01-07 6
2000-01-08 7
Freq: D, Name: A, dtype: int64
dfa['A'] = list(range(len(dfa.index)))
dfa.A
2000-01-01 0
2000-01-02 1
2000-01-03 2
2000-01-04 3
2000-01-05 4
2000-01-06 5
2000-01-07 6
2000-01-08 7
Freq: D, Name: A, dtype: int64
x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]})
x.iloc[1] = {'x': 9, 'y': 99}
x
dfa[:5]
dfa[::2]
dfa[::-1]
| A | B | C | D |
---|
2000-01-08 | 7 | 0.422340 | 0.093648 | 0.093373 |
---|
2000-01-07 | 6 | -0.569205 | -0.204508 | 1.895929 |
---|
2000-01-06 | 5 | -0.501147 | 0.923230 | 0.104105 |
---|
2000-01-05 | 4 | 0.877078 | -0.929841 | 3.416403 |
---|
2000-01-04 | 3 | 1.175356 | 1.066186 | 0.098747 |
---|
2000-01-03 | 2 | -1.170663 | -0.187076 | -0.547368 |
---|
2000-01-02 | 1 | 0.996998 | 1.167529 | 1.647872 |
---|
2000-01-01 | 0 | -0.692551 | -0.383200 | -0.218372 |
---|
dfa.loc["20000107":]
| A | B | C | D |
---|
2000-01-07 | 6 | -0.569205 | -0.204508 | 1.895929 |
---|
2000-01-08 | 7 | 0.422340 | 0.093648 | 0.093373 |
---|
dfa.loc["20000103"] > 0
A True
B False
C False
D False
Name: 2000-01-03 00:00:00, dtype: bool
dfa.loc[:,dfa.loc["20000103"] > 0]
| A |
---|
2000-01-01 | 0 |
---|
2000-01-02 | 1 |
---|
2000-01-03 | 2 |
---|
2000-01-04 | 3 |
---|
2000-01-05 | 4 |
---|
2000-01-06 | 5 |
---|
2000-01-07 | 6 |
---|
2000-01-08 | 7 |
---|
dfa.iloc[:3]
| A | B | C | D |
---|
2000-01-01 | 0 | -0.692551 | -0.383200 | -0.218372 |
---|
2000-01-02 | 1 | 0.996998 | 1.167529 | 1.647872 |
---|
2000-01-03 | 2 | -1.170663 | -0.187076 | -0.547368 |
---|
dfa.iloc[1:5, 2:4]
| C | D |
---|
2000-01-02 | 1.167529 | 1.647872 |
---|
2000-01-03 | -0.187076 | -0.547368 |
---|
2000-01-04 | 1.066186 | 0.098747 |
---|
2000-01-05 | -0.929841 | 3.416403 |
---|
dfa.iloc[[1, 3, 5], [1, 3]]
| B | D |
---|
2000-01-02 | 0.996998 | 1.647872 |
---|
2000-01-04 | 1.175356 | 0.098747 |
---|
2000-01-06 | -0.501147 | 0.104105 |
---|
dfa.iloc[1:3, :]
| A | B | C | D |
---|
2000-01-02 | 1 | 0.996998 | 1.167529 | 1.647872 |
---|
2000-01-03 | 2 | -1.170663 | -0.187076 | -0.547368 |
---|
dfa.loc[dfa.index[[0,2]],"A"]
2000-01-01 0
2000-01-03 2
Name: A, dtype: int64
dfa.iloc[[0,2],dfa.columns.get_loc("A")]
2000-01-01 0
2000-01-03 2
Freq: 2D, Name: A, dtype: int64
dfa.iloc[[0,2],dfa.columns.get_indexer(["A","C"])]
| A | C |
---|
2000-01-01 | 0 | -0.383200 |
---|
2000-01-03 | 2 | -0.187076 |
---|
dfa.iloc[7:10]
dfa.iloc[[6,7,9,10]]
| A | B | C | D |
---|
2000-01-07 | 6 | -0.569205 | -0.204508 | 1.895929 |
---|
2000-01-08 | 7 | 0.422340 | 0.093648 | 0.093373 |
---|
df1 = pd.DataFrame(np.random.randn(6, 4),index=list('abcdef'),columns=list('ABCD'))
df1
| A | B | C | D |
---|
a | 0.547939 | -1.667014 | -1.663774 | -0.718178 |
---|
b | -0.091170 | 0.636030 | 0.942320 | -0.498451 |
---|
c | 0.465275 | 0.502978 | 0.137662 | 0.285483 |
---|
d | -1.531874 | -0.545184 | -0.574339 | -0.052297 |
---|
e | 1.231483 | -0.813094 | 0.368048 | 1.691316 |
---|
f | -0.199404 | -1.502815 | 0.420248 | 0.537462 |
---|
df1.loc[lambda dfa: dfa['A'] > 0, :]
| A | B | C | D |
---|
a | 0.547939 | -1.667014 | -1.663774 | -0.718178 |
---|
c | 0.465275 | 0.502978 | 0.137662 | 0.285483 |
---|
e | 1.231483 | -0.813094 | 0.368048 | 1.691316 |
---|
df1.loc[:, lambda df: ['A', 'B']]
| A | B |
---|
a | 0.547939 | -1.667014 |
---|
b | -0.091170 | 0.636030 |
---|
c | 0.465275 | 0.502978 |
---|
d | -1.531874 | -0.545184 |
---|
e | 1.231483 | -0.813094 |
---|
f | -0.199404 | -1.502815 |
---|
df1.where(df1 > 0)
| A | B | C | D |
---|
a | 0.547939 | NaN | NaN | NaN |
---|
b | NaN | 0.636030 | 0.942320 | NaN |
---|
c | 0.465275 | 0.502978 | 0.137662 | 0.285483 |
---|
d | NaN | NaN | NaN | NaN |
---|
e | 1.231483 | NaN | 0.368048 | 1.691316 |
---|
f | NaN | NaN | 0.420248 | 0.537462 |
---|
dfa > 0
| A | B | C | D |
---|
2000-01-01 | False | False | False | False |
---|
2000-01-02 | True | True | True | True |
---|
2000-01-03 | True | False | False | False |
---|
2000-01-04 | True | True | True | True |
---|
2000-01-05 | True | True | False | True |
---|
2000-01-06 | True | False | True | True |
---|
2000-01-07 | True | False | False | True |
---|
2000-01-08 | True | True | True | True |
---|
dfa.iloc[0:6] > 0
| A | B | C | D |
---|
2000-01-01 | False | False | False | False |
---|
2000-01-02 | True | True | True | True |
---|
2000-01-03 | True | False | False | False |
---|
2000-01-04 | True | True | True | True |
---|
2000-01-05 | True | True | False | True |
---|
2000-01-06 | True | False | True | True |
---|
df1.where(dfa.iloc[0:7] > 0)
| A | B | C | D |
---|
a | NaN | NaN | NaN | NaN |
---|
b | NaN | NaN | NaN | NaN |
---|
c | NaN | NaN | NaN | NaN |
---|
d | NaN | NaN | NaN | NaN |
---|
e | NaN | NaN | NaN | NaN |
---|
f | NaN | NaN | NaN | NaN |
---|
df1.where(df1 > 0,dfa.iloc[0:7],inplace=True)
df1
| A | B | C | D |
---|
a | 0.547939 | NaN | NaN | NaN |
---|
b | NaN | 0.636030 | 0.942320 | NaN |
---|
c | 0.465275 | 0.502978 | 0.137662 | 0.285483 |
---|
d | NaN | NaN | NaN | NaN |
---|
e | 1.231483 | NaN | 0.368048 | 1.691316 |
---|
f | NaN | NaN | 0.420248 | 0.537462 |
---|
df1.apply(lambda x, y: x.where(x > 0, y), y=df1['A'])
| A | B | C | D |
---|
a | 0.547939 | 0.547939 | 0.547939 | 0.547939 |
---|
b | NaN | 0.636030 | 0.942320 | NaN |
---|
c | 0.465275 | 0.502978 | 0.137662 | 0.285483 |
---|
d | NaN | NaN | NaN | NaN |
---|
e | 1.231483 | 1.231483 | 0.368048 | 1.691316 |
---|
f | NaN | NaN | 0.420248 | 0.537462 |
---|
df1.where(lambda x: x > 4, lambda x: x + 10)
| A | B | C | D |
---|
a | 10.547939 | NaN | NaN | NaN |
---|
b | NaN | 10.636030 | 10.942320 | NaN |
---|
c | 10.465275 | 10.502978 | 10.137662 | 10.285483 |
---|
d | NaN | NaN | NaN | NaN |
---|
e | 11.231483 | NaN | 10.368048 | 11.691316 |
---|
f | NaN | NaN | 10.420248 | 10.537462 |
---|
df = pd.DataFrame(np.random.rand(10, 3), columns=list('abc'))
df[(df['a'] < df['b']) & (df['b'] < df['c'])]
| a | b | c |
---|
0 | 0.153726 | 0.330154 | 0.708700 |
---|
2 | 0.263018 | 0.372941 | 0.709408 |
---|
df.query('(a < b) & (b < c)')
| a | b | c |
---|
0 | 0.153726 | 0.330154 | 0.708700 |
---|
2 | 0.263018 | 0.372941 | 0.709408 |
---|
df = pd.DataFrame(np.random.randint(10, size=(10, 2)), columns=list('bc'))
df
| b | c |
---|
0 | 8 | 7 |
---|
1 | 4 | 6 |
---|
2 | 0 | 3 |
---|
3 | 6 | 4 |
---|
4 | 7 | 1 |
---|
5 | 5 | 0 |
---|
6 | 2 | 1 |
---|
7 | 2 | 1 |
---|
8 | 6 | 6 |
---|
9 | 1 | 0 |
---|
df.query('index < b < c')
df = pd.DataFrame(np.random.rand(10, 3), columns=list('abc'))
df
| a | b | c |
---|
0 | 0.744327 | 0.577067 | 0.567892 |
---|
1 | 0.247089 | 0.507576 | 0.454971 |
---|
2 | 0.098770 | 0.939322 | 0.905982 |
---|
3 | 0.396389 | 0.943743 | 0.357621 |
---|
4 | 0.066892 | 0.056997 | 0.537665 |
---|
5 | 0.194226 | 0.945177 | 0.101624 |
---|
6 | 0.640024 | 0.533560 | 0.793600 |
---|
7 | 0.193513 | 0.398429 | 0.031752 |
---|
8 | 0.731469 | 0.160766 | 0.175481 |
---|
9 | 0.859200 | 0.089247 | 0.611211 |
---|
df2 = pd.DataFrame(np.random.rand(12, 3), columns=df.columns)
df2
| a | b | c |
---|
0 | 0.445768 | 0.965003 | 0.793259 |
---|
1 | 0.036539 | 0.764496 | 0.098846 |
---|
2 | 0.761857 | 0.790646 | 0.059684 |
---|
3 | 0.118360 | 0.613896 | 0.363632 |
---|
4 | 0.780124 | 0.868423 | 0.832651 |
---|
5 | 0.968018 | 0.085531 | 0.163136 |
---|
6 | 0.787129 | 0.593285 | 0.341474 |
---|
7 | 0.420828 | 0.359069 | 0.182200 |
---|
8 | 0.929584 | 0.837984 | 0.789769 |
---|
9 | 0.050163 | 0.213084 | 0.134281 |
---|
10 | 0.458949 | 0.102885 | 0.669524 |
---|
11 | 0.222281 | 0.178958 | 0.602505 |
---|
df1.query("df2["a"] > 0 & df2["c"] < 0")
File "<ipython-input-78-0cace329886b>", line 1
df1.query("df2["a"] > 0 & df2["c"] < 0")
^
SyntaxError: invalid syntax