`In [9]: df
Out[9]:
                   A         B         C         D
2000-01-01  0.469112 -0.282863 -1.509059 -1.135632
2000-01-02  1.212112 -0.173215  0.119209 -1.044236
2000-01-03 -0.861849 -2.104569 -0.494929  1.071804
2000-01-04  0.721555 -0.706771 -1.039575  0.271860
2000-01-05 -0.424972  0.567020  0.276232 -1.087401
2000-01-06 -0.673690  0.113648 -1.478427  0.524988
2000-01-07  0.404705  0.577046 -1.715002 -1.039268
2000-01-08 -0.370647 -1.157892 -1.344312  0.844885
In [10]: df[['B', 'A']] = df[['A', 'B']]
In [11]: df
Out[11]:
                   A         B         C         D
2000-01-01 -0.282863  0.469112 -1.509059 -1.135632
2000-01-02 -0.173215  1.212112  0.119209 -1.044236
2000-01-03 -2.104569 -0.861849 -0.494929  1.071804
2000-01-04 -0.706771  0.721555 -1.039575  0.271860
2000-01-05  0.567020 -0.424972  0.276232 -1.087401
2000-01-06  0.113648 -0.673690 -1.478427  0.524988
2000-01-07  0.577046  0.404705 -1.715002 -1.039268
2000-01-08 -1.157892 -0.370647 -1.344312  0.844885
In [21]: dfa['A'] = list(range(len(dfa.index))) # use this form to create a new column
In [22]: dfa
Out[22]:
            A         B         C         D
2000-01-01  0  0.469112 -1.509059 -1.135632
2000-01-02  1  1.212112  0.119209 -1.044236
2000-01-03  2 -0.861849 -0.494929  1.071804
2000-01-04  3  0.721555 -1.039575  0.271860
2000-01-05  4 -0.424972  0.276232 -1.087401
2000-01-06  5 -0.673690 -1.478427  0.524988
2000-01-07  6  0.404705 -1.715002 -1.039268
2000-01-08  7 -0.370647 -1.344312  0.844885
In [94]: df[df['A'] > 0]
Out[94]:
                   A         B         C         D   E   0
2000-01-04  7.000000  0.721555 -1.039575  0.271860 NaN NaN
2000-01-05  0.567020 -0.424972  0.276232 -1.087401 NaN NaN
2000-01-06  0.113648 -0.673690 -1.478427  0.524988   7 NaN
2000-01-07  0.577046  0.404705 -1.715002 -1.039268 NaN NaN
In [95]: df2 = DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'],
....: 'b' : ['x', 'y', 'y', 'x', 'y', 'x', 'x'],
....: 'c' : randn(7)})
....:
# only want 'two' or 'three'
In [96]: criterion = df2['a'].map(lambda x: x.startswith('t'))
In [97]: df2[criterion]
Out[97]:
       a  b         c
2    two  y  0.995761
3  three  x  2.396780
4    two  y  0.014871
In [99]: df2[criterion & (df2['b'] == 'x')]
Out[99]:
       a  b        c
3  three  x  2.39678
In [104]: s[s.isin([2, 4, 6])]
Out[104]:
2    2
0    4
dtype: int64
In [107]: s_mi = Series(np.arange(6),
.....: index=pd.MultiIndex.from_product([[0, 1], ['a', 'b', 'c']]))
.....:
In [108]: s_mi
Out[108]:
0  a    0
   b    1
   c    2
1  a    3
   b    4
   c    5
dtype: int32
In [109]: s_mi.iloc[s_mi.index.isin([(1, 'a'), (2, 'b'), (0, 'c')])]
Out[109]:
0  c    2
1  a    3
dtype: int32
In [110]: s_mi.iloc[s_mi.index.isin(['a', 'c', 'e'], level=1)]
Out[110]:
0  a    0
   c    2
1  a    3
   c    5
dtype: int32
In [146]: df = DataFrame(randint(n / 2, size=(n, 2)), columns=list('bc'))
In [147]: df.index.name = 'a'
In [148]: df
Out[148]:
   b  c
a
0  2  3
1  4  1
2  4  0
3  4  1
4  1  4
5  1  4
6  0  1
7  0  0
8  4  0
9  4  2
In [149]: df.query('a < b and b < c')
Out[149]:
   b  c
a
0  2  3
In [157]: import pandas.util.testing as tm
In [158]: n = 10
In [159]: colors = tm.choice(['red', 'green'], size=n)
In [160]: foods = tm.choice(['eggs', 'ham'], size=n)
In [163]: index = MultiIndex.from_arrays([colors, foods], names=['color', 'food'])
In [164]: df = DataFrame(randn(n, 2), index=index)
In [165]: df
Out[165]:
                   0         1
color food
red   ham   0.157622 -0.293555
green eggs  0.111560  0.597679
red   ham  -1.270093  0.120949
green ham  -0.193898  1.804172
red   ham  -0.234694  0.939908
green eggs -0.171520 -0.153055
red   eggs -0.363095 -0.067318
green eggs  1.444721  0.325771
      ham  -0.855732 -0.697595
      eggs -0.276134 -1.258759
In [166]: df.query('color == "red"')
Out[166]:
                   0         1
color food
red   ham   0.157622 -0.293555
      ham  -1.270093  0.120949
      ham  -0.234694  0.939908
      eggs -0.363095 -0.067318
In [208]: df2 = DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'],
.....: 'b' : ['x', 'y', 'y', 'x', 'y', 'x', 'x'],
.....: 'c' : np.random.randn(7)})
.....:
In [209]: df2.duplicated(['a','b'])
Out[209]:
0 False
1 False
2 False
3 False
4 True
5 True
6 False
dtype: bool
In [210]: df2.drop_duplicates(['a','b'])
Out[210]:
       a  b         c
0    one  x  0.932713
1    one  y -0.393510
2    two  y -0.548454
3  three  x  1.130736
6    six  x -1.233298
In [211]: df2.drop_duplicates(['a','b'], take_last=True)
Out[211]:
       a  b         c
1    one  y -0.393510
3  three  x  1.130736
4    two  y -0.447217
5    one  x  1.043921
6    six  x -1.233298
In [223]: index = Index(list(range(5)), name='rows')
In [224]: columns = Index(['A', 'B', 'C'], name='cols')
In [225]: df = DataFrame(np.random.randn(5, 3), index=index, columns=columns)
In [226]: df
Out[226]:
cols         A         B         C
rows
0     0.603791  0.388713  0.544331
1    -0.152978  1.929541  0.202138
2     0.024972  0.117533 -0.184740
3     1.054144 -0.736061 -0.785352
4    -1.362549 -0.063514  0.487562
In [227]: df['A']
Out[227]:
rows
0    0.603791
1   -0.152978
2    0.024972
3    1.054144
4   -1.362549
Name: A, dtype: float64
In [250]: indexed2 = data.set_index(['a', 'b'])
In [251]: indexed2
Out[251]:
         c  d
a   b
bar one  z  1
    two  y  2
foo one  x  3
    two  w  4
In [255]: data.set_index('c', drop=False)
Out[255]:
a b c d
c
z bar one z 1
y bar two y 2
x foo one x 3
w foo two w 4
In [256]: data.set_index(['a', 'b'], inplace=True)
In [257]: data
Out[257]:
         c  d
a   b
bar one  z  1
    two  y  2
foo one  x  3
    two  w  4
In [259]: data.reset_index()
Out[259]:
a b c d
     a    b  c  d
0  bar  one  z  1
1  bar  two  y  2
2  foo  one  x  3
3  foo  two  w  4
You can use the level keyword to remove only a portion of the index:
In [260]: frame
Out[260]:
c d
c a b
z bar one z 1
y bar two y 2
x foo one x 3
w foo two w 4
In [261]: frame.reset_index(level=1)
Out[261]:
a c d
c b
z one bar z 1
y two bar y 2
x one foo x 3
w two foo w 4