import numpy as np
import pandas as pd
# Two frames whose missing values complement each other: df1 has rows 0-3
# and columns a/b/c, df2 has rows 0-4 and only columns a/b.
df1 = pd.DataFrame(
    dict(
        a=[1, np.nan, 5, np.nan],
        b=[np.nan, 2, np.nan, 6],
        c=range(2, 18, 4),
    )
)
df2 = pd.DataFrame(
    dict(
        a=[5, 4, np.nan, 3, 7],
        b=[np.nan, 3, 4, 6, 8],
    )
)
In[71]: df1.combine_first(df2)
Out[71]:
a b c
0 1.0 NaN 2.0
1 4.0 2.0 6.0
2 5.0 4.0 10.0
3 3.0 6.0 14.0
4 7.0 8.0 NaN
# 2x3 frame of 0..5 whose row index is named 'state' and whose column
# index is named 'number'.
data = pd.DataFrame(
    data=np.arange(6).reshape(2, 3),
    index=pd.Index(['oo', 'cc'], name='state'),
    columns=pd.Index(['one', 'two', 'three'], name='number'),
)
# Rotate the columns into an inner index level, yielding a Series.
result = data.stack()
In[72]: result
Out[72]:
state  number
oo     one       0
       two       1
       three     2
cc     one       3
       two       4
       three     5
dtype: int32
In[73]: result.unstack()
Out[73]:
number  one  two  three
state
oo        0    1      2
cc        3    4      5
In[74]: result.unstack(0)
Out[74]:
state oo cc
number
one 0 3
two 1 4
three 2 5
In[75]: result.unstack('state')
Out[75]:
state oo cc
number
one 0 3
two 1 4
three 2 5
# Two Series whose labels overlap only on 'c' and 'd'.
s1 = pd.Series([0, 1, 2, 3], index=list('abcd'))
s2 = pd.Series([4, 5, 6], index=list('cde'))
# Stack them end to end; the mapping keys become the outer index level.
data2 = pd.concat({'one': s1, 'two': s2})
In[77]: data2
Out[77]:
one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64
In[78]: data2.unstack()
Out[78]:
a b c d e
one 0.0 1.0 2.0 3.0 NaN
two NaN NaN 4.0 5.0 6.0
In[79]: data2.unstack().stack()
Out[79]:
one  a    0.0
     b    1.0
     c    2.0
     d    3.0
two  c    4.0
     d    5.0
     e    6.0
dtype: float64
# Frame with repeated (k1, k2) pairs, used to demonstrate duplicate handling.
data = pd.DataFrame(
    dict(
        k1=['one', 'one', 'one', 'two', 'two', 'two', 'two'],
        k2=[1, 1, 2, 3, 3, 4, 4],
    )
)
# v1 is unique per row, so no full row is a duplicate of another.
data['v1'] = range(7)
In[80]: data.duplicated()
Out[80]:
0 False
1 False
2 False
3 False
4 False
5 False
6 False
dtype: bool
In[81]: data.drop_duplicates()
Out[81]:
k1 k2 v1
0 one 1 0
1 one 1 1
2 one 2 2
3 two 3 3
4 two 3 4
5 two 4 5
6 two 4 6
In[82]: data.drop_duplicates(['k1'])
Out[82]:
k1 k2 v1
0 one 1 0
3 two 3 3
In[83]: data.drop_duplicates(['k1','k2'])
Out[83]:
k1 k2 v1
0 one 1 0
2 one 2 2
3 two 3 3
5 two 4 5
In[84]: data.drop_duplicates(['k1','k2'],keep='last')
Out[84]:
k1 k2 v1
1 one 1 1
2 one 2 2
4 two 3 4
6 two 4 6
# Seven foods and their prices, used for the element-wise map() examples.
data = pd.DataFrame(
    dict(
        food=['aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'],
        price=[11, 22, 33, 44, 55, 66, 77],
    )
)
In[86]: data
Out[86]:
food price
0 aa 11
1 bb 22
2 cc 33
3 dd 44
4 ee 55
5 ff 66
6 gg 77
In[87]: data['food'].map(lambda x:x.upper())
Out[87]:
0 AA
1 BB
2 CC
3 DD
4 EE
5 FF
6 GG
Name: food, dtype: object
# Same upper-casing as the lambda form, using the unbound str method.
# The returned Series is not stored.
data['food'].map(str.upper)
# Lookup table from food code to the animal it comes from.
animal = {
    'aa': 'pig',
    'bb': 'dog',
    'cc': 'horse',
    'dd': 'dog',
    'ee': 'pig',
    'ff': 'dog',
    'gg': 'horse',
}
# Alternative form: data['animal'] = data['food'].map(animal)
# The explicit lookup below raises KeyError for a food missing from the table.
data['animal'] = data['food'].map(lambda food: animal[food])
In[89]: data['animal']
Out[89]:
0 pig
1 dog
2 horse
3 dog
4 pig
5 dog
6 horse
Name: animal, dtype: object
# Series using -999 and -1000 as sentinel values.
dataa = pd.Series([1, -999, 2, -999, -1000, 3])
# Each call below returns a new Series and is not assigned, so dataa itself
# keeps its sentinel values.
# One sentinel -> NaN.
dataa.replace(to_replace=-999, value=np.nan)
# Several sentinels -> the same NaN.
dataa.replace(to_replace=[-999, -1000], value=np.nan)
# Several sentinels -> pairwise replacements (list form).
dataa.replace(to_replace=[-999, -1000], value=[np.nan, 0])
# Several sentinels -> pairwise replacements (mapping form).
dataa.replace(to_replace={-999: np.nan, -1000: 0})
In[90]: dataa.replace([-999,-1000],[np.nan,0])
Out[90]:
0 1.0
1 NaN
2 2.0
3 NaN
4 0.0
5 3.0
dtype: float64