pandas合并、转换、映射、替换

import numpy as np
import pandas as pd
# df1 has missing values (NaN) in columns 'a' and 'b' and a fully populated
# column 'c' holding 2, 6, 10, 14.
df1 = pd.DataFrame(
    dict(
        a=[1, np.nan, 5, np.nan],
        b=[np.nan, 2, np.nan, 6],
        c=range(2, 18, 4),
    )
)
# df2 has one more row than df1 (five instead of four) and no 'c' column.
df2 = pd.DataFrame(
    dict(
        a=[5, 4, np.nan, 3, 7],
        b=[np.nan, 3, 4, 6, 8],
    )
)
In[71]: df1.combine_first(df2)

Out[71]: 
     a    b     c
0  1.0  NaN   2.0
1  4.0  2.0   6.0
2  5.0  4.0  10.0
3  3.0  6.0  14.0
4  7.0  8.0   NaN

# A 2x3 frame of the integers 0..5 whose row index is named 'state' and whose
# column index is named 'number'.
data = pd.DataFrame(
    np.arange(6).reshape(2, 3),
    columns=pd.Index(["one", "two", "three"], name="number"),
    index=pd.Index(["oo", "cc"], name="state"),
)
# stack() pivots the columns into the inner level of the row index, producing
# a Series indexed by (state, number).
result = data.stack()
In[72]: result

Out[72]: 
state  number
oo     one       0
       two       1
       three     2
cc     one       3
       two       4
       three     5
dtype: int32
In[73]: result.unstack()
Out[73]: 
number  one  two  three
state                  
oo        0    1      2
cc        3    4      5
In[74]: result.unstack(0)
Out[74]: 
state   oo  cc
number        
one      0   3
two      1   4
three    2   5
In[75]: result.unstack('state')
Out[75]: 
state   oo  cc
number        
one      0   3
two      1   4
three    2   5

# Two Series whose labels only partly overlap ('c' and 'd' appear in both).
s1 = pd.Series({"a": 0, "b": 1, "c": 2, "d": 3})
s2 = pd.Series({"c": 4, "d": 5, "e": 6})
# Concatenate end to end; `keys` adds an outer index level ('one' / 'two')
# recording which input each value came from.
data2 = pd.concat(
    [s1, s2],
    keys=["one", "two"],
)
In[77]: data2
Out[77]: 
one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64
In[78]: data2.unstack()
Out[78]: 
       a    b    c    d    e
one  0.0  1.0  2.0  3.0  NaN
two  NaN  NaN  4.0  5.0  6.0
In[79]: data2.unstack().stack()
Out[79]: 
one  a    0.0
     b    1.0
     c    2.0
     d    3.0
two  c    4.0
     d    5.0
     e    6.0
dtype: float64

# Rows repeat in the (k1, k2) pair: ('one', 1), ('two', 3) and ('two', 4)
# each occur twice.
data = pd.DataFrame(
    {
        "k1": ["one", "one", "one", "two", "two", "two", "two"],
        "k2": [1, 1, 2, 3, 3, 4, 4],
    }
)
# v1 is a running counter 0..6, so every full row is unique once it is added;
# duplicates only show up when comparing a subset of the columns.
data["v1"] = range(len(data))
In[80]: data.duplicated()
Out[80]: 
0    False
1    False
2    False
3    False
4    False
5    False
6    False
dtype: bool
In[81]: data.drop_duplicates()
Out[81]: 
    k1  k2  v1
0  one   1   0
1  one   1   1
2  one   2   2
3  two   3   3
4  two   3   4
5  two   4   5
6  two   4   6
In[82]: data.drop_duplicates(['k1'])
Out[82]: 
    k1  k2  v1
0  one   1   0
3  two   3   3
In[83]: data.drop_duplicates(['k1','k2'])
Out[83]: 
    k1  k2  v1
0  one   1   0
2  one   2   2
3  two   3   3
5  two   4   5
In[84]: data.drop_duplicates(['k1','k2'],keep='last')
Out[84]: 
    k1  k2  v1
1  one   1   1
2  one   2   2
4  two   3   4
6  two   4   6

# Seven foods with prices 11, 22, ..., 77 (multiples of 11).
data = pd.DataFrame(
    {
        "food": ["aa", "bb", "cc", "dd", "ee", "ff", "gg"],
        "price": [11 * k for k in range(1, 8)],
    }
)
In[86]: data
Out[86]: 
  food  price
0   aa     11
1   bb     22
2   cc     33
3   dd     44
4   ee     55
5   ff     66
6   gg     77
In[87]: data['food'].map(lambda x:x.upper())
Out[87]: 
0    AA
1    BB
2    CC
3    DD
4    EE
5    FF
6    GG
Name: food, dtype: object

# Upper-case every food name by handing str.upper straight to map; the
# result is not stored.
data["food"].map(str.upper)

# Lookup table from food code to the animal it is associated with.
animal = {
    "aa": "pig",
    "bb": "dog",
    "cc": "horse",
    "dd": "dog",
    "ee": "pig",
    "ff": "dog",
    "gg": "horse",
}
# Alternative form that passes the dict itself to map:
#   data['animal'] = data['food'].map(animal)
# Here the lookup is done explicitly, one food code at a time.
data["animal"] = data["food"].map(lambda code: animal[code])
In[89]: data['animal']
Out[89]: 
0      pig
1      dog
2    horse
3      dog
4      pig
5      dog
6    horse
Name: animal, dtype: object

# -999 and -1000 are the placeholder values swapped out in the replace calls below.
dataa = pd.Series([1, -999, 2, -999, -1000, 3])

# Each replace() call below returns a new Series; none of the results is
# stored and dataa itself is never reassigned.
# One value -> one replacement.
dataa.replace(-999, np.nan)
# Several values -> the same replacement.
dataa.replace([-999, -1000], np.nan)
# Several values -> position-matched replacements (-999 -> NaN, -1000 -> 0).
dataa.replace([-999, -1000], [np.nan, 0])
# The same pairing written as a {old: new} mapping.
dataa.replace({-999: np.nan, -1000: 0})
In[90]: dataa.replace([-999,-1000],[np.nan,0])
Out[90]: 
0    1.0
1    NaN
2    2.0
3    NaN
4    0.0
5    3.0
dtype: float64

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值