python数据分析十四:pandas矩阵的行列转换(stack、unstack)

# -*- coding: utf-8 -*-
import pandas as pd

from pandas import Series,DataFrame

import numpy as np

# ---------------------------------------------------------------------------
# Combining overlapping data (Series)
# ---------------------------------------------------------------------------
a=Series([np.nan,2.5,np.nan,3.5,4.5,np.nan],index=list('fedcba'))
# Build b as float from the start: the NaN assigned below cannot be stored in
# an integer Series, and relying on implicit int -> float upcasting during
# item assignment is deprecated in recent pandas releases.
b=Series(np.arange(len(a),dtype=float),index=list('fedcba'))
print(a)
# f    NaN
# e    2.5
# d    NaN
# c    3.5
# b    4.5
# a    NaN
b['a']=np.nan
print(b)
# f    0.0
# e    1.0
# d    2.0
# c    3.0
# b    4.0
# a    NaN

# numpy's where: wherever a is NaN take the value from b, otherwise keep a.
c=np.where(pd.isnull(a),b,a)
print(c)#[0.  2.5 2.  3.5 4.5 nan ]

# combine_first does the same kind of patching, but aligns on index labels:
# values of the calling Series win, gaps are filled from the argument.
print(b[:-2].combine_first(a[2:]))
# a    NaN
# b    4.5
# c    3.0
# d    2.0
# e    1.0
# f    0.0

# Two frames with partially overlapping rows/columns and scattered NaNs.
df1=DataFrame(dict(
    a=[1,np.nan,5,np.nan],
    b=[np.nan,2,np.nan,6],
    c=range(2,18,4),  # range with a step of 4 -> 2, 6, 10, 14
))
print(df1)
#     a    b   c
# 0  1.0  NaN   2
# 1  NaN  2.0   6
# 2  5.0  NaN  10
# 3  NaN  6.0  14

df2=DataFrame(dict(
    a=[5,4,np.nan,3,7],
    b=[np.nan,3,4,6,8],
))
print(df2)
#      a    b
# 0  5.0  NaN
# 1  4.0  3.0
# 2  NaN  4.0
# 3  3.0  6.0
# 4  7.0  8.0

# Same idea as the np.where approach, but combine_first works on whole
# DataFrames: holes in df1 are patched with the matching cells of df2, and
# rows that exist only in df2 are added.
print(df1.combine_first(df2))
#      a    b     c
# 0  1.0  NaN   2.0
# 1  4.0  2.0   6.0
# 2  5.0  4.0  10.0
# 3  3.0  6.0  14.0
# 4  7.0  8.0   NaN

# ---------------------------------------------------------------------------
# Reshaping and pivoting
#   stack    "rotates" the columns of the data into rows
#   unstack  "rotates" the rows of the data into columns
# ---------------------------------------------------------------------------
data=DataFrame(
    np.arange(6).reshape(2,3),
    index=pd.Index(['Ohio','Colorado'],name='state'),
    columns=pd.Index(['one','two','three'],name='number'),
)
print(data)
# number    one  two  three
# state
# Ohio        0    1      2
# Colorado    3    4      5

# stack: the column labels become the inner level of the row index.
print(data.stack())
# state     number
# Ohio      one       0
#           two       1
#           three     2
# Colorado  one       3
#           two       4
#           three     5

# unstack: here the result is a Series indexed by (number, state).
print(data.unstack())
# number  state
# one     Ohio        0
#         Colorado    3
# two     Ohio        1
#         Colorado    4
# three   Ohio        2
#         Colorado    5

# Round trip: stack followed by unstack gives back the original layout.
print(data.stack().unstack())
# number    one  two  three
# state
# Ohio        0    1      2
# Colorado    3    4      5

# Unstacking level 0 (the outer level) instead yields the transpose.
print(data.stack().unstack(0))
# state   Ohio  Colorado
# number
# one        0         3
# two        1         4
# three      2         5

# The level to pivot can also be selected by name.
print(data.stack().unstack('state'))
# state   Ohio  Colorado
# number
# one        0         3
# two        1         4
# three      2         5

# For comparison: the plain transpose.
print(data.T)
# state   Ohio  Colorado
# number
# one        0         3
# two        1         4
# three      2         5

# Positions that have no data are filled with NaN automatically on unstack.
s1=Series(np.arange(4),index=list('abcd'))
s2=Series(4+np.arange(3),index=list('cde'))

data2=pd.concat([s1,s2],keys=['one','two'])
print(data2)
# one  a    0
#      b    1
#      c    2
#      d    3
# two  c    4
#      d    5
#      e    6

data2_wide=data2.unstack()
print(data2_wide)
#        a    b    c    d    e
# one  0.0  1.0  2.0  3.0  NaN
# two  NaN  NaN  4.0  5.0  6.0

# Stack back WITHOUT dropping the missing values.
# The new stack implementation (future_stack=True) never drops NaN rows and
# does not accept an explicit dropna argument; pandas versions that predate
# the future_stack keyword raise TypeError for it, in which case the legacy
# dropna=False spelling is used instead. Both paths keep the NaN rows.
try:
    data2_long=data2_wide.stack(future_stack=True)
except TypeError:
    data2_long=data2_wide.stack(dropna=False)
print(data2_long)
# one  a    0.0
#      b    1.0
#      c    2.0
#      d    3.0
#      e    NaN
# two  a    NaN
#      b    NaN
#      c    4.0
#      d    5.0
#      e    6.0

# Long-format Series indexed by (state, number), built from the frame above.
result=data.stack()
print(result)
# state     number
# Ohio      one       0
#           two       1
#           three     2
# Colorado  one       3
#           two       4
#           three     5


# When a DataFrame is unstacked, the level being pivoted becomes the lowest
# level of the resulting column index.
df=DataFrame(
    dict(left=result,right=5+result),
    columns=pd.Index(['left','right'],name='side'),
)
print(df)
# side             left  right
# state    number
# Ohio     one        0      5
#          two        1      6
#          three      2      7
# Colorado one        3      8
#          two        4      9
#          three      5     10

print(df.unstack('state'))
# side   left          right
# state  Ohio Colorado  Ohio Colorado
# number
# one       0        3     5        8
# two       1        4     6        9
# three     2        5     7       10

# Rows to columns, then columns back to rows: 'state' moves into the columns
# while 'side' moves into the row index.
print(df.unstack('state').stack('side'))
# state         Colorado  Ohio
# number side
# one    left          3     0
#        right         8     5
# two    left          4     1
#        right         9     6
# three  left          5     2
#        right        10     7



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值