# -*- coding: utf-8 -*-
import pandas as pd
from pandas import Series,DataFrame
import numpy as np
# ---------------------------------------------------------------------------
# Combining overlapping data
#
# Two objects share (part of) an index and each has holes; the goal is to
# fill the holes of one with the values of the other.
# The expected-output comments below are illustrative; exact spacing depends
# on the pandas version.
# ---------------------------------------------------------------------------
a = Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan], index=list('fedcba'))
# Build ``b`` as float from the start: it receives a NaN below, and assigning
# NaN into an integer Series relies on silent upcasting, which newer pandas
# releases deprecate.
b = Series(np.arange(len(a), dtype=float), index=list('fedcba'))
print(a)
# f    NaN
# e    2.5
# d    NaN
# c    3.5
# b    4.5
# a    NaN
b['a'] = np.nan
print(b)
# f    0.0
# e    1.0
# d    2.0
# c    3.0
# b    4.0
# a    NaN

# numpy's ``where``: take the value from ``b`` wherever ``a`` is null,
# otherwise keep the value from ``a``. Purely positional, no index alignment.
c = np.where(pd.isnull(a), b, a)
print(c)  # [0.  2.5 2.  3.5 4.5 nan]

# ``combine_first`` does the same kind of patching, but aligns on the index
# first, so the two operands may cover different (overlapping) labels.
print(b[:-2].combine_first(a[2:]))
# a    NaN
# b    4.5
# c    3.0
# d    2.0
# e    1.0
# f    0.0

df1 = DataFrame({
    'a': [1, np.nan, 5, np.nan],
    'b': [np.nan, 2, np.nan, 6],
    'c': range(2, 18, 4),  # range with a step of 4 -> 2, 6, 10, 14
})
print(df1)
#      a    b   c
# 0  1.0  NaN   2
# 1  NaN  2.0   6
# 2  5.0  NaN  10
# 3  NaN  6.0  14
df2 = DataFrame({
    'a': [5, 4, np.nan, 3, 7],
    'b': [np.nan, 3, 4, 6, 8],
})
print(df2)
#      a    b
# 0  5.0  NaN
# 1  4.0  3.0
# 2  NaN  4.0
# 3  3.0  6.0
# 4  7.0  8.0

# Same idea as ``np.where`` above, but ``combine_first`` works on whole
# DataFrames: holes in ``df1`` are patched from ``df2`` column by column, and
# the result covers the union of both row and column labels.
print(df1.combine_first(df2))
#      a    b     c
# 0  1.0  NaN   2.0
# 1  4.0  2.0   6.0
# 2  5.0  4.0  10.0
# 3  3.0  6.0  14.0
# 4  7.0  8.0   NaN
# ---------------------------------------------------------------------------
# Reshaping and pivoting
#   stack   -- "rotates" the columns of the data into rows
#   unstack -- "rotates" the rows of the data into columns
#
# The expected-output comments below are the ones recorded by the original
# author; exact spacing and ordering can vary with the pandas version.
# ---------------------------------------------------------------------------


def _stack_keep_na(frame):
    """Stack ``frame``'s columns into the innermost row level, keeping NaNs.

    The new ``stack`` implementation (``future_stack=True``) never drops
    missing values and requires ``dropna`` to be left unspecified, while
    older pandas releases do not know ``future_stack`` and need
    ``dropna=False`` instead. Try the new spelling first and fall back to the
    legacy one.
    """
    try:
        return frame.stack(future_stack=True)
    except TypeError:
        # Older pandas: ``future_stack`` is an unexpected keyword argument.
        return frame.stack(dropna=False)


data = DataFrame(
    np.arange(6).reshape(2, 3),
    index=pd.Index(['Ohio', 'Colorado'], name='state'),
    columns=pd.Index(['one', 'two', 'three'], name='number'),
)
print(data)
# number    one  two  three
# state
# Ohio        0    1      2
# Colorado    3    4      5

# stack: rotate the columns into rows (the result is a Series with a
# two-level index).
print(data.stack())
# state     number
# Ohio      one       0
#           two       1
#           three     2
# Colorado  one       3
#           two       4
#           three     5

# unstack: rotate the rows into columns.
print(data.unstack())
# number  state
# one     Ohio        0
#         Colorado    3
# two     Ohio        1
#         Colorado    4
# three   Ohio        2
#         Colorado    5

# Round trip: stacking and then unstacking gives back the original layout.
print(data.stack().unstack())
# number    one  two  three
# state
# Ohio        0    1      2
# Colorado    3    4      5

# Unstacking the outer level (level 0) instead yields the transpose.
print(data.stack().unstack(0))
# state   Ohio  Colorado
# number
# one        0         3
# two        1         4
# three      2         5

# The level can also be selected by name.
print(data.stack().unstack('state'))
# state   Ohio  Colorado
# number
# one        0         3
# two        1         4
# three      2         5
print(data.T)
# state   Ohio  Colorado
# number
# one        0         3
# two        1         4
# three      2         5

# Labels that are missing from one of the groups are filled with NaN when
# unstacking.
s1 = Series(np.arange(4), index=list('abcd'))
s2 = Series(4 + np.arange(3), index=list('cde'))
data2 = pd.concat([s1, s2], keys=['one', 'two'])
print(data2)
# one  a    0
#      b    1
#      c    2
#      d    3
# two  c    4
#      d    5
#      e    6
print(data2.unstack())
#        a    b    c    d    e
# one  0.0  1.0  2.0  3.0  NaN
# two  NaN  NaN  4.0  5.0  6.0

# Stack back without discarding the missing values.
restacked = _stack_keep_na(data2.unstack())
print(restacked)
# one  a    0.0
#      b    1.0
#      c    2.0
#      d    3.0
#      e    NaN
# two  a    NaN
#      b    NaN
#      c    4.0
#      d    5.0
#      e    6.0

result = data.stack()
print(result)
# state     number
# Ohio      one       0
#           two       1
#           three     2
# Colorado  one       3
#           two       4
#           three     5

# When unstacking a DataFrame, the level being unstacked becomes the lowest
# level of the resulting column index.
df = DataFrame(
    {'left': result, 'right': 5 + result},
    columns=pd.Index(['left', 'right'], name='side'),
)
print(df)
# side             left  right
# state    number
# Ohio     one        0      5
#          two        1      6
#          three      2      7
# Colorado one        3      8
#          two        4      9
#          three      5     10
print(df.unstack('state'))
# side   left          right
# state  Ohio Colorado  Ohio Colorado
# number
# one       0        3     5        8
# two       1        4     6        9
# three     2        5     7       10

# Rows become columns and columns become rows: 'state' moves into the
# columns, then 'side' moves back into the rows.
print(df.unstack('state').stack('side'))
# state         Colorado  Ohio
# number side
# one    left          3     0
#        right         8     5
# two    left          4     1
#        right         9     6
# three  left          5     2
#        right        10     7
# Python data analysis, part 14: converting between rows and columns in pandas (stack, unstack)
# Latest recommended article published on 2024-09-21 05:41:09