Pandas入门系列(六) -- reindex

数据分析汇总学习

https://blog.csdn.net/weixin_39778570/article/details/81157884

import numpy as np
import pandas as pd 
from pandas import Series, DataFrame

# 创建一个Series对象
s1 = Series([1,2,3,4], index=['A','B','C','D'])
s1
Out[51]: 
A    1
B    2
C    3
D    4
dtype: int64

# Series的reindex操作:新索引中原来不存在的标签(这里是'E')对应的值为NaN,dtype也因此由int64变为float64
s1.reindex(index=['A','B','C','D','E'])
Out[53]: 
A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
dtype: float64

# 缺失值填充
s1.reindex(index=['A','B','C','D','E'], fill_value=10)
Out[54]: 
A     1
B     2
C     3
D     4
E    10
dtype: int64

# 创建一个Series对象s2
s2 = Series(['A','B','C'], index=[1,5,10])

s2
Out[57]: 
1     A
5     B
10    C
dtype: object
# 使用range(15)作为新索引进行reindex,原索引中没有的位置为NaN
s2.reindex(index=range(15))
Out[58]: 
0     NaN
1       A
2     NaN
3     NaN
4     NaN
5       B
6     NaN
7     NaN
8     NaN
9     NaN
10      C
11    NaN
12    NaN
13    NaN
14    NaN
dtype: object

# 缺失值的另一种填充方法:前向填充(method='ffill'),用前一个有效值填充缺失位置;索引0之前没有有效值,所以仍为NaN
s2.reindex(index=range(15), method='ffill')
Out[59]: 
0     NaN
1       A
2       A
3       A
4       A
5       B
6       B
7       B
8       B
9       B
10      C
11      C
12      C
13      C
14      C
dtype: object

# DataFrame的reindex
# 创建DataFrame对象
df1 = DataFrame(np.random.rand(25).reshape(5,5))
df1
Out[61]: 
          0         1         2         3         4
0  0.196114  0.647705  0.891893  0.357636  0.718007
1  0.656609  0.362805  0.471687  0.173355  0.598185
2  0.888309  0.342225  0.549537  0.876028  0.763594
3  0.334012  0.466384  0.914467  0.859115  0.213126
4  0.541847  0.772825  0.376721  0.298648  0.904471

# 重新创建DataFrame,并指定行索引(index)和列名(columns);注意行索引中故意缺少'C'
df1 = DataFrame(np.random.rand(25).reshape(5,5), index=['A','B','D','E','F'], columns=['c1','c2','c3','c4','c5'])
df1
Out[64]: 
         c1        c2        c3        c4        c5
A  0.462358  0.792699  0.888952  0.435263  0.050586
B  0.770977  0.927863  0.434484  0.676555  0.765051
D  0.960935  0.426751  0.204403  0.137261  0.091423
E  0.956579  0.291759  0.632893  0.680106  0.921611
F  0.426127  0.731299  0.006030  0.674753  0.778577

# 行的reindex
df1.reindex(index=['A','B','C','D','E','F'])
Out[65]: 
         c1        c2        c3        c4        c5
A  0.462358  0.792699  0.888952  0.435263  0.050586
B  0.770977  0.927863  0.434484  0.676555  0.765051
C       NaN       NaN       NaN       NaN       NaN
D  0.960935  0.426751  0.204403  0.137261  0.091423
E  0.956579  0.291759  0.632893  0.680106  0.921611
F  0.426127  0.731299  0.006030  0.674753  0.778577

# 列的reindex
df1.reindex(columns=['c1','c2','c3','c4','c5','c6'])
Out[67]: 
         c1        c2        c3        c4        c5  c6
A  0.462358  0.792699  0.888952  0.435263  0.050586 NaN
B  0.770977  0.927863  0.434484  0.676555  0.765051 NaN
D  0.960935  0.426751  0.204403  0.137261  0.091423 NaN
E  0.956579  0.291759  0.632893  0.680106  0.921611 NaN
F  0.426127  0.731299  0.006030  0.674753  0.778577 NaN

# 同时对行列进行reindex
df1.reindex(index=['A','B','C','D','E','F'], columns=['c1','c2','c3','c4','c5','c6'])
Out[68]: 
         c1        c2        c3        c4        c5  c6
A  0.462358  0.792699  0.888952  0.435263  0.050586 NaN
B  0.770977  0.927863  0.434484  0.676555  0.765051 NaN
C       NaN       NaN       NaN       NaN       NaN NaN
D  0.960935  0.426751  0.204403  0.137261  0.091423 NaN
E  0.956579  0.291759  0.632893  0.680106  0.921611 NaN
F  0.426127  0.731299  0.006030  0.674753  0.778577 NaN

# 使用reindex删除行列:只保留指定的标签,未列出的标签会被丢弃(下面以行为例)
# Series
s1
Out[69]: 
A    1
B    2
C    3
D    4
dtype: int64

s1.reindex(index = ['A','B'])
Out[70]: 
A    1
B    2
dtype: int64

# DataFrame
df1.reindex(index = ['A','B'])
Out[75]: 
         c1        c2        c3        c4        c5
A  0.462358  0.792699  0.888952  0.435263  0.050586
B  0.770977  0.927863  0.434484  0.676555  0.765051

# 使用drop进行删除
# Series直接指定要删除的行
s1.drop('A')
Out[78]: 
B    2
C    3
D    4
dtype: int64

# DataFrame的drop有一个参数axis:默认值为0,表示删除行;为1时表示删除列。drop返回新的对象,不会修改原来的df1

df1.drop(['A','B'], axis=0)
Out[81]: 
         c1        c2        c3        c4        c5
D  0.960935  0.426751  0.204403  0.137261  0.091423
E  0.956579  0.291759  0.632893  0.680106  0.921611
F  0.426127  0.731299  0.006030  0.674753  0.778577

df1.drop('c2', axis=1)
Out[82]: 
         c1        c3        c4        c5
A  0.462358  0.888952  0.435263  0.050586
B  0.770977  0.434484  0.676555  0.765051
D  0.960935  0.204403  0.137261  0.091423
E  0.956579  0.632893  0.680106  0.921611
F  0.426127  0.006030  0.674753  0.778577
发布了266 篇原创文章 · 获赞 420 · 访问量 35万+
展开阅读全文

没有更多推荐了,返回首页

©️2019 CSDN 皮肤主题: 技术黑板 设计师: CSDN官方博客

分享到微信朋友圈

×

扫一扫,手机浏览