五、排序
1、按值排序
import numpy as np
import pandas as pd
# Sorting, part 1 - order rows by column values with .sort_values
# (the same method is available on Series)
df1 = pd.DataFrame(
    np.random.rand(4, 4) * 100,
    columns=list("abcd"),
)
print("df1 = \n", df1)
print("-" * 100)

# Single-column sort. The `ascending` flag selects the direction and
# defaults to ascending order.
# Ascending: sort_values returns a new frame, df1 itself is left untouched.
df3 = df1.sort_values(by=["a"], ascending=True)
print("df1升序后返回新对象:df3 = df1.sort_values(['a'], ascending=True) = \n", df3)
print("-" * 50)

# Descending: same key column, direction flipped.
df4 = df1.sort_values(by=["a"], ascending=False)
print("df1降序后返回新对象:df4 = df1.sort_values(['a'], ascending=False) = \n", df4)
print("-" * 100)

# Multi-column sort: keys are applied in the order they are listed, so rows
# are grouped by 'a' first and ordered by 'c' inside each group.
df2 = pd.DataFrame(
    {
        "a": [1] * 4 + [2] * 4,
        "b": list(range(8)),
        "c": list(range(8, 0, -1)),
    }
)
print("df2 = \n", df2)
print("-" * 50)
df5 = df2.sort_values(by=["a", "c"])
print("df2按照第a、c列升序后返回新对象:df5 = df2.sort_values(['a', 'c']) = \n", df5)
打印结果:
df1 =
a b c d
0 96.648434 36.714275 63.817178 28.597929
1 68.880331 49.645003 75.562495 68.234420
2 50.189398 81.142625 66.546337 34.114630
3 6.967286 13.062011 33.094863 50.918002
----------------------------------------------------------------------------------------------------
df1升序后返回新对象:df3 = df1.sort_values(['a'], ascending=True) =
a b c d
3 6.967286 13.062011 33.094863 50.918002
2 50.189398 81.142625 66.546337 34.114630
1 68.880331 49.645003 75.562495 68.234420
0 96.648434 36.714275 63.817178 28.597929
--------------------------------------------------
df1降序后返回新对象:df4 = df1.sort_values(['a'], ascending=False) =
a b c d
0 96.648434 36.714275 63.817178 28.597929
1 68.880331 49.645003 75.562495 68.234420
2 50.189398 81.142625 66.546337 34.114630
3 6.967286 13.062011 33.094863 50.918002
----------------------------------------------------------------------------------------------------
df2 =
a b c
0 1 0 8
1 1 1 7
2 1 2 6
3 1 3 5
4 2 4 4
5 2 5 3
6 2 6 2
7 2 7 1
--------------------------------------------------
df2按照第a、c列升序后返回新对象:df5 = df2.sort_values(['a', 'c']) =
a b c
3 1 3 5
2 1 2 6
1 1 1 7
0 1 0 8
7 2 7 1
6 2 6 2
5 2 5 3
4 2 4 4
Process finished with exit code 0
2、按索引排序
import numpy as np
import pandas as pd
# Sorting, part 2 - order rows by their index labels with .sort_index
# Defaults are ascending=True and inplace=False, so every call below
# returns a new, ascending-sorted frame.

# Integer labels, deliberately given in descending order.
df1 = pd.DataFrame(
    np.random.rand(4, 4) * 100,
    index=[5, 4, 3, 2],
    columns=list("abcd"),
)
print("df1 = \n", df1)
print("-" * 50)
df3 = df1.sort_index()
print("df1按索引排序后返回新对象:df3 = df1.sort_index() = \n", df3)
print("-" * 100)

# String labels, given out of alphabetical order.
df2 = pd.DataFrame(
    np.random.rand(4, 4) * 100,
    index=list("hsxg"),
    columns=list("abcd"),
)
print("df2 = \n", df2)
print("-" * 50)
df4 = df2.sort_index()
print("df2按索引排序后返回新对象:df4 = df2.sort_index() = \n", df4)
打印结果:
df1 =
a b c d
5 20.335837 69.538529 48.694662 59.057213
4 48.784583 67.559001 52.914242 5.298351
3 99.725511 73.362798 51.517547 64.382411
2 95.567502 41.242870 81.558410 36.106115
--------------------------------------------------
df1按索引排序后返回新对象:df3 = df1.sort_index() =
a b c d
2 95.567502 41.242870 81.558410 36.106115
3 99.725511 73.362798 51.517547 64.382411
4 48.784583 67.559001 52.914242 5.298351
5 20.335837 69.538529 48.694662 59.057213
----------------------------------------------------------------------------------------------------
df2 =
a b c d
h 63.789178 29.338815 2.731508 48.179283
s 88.688644 22.287952 15.571920 48.406608
x 22.671023 29.225815 29.029033 14.588667
g 60.886301 96.096387 64.636511 37.613622
--------------------------------------------------
df2按索引排序后返回新对象:df4 = df2.sort_index() =
a b c d
g 60.886301 96.096387 64.636511 37.613622
h 63.789178 29.338815 2.731508 48.179283
s 88.688644 22.287952 15.571920 48.406608
x 22.671023 29.225815 29.029033 14.588667
Process finished with exit code 0