函数应用和映射
from pandas import Series,DataFrame
import pandas as pd
import numpy as np
# Demo frame: 4 rows (US states) x 3 columns of standard-normal samples.
frame = DataFrame(
    np.random.randn(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
frame
| b | d | e |
---|
Utah | -1.461214 | -0.989854 | -1.491231 |
---|
Ohio | -0.186691 | 1.555308 | 0.129673 |
---|
Texas | -0.934857 | -0.769416 | -1.226763 |
---|
Oregon | -1.265840 | -0.246789 | -1.339882 |
---|
np.abs() 取绝对值
np.abs(frame)
| b | d | e |
---|
Utah | 1.461214 | 0.989854 | 1.491231 |
---|
Ohio | 0.186691 | 1.555308 | 0.129673 |
---|
Texas | 0.934857 | 0.769416 | 1.226763 |
---|
Oregon | 1.265840 | 0.246789 | 1.339882 |
---|
.apply() 方法 将函数应用到由各列或行所形成的一维数组上
def f(x):
    """Return the spread (maximum minus minimum) of the values in *x*.

    *x* only needs to provide ``max()`` and ``min()`` methods; when passed to
    ``DataFrame.apply`` it receives one column (or one row with ``axis=1``)
    at a time.
    """
    return x.max() - x.min()
f
frame.apply(f)
out:
b 1.274523
d 2.545163
e 1.620903
dtype: float64
frame.apply(f,axis=1)
out:
Utah 0.501376
Ohio 1.741999
Texas 0.457347
Oregon 1.093092
dtype: float64
def f(x):
    """Summarise *x* as a two-element Series labelled 'min' and 'max'.

    Returning a Series (rather than a scalar) makes ``DataFrame.apply``
    produce one output row per label.
    """
    return Series([x.min(), x.max()], index=['min', 'max'])
frame.apply(f)
| b | d | e |
---|
min | -1.461214 | -0.989854 | -1.491231 |
---|
max | -0.186691 | 1.555308 | 0.129673 |
---|
applymap() 得到DataFrame中各个浮点数的格式化字符串
# Element-wise formatter: renders a number as a string with two decimal places.
# NOTE(review): the name `format` shadows the built-in of the same name; it is
# left unchanged because a later line in this file passes `format` to Series.map.
format = lambda x: '%.2f' % x
# Apply the formatter to every cell of the frame.
# NOTE(review): recent pandas releases deprecate DataFrame.applymap in favour
# of DataFrame.map - confirm against the installed pandas version.
frame.applymap(format)
| b | d | e |
---|
Utah | -1.46 | -0.99 | -1.49 |
---|
Ohio | -0.19 | 1.56 | 0.13 |
---|
Texas | -0.93 | -0.77 | -1.23 |
---|
Oregon | -1.27 | -0.25 | -1.34 |
---|
map() 得到Series各个浮点数的格式化字符串
frame['e'].map(format)
out:
Utah -1.49
Ohio 0.13
Texas -1.23
Oregon -1.34
Name: e, dtype: object
排序和排名
.sort_index() 返回一个已排序的新对象(按列或行索引值进行排序) 默认升序
obj = Series(range(4),index=['b','a','b','c'])
obj
out:
b 0
a 1
b 2
c 3
dtype: int64
obj.sort_index()
out:
a 1
b 0
b 2
c 3
dtype: int64
# 2x4 integer frame whose row and column labels are intentionally out of order.
frame = DataFrame(
    np.arange(8).reshape((2, 4)),
    columns=['d', 'a', 'b', 'c'],
    index=['three', 'one'],
)
frame
# Sort by the row labels (no axis given).
frame.sort_index()
# Sort by the column labels.
frame.sort_index(axis=1)
降序
frame.sort_index(axis=1,ascending=False)
按值(而非索引)进行排序
obj = Series([4,7,-3,2])
obj
out:
0 4
1 7
2 -3
3 2
dtype: int64
.sort_values()
obj.sort_values()
out:
2 -3
3 2
0 4
1 7
dtype: int64
# Two-column frame used to demonstrate sorting rows by column values.
frame = DataFrame({'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})
frame
# Sort rows by the values in column 'b'. The `by` keyword belongs to
# sort_values; sort_index(by=...) was deprecated and later removed from pandas.
frame.sort_values(by='b')
# Sort by column 'a' first, then break ties using column 'b'.
frame.sort_values(by=['a', 'b'])
rank() 排名
obj = Series([7,-5,7,4,2,0,4])
obj
out:
0 7
1 -5
2 7
3 4
4 2
5 0
6 4
dtype: int64
obj.rank()
out:
0 6.5
1 1.0
2 6.5
3 4.5
4 3.0
5 2.0
6 4.5
dtype: float64
# Mixed int/float frame for the rank demo. The column order is pinned
# explicitly so the frame matches the a/b/c layout of the displayed output
# instead of depending on how the dict keys happen to be ordered.
frame = DataFrame(
    {'b': [-4.3, 7, -3, 2], 'a': [0, 1, 0, 1], 'c': [-2, 5, 8, -2.5]},
    columns=['a', 'b', 'c'],
)
frame
| a | b | c |
---|
0 | 0 | -4.3 | -2.0 |
---|
1 | 1 | 7.0 | 5.0 |
---|
2 | 0 | -3.0 | 8.0 |
---|
3 | 1 | 2.0 | -2.5 |
---|
frame.rank(axis=1)
| a | b | c |
---|
0 | 3.0 | 1.0 | 2.0 |
---|
1 | 1.0 | 3.0 | 2.0 |
---|
2 | 2.0 | 1.0 | 3.0 |
---|
3 | 2.0 | 3.0 | 1.0 |
---|
降序
obj.rank(ascending=False)
out:
0 1.5
1 7.0
2 1.5
3 3.5
4 5.0
5 6.0
6 3.5
dtype: float64
obj.rank(ascending=False,method='max')
out:
0 2.0
1 7.0
2 2.0
3 4.0
4 5.0
5 6.0
6 4.0
dtype: float64
obj.rank(method='first')
out:
0 6.0
1 1.0
2 7.0
3 4.0
4 3.0
5 2.0
6 5.0
dtype: float64
method选项:
method | 说明 |
---|
average | 默认:在相等分组中,为各个值分配平均排名 |
min | 使用整个分组的最小排名 |
max | 使用整个分组的最大排名 |
first | 按值在原始数据中的出现顺序分配排名 |