Series中的apply()和DataFrame中的applymap()用法相同，调用的函数对单个数据做相应的运算，返回类型分别是Series和DataFrame。而DataFrame中的apply()所调用的函数对一整列进行运算，返回类型可为DataFrame或Series。
if True:
    s = pd.Series([1, 2, 3, 4, 5])

    def add_one(x):
        """Return ``x`` incremented by one."""
        return x + 1

    # Series.apply() calls add_one once per element and collects the
    # results into a new Series. print(...) with a single argument works
    # on both Python 2 and Python 3.
    print(s.apply(add_one))
>>>
0 2
1 3
2 4
3 5
4 6
dtype: int64
# Swap the two whitespace-separated words of a name.
name = 'James Joyce'
name_split = name.split()
# Second word first, then the first word, joined by a single space.
name2 = ' '.join([name_split[1], name_split[0]])
name2  # bare expression: echoes the value in an interactive session
>>>'Joyce James'
DataFrame中applymap()和apply()
# DataFrame applymap()
if 1:
    df = pd.DataFrame({
        'a': [1, 2, 3],
        'b': [10, 20, 30],
        'c': [5, 10, 15],
    })

    def add_one(x):
        """Return ``x`` incremented by one."""
        return x + 1

    # applymap() calls add_one on every individual cell and returns a
    # DataFrame. pandas 2.1 renamed the method to DataFrame.map(), so use
    # the new name when it is available and fall back to applymap()
    # on older releases.
    elementwise = df.map if hasattr(df, 'map') else df.applymap
    print(elementwise(add_one))
>>> a b c
0 2 11 6
1 3 21 11
2 4 31 16
# DataFrame apply()
if 1:
    def convert_grades_curve(exam_grades):
        """Convert numeric exam grades to letter grades on a curve.

        The grades are cut at the 10%, 20%, 50% and 80% quantiles with
        ``pd.qcut``: the bottom 0% to 10% of students get an 'F',
        10% to 20% a 'D', 20% to 50% a 'C', 50% to 80% a 'B' and the
        top 20% an 'A'.

        Parameters:
            exam_grades: the numeric grades to bin; the calls below
                pass a single DataFrame column.

        Returns:
            The result of ``pd.qcut``: one of the labels
            'F', 'D', 'C', 'B', 'A' for each input grade.
        """
        return pd.qcut(
            exam_grades,
            [0, 0.1, 0.2, 0.5, 0.8, 1],
            labels=['F', 'D', 'C', 'B', 'A'],
        )

    # Curve a single exam by passing one column directly.
    print(convert_grades_curve(grades_df['exam1']))

    # to call the function on each column separately
    print(grades_df.apply(convert_grades_curve))
>>> exam1 exam2
Andre F F
Barry B B
Chris C C
Dan C C
Emilio B B
Fred C C
Greta A A
Humbert D D
Ivan A A
James B B
df = pd.DataFrame({
    'a': [4, 5, 3, 1, 2],
    'b': [20, 10, 40, 50, 30],
    'c': [25, 20, 5, 15, 10],
})

if 1:
    # DataFrame.apply() passes each column to the function. np.mean and
    # np.max reduce a column to a single value, so the result is a Series
    # indexed by column name rather than a DataFrame.
    print(df.apply(np.mean))
    print(df.apply(np.max))
>>>a 3.0
b 30.0
c 15.0
dtype: float64
a 5
b 50
c 25
dtype: int64