python apply函数

最新推荐文章于 2023-10-08 20:06:38 发布

静默安然

最新推荐文章于 2023-10-08 20:06:38 发布

阅读量334

点赞数

分类专栏： python

原文链接：https://blog.csdn.net/yanjiangdi/article/details/94764562

版权

python 专栏收录该内容

37 篇文章 4 订阅

订阅专栏

1、介绍

apply函数是pandas里面所有函数中自由度最高的函数。该函数如下：

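DataFrame.apply(func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds)

（注：以上是 pandas 0.22 及更早版本的函数签名。broadcast 和 reduce 参数自 0.23 起被弃用，并在 1.0 中移除，其功能由 result_type 参数取代；pandas 1.x 中的签名为 DataFrame.apply(func, axis=0, raw=False, result_type=None, args=(), **kwargs)。）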

该函数最有用的是第一个参数，这个参数是函数，相当于C/C++的函数指针。

这个函数需要自己实现，其传入参数由axis决定：axis = 0（默认值）时，每次把一列数据作为Series传入；axis = 1时，每次把一行数据作为Series传入。我们在自己实现的函数中对这个Series进行计算并返回一个结果，apply函数会自动遍历DataFrame的每一列（或每一行），最后将所有结果组合成一个Series并返回。

2、样例


 
 
   
   
    
    
   
   
   
   
    
    
     
     import numpy as np
     import pandas as pd

     # Spread of a Series: its largest value minus its smallest value.
     f = lambda x: x.max() - x.min()

     # 4x3 frame of random normal samples with labelled rows and columns.
     df = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'),
                       index=['utah', 'ohio', 'texas', 'oregon'])
     print(df)

     # Default axis=0: f is called once per column, so the result is
     # indexed by the column labels.
     t1 = df.apply(f)
     print(t1)

     # axis=1: f is called once per row, so the result is indexed by
     # the row labels.
     t2 = df.apply(f, axis=1)
     print(t2)

输出结果如下所示：


 
 
   
   
    
    
   
   
   
   
    
                   
     
                    b         d         e
     utah    1.106486  0.101113 -0.494279
     ohio    0.955676 -1.889499  0.522151
     texas   1.891144 -0.670588  0.106530
     oregon -0.062372  0.991231  0.294464

     b    1.953516
     d    2.880730
     e    1.016430
     dtype: float64

     utah      1.600766
     ohio      2.845175
     texas     2.561732
     oregon    1.053603
     dtype: float64

3、性能比较


 
 
   
   
    
    
   
   
   
   
    
    
     
     df = pd.DataFrame({'a': np.random.randn(6),
                        'b': ['foo', 'bar'] * 3,
                        'c': np.random.randn(6)})


     def my_test(a, b):
         """Return the sum of the two arguments."""
         return a + b


     print(df)

     # Method 1 (labelled 方法1 in the text): call my_test once per row
     # through apply with axis=1.
     df['Value'] = df.apply(lambda row: my_test(row['a'], row['c']), axis=1)
     print(df)

     # Method 2 (labelled 方法2 in the text): add the two columns directly
     # as whole Series, with no per-row Python call.
     df['Value2'] = df['a'] + df['c']
     print(df)

输出结果如下：


 
 
   
   
    
    
   
   
   
   
    
              
     
               a    b         c
     0 -1.194841  foo  1.648214
     1 -0.377554  bar  0.496678
     2  1.524940  foo -1.245333
     3 -0.248150  bar  1.526515
     4  0.283395  foo  1.282233
     5  0.117674  bar -0.094462

               a    b         c     Value
     0 -1.194841  foo  1.648214  0.453374
     1 -0.377554  bar  0.496678  0.119124
     2  1.524940  foo -1.245333  0.279607
     3 -0.248150  bar  1.526515  1.278365
     4  0.283395  foo  1.282233  1.565628
     5  0.117674  bar -0.094462  0.023212

               a    b         c     Value    Value2
     0 -1.194841  foo  1.648214  0.453374  0.453374
     1 -0.377554  bar  0.496678  0.119124  0.119124
     2  1.524940  foo -1.245333  0.279607  0.279607
     3 -0.248150  bar  1.526515  1.278365  1.278365
     4  0.283395  foo  1.282233  1.565628  1.565628
     5  0.117674  bar -0.094462  0.023212  0.023212

注意：当数据量很大时，对于简单的逻辑处理，建议使用方法2（直接对整列做运算），而不是方法1（用apply逐行调用函数）。个人在处理几百MB的数据集时，方法1耗时约200s，方法2耗时约10s！

静默安然

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python apply函数

  1、介绍 apply函数是pandas里面所有函数中自由度最高的函数。该函数如下： DataFrame.apply(func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds) 该函数最有用的是第一个参数，这个参数是函数，相当于C/C++的函数指针。这个函数需要自己实现，函数的传入参数根据axis来定，比如axis = 1，就会把一行数据作为Series的数据结构传入给自己实现的函数中，我们在函数中
复制链接

扫一扫