import datetime  # used to compute the difference between two dates

import pandas as pd


def dataInterval(data1, data2):
    """Return the signed number of days from ``data2`` to ``data1``.

    Args:
        data1: Date string in ``%Y-%m-%d`` format (the minuend).
        data2: Date string in ``%Y-%m-%d`` format (the subtrahend).

    Returns:
        ``(data1 - data2).days`` as an int; negative when ``data1`` is the
        earlier date.

    Raises:
        ValueError: If either string does not match ``%Y-%m-%d``.
    """
    d1 = datetime.datetime.strptime(data1, '%Y-%m-%d')
    d2 = datetime.datetime.strptime(data2, '%Y-%m-%d')
    return (d1 - d2).days


def getInterval(arrLike):
    """Row callback for ``DataFrame.apply(..., axis=1)``.

    Reads the ``'PublishedTime'`` and ``'ReceivedTime'`` entries of the row
    and returns PublishedTime minus ReceivedTime in days.

    Args:
        arrLike: A mapping-like row (e.g. the Series that ``apply`` passes
            for each row). Both entries must be strings, because
            ``.strip()`` is called on them.

    Returns:
        The interval in days as an int.
    """
    published = arrLike['PublishedTime']
    received = arrLike['ReceivedTime']
    # Strip surrounding whitespace first, otherwise strptime rejects the value.
    return dataInterval(published.strip(), received.strip())


if __name__ == '__main__':
    fileName = "NS_new.xls"
    df = pd.read_excel(fileName)
    # apply() with axis=1 calls getInterval once per row.
    df['TimeInterval'] = df.apply(getInterval, axis=1)
import datetime  # used to compute the difference between two dates

import pandas as pd


def dataInterval(data1, data2):
    """Return the signed number of days from ``data2`` to ``data1``.

    Args:
        data1: Date string in ``%Y-%m-%d`` format (the minuend).
        data2: Date string in ``%Y-%m-%d`` format (the subtrahend).

    Returns:
        ``(data1 - data2).days`` as an int; negative when ``data1`` is the
        earlier date.

    Raises:
        ValueError: If either string does not match ``%Y-%m-%d``.
    """
    d1 = datetime.datetime.strptime(data1, '%Y-%m-%d')
    d2 = datetime.datetime.strptime(data2, '%Y-%m-%d')
    return (d1 - d2).days


def getInterval_new(arrLike, before, after):
    """Row callback for ``DataFrame.apply(..., axis=1)`` with chosen columns.

    Args:
        arrLike: A mapping-like row (e.g. the Series that ``apply`` passes
            for each row).
        before: Key of the entry holding the earlier date string.
        after: Key of the entry holding the later date string.

    Returns:
        ``row[after] - row[before]`` in days as an int. Both entries must be
        strings, because ``.strip()`` is called on them.
    """
    # Use separate locals so the key parameters are not rebound to cell values.
    before_value = arrLike[before]
    after_value = arrLike[after]
    # Strip surrounding whitespace first, otherwise strptime rejects the value.
    return dataInterval(after_value.strip(), before_value.strip())


if __name__ == '__main__':
    fileName = "NS_new.xls"
    df = pd.read_excel(fileName)

    # Call style 1: extra positional arguments via ``args``.
    df['TimeInterval'] = df.apply(
        getInterval_new, axis=1, args=('ReceivedTime', 'PublishedTime'))

    # Call style 2 (equivalent to style 1): keyword arguments unpacked from a dict.
    df['TimeInterval'] = df.apply(
        getInterval_new, axis=1,
        **{'before': 'ReceivedTime', 'after': 'PublishedTime'})

    # Call style 3 (equivalent to style 1): explicit keyword arguments.
    df['TimeInterval'] = df.apply(
        getInterval_new, axis=1, before='ReceivedTime', after='PublishedTime')
修改后的 getInterval_new 函数多了两个参数(before 和 after),因此在调用 apply 函数时需要自行把这两个参数传进去;
代码中展示的三种传递方式(args 元组、字典解包、关键字参数)是等价的,任选其一即可。
最后,本篇的全部代码在下面这个网页可以下载:
https://github.com/Dongzhixiao/Python_Exercise/tree/master/pandas_apply