考虑以下迷你版本的问题:
from io import StringIO
from pandas import read_csv, to_datetime
# how close do sessions have to be to be considered equal? (in minutes)
threshold = 5
# datetime column (combination of date + start_time)
dtc = [['date', 'start_time']]
# index column (above combination)
ixc = 'date_start_time'
df1 = read_csv(StringIO(u'''
date,start_time,employee_id,session_id
01/01/2016,02:03:00,7261824,871631182
01/01/2016,06:03:00,7261824,871631183
01/01/2016,11:01:00,7261824,871631184
01/01/2016,14:01:00,7261824,871631185
'''), parse_dates=dtc)
df2 = read_csv(StringIO(u'''
date,start_time,employee_id,session_id
01/01/2016,02:03:00,7261824,871631182
01/01/2016,06:05:00,7261824,871631183