Python实现时间段去重合并
老师排课数据如下,想要计算出这个老师当天的在职时长
也就是对上图中的时间段数据进行去重合并,结果如下:
#! usr/bin/env python3
# -*- coding:utf-8 -*-
import pandas as pd
import numpy as np
pd.set_option('display.unicode.east_asian_width',True)
pd.set_option('display.max_columns',None)
data = pd.read_excel(r'./教师课程数据导出5月31.xlsx')
data = pd.DataFrame(data)
data1= data.sort_values(by=['schoolId','teacherId','startTime'])
schoolId = data1['schoolId'].tolist()
teacherId = data1['teacherId'].tolist()
SetNum = list(set(zip(schoolId,teacherId)))
ListValue = []
for i,j in SetNum:
# print(i,j)
data0 = data1[(data1['schoolId'] == i)& (data1['teacherId'] == j)]
startTimeList = data0['startTime'].tolist()
min_startTime = startTimeList[0]
max_endTime = data0['endTime'].tolist()[0]
for k in range(1,len(startTimeList)):
# print(k)
# print(data0['startTime'].tolist()[k])
if data0['startTime'].tolist()[k] > max_endTime:
ListValue.append([i,j,min_startTime,max_endTime])
min_startTime = startTimeList[k]
max_endTime = data0['endTime'].tolist()[k]
continue
elif data0['startTime'].tolist()[k] <= max_endTime and data0['endTime'].tolist()[k] < max_endTime:
continue
elif data0['startTime'].tolist()[k] <= max_endTime and data0['endTime'].tolist()[k] >= max_endTime:
max_endTime = data0['endTime'].tolist()[k]
continue
ListValue.append([i,j,min_startTime,max_endTime])
df = pd.DataFrame(ListValue,columns=['schoolId','teacherId','startTime','endTime'])
print(df)