面对的任务,由于疫情,学术会议或其他会议大多采用腾讯会议来开讲。一场讲座可能持续1个小时。
以下程序完成了几个task
1.根据腾讯会议用户昵称和入会昵称提取出参会人员的对应学号和姓名
2.计算在会时间,若时长满足(默认40min),则确认为参会有效
3.将结果保存在exlce文件中
例如处理的原始数据如下
处理完成后的文件如下
"""
时间:2022/4/15
作者:林子轩
版权归属:广工计算机研学会学术部
"""
import pandas as pd
import re
import os
def get_name_StudentID(str, Flag, index):
"""
:param str: 用户昵称(入会昵称)
:param Flag: 是否满足条件
:return:【腾讯会议用户昵称,腾讯会议用户学号,入会昵称,入会学号,是否满足条件 ,原始数据下标】
"""
preData = []
rearData = []
preflag, rearflag = 1, 0
for i in str:
if i == '(' and i != ')':
preflag = 0
rearflag = 1
if preflag:
preData.append(i)
if rearflag:
rearData.append(i)
preStr = ''.join(preData)
rearStr = ''.join(rearData)
# 去掉头 ‘( ’ 和尾巴 ‘ )’
rearStr = rearStr[1:len(rearStr) - 1]
ConfStudentID = re.search("[0-9]{10}", rearStr)
ConfName = re.search("[\u4e00-\u9FEF]+", rearStr)
if ConfStudentID != None:
ConfStudentID = ConfStudentID.group()
if ConfName != None:
ConfName = ConfName.group()
TencenStudentID = re.search("[0-9]{10}", preStr)
TencenName = re.search("[\u4e00-\u9FEF]+", preStr)
if TencenStudentID != None:
TencenStudentID = TencenStudentID.group()
if TencenName != None:
TencenName = TencenName.group()
# print(TencenName, TencenStudentID, ConfName,ConfStudentID)
return [TencenName, TencenStudentID, ConfName, ConfStudentID, Flag, index]
def Result(Name_StudentId):
"""
以会议中名称和学号为主,若无会议名称或者学号,辅以腾讯会议
:param Name_StudentId: [TencenName, TencenStudentID, ConfName, ConfStudentID, Flag, index]
:return: (Success, Roster, Failure) DataFrame
"""
Success = []
Failure = []
for value in Name_StudentId.values:
'''
'''
if value[4]:
if value[2]:
name = value[2]
elif value[2] == None and value[0]:
name = value[0]
else:
name = None
if value[3]:
Id = value[3]
elif value[3] == None and value[1]:
Id = value[1]
else:
Id = None
Success.append([name, Id, value[5]])
else:
if value[2]:
name = value[2]
elif value[2] is None and value[0]:
name = value[0]
else:
name = None
if value[3]:
Id = value[3]
elif value[3] is None and value[1]:
Id = value[1]
else:
Id = None
Failure.append([name, Id, value[5]])
Success, Failure = pd.DataFrame(Success), pd.DataFrame(Failure)
Success, Roster = Filter(Success)
Success.columns, Failure.columns = ['成功名单', '学号', '原始下标'], ['时长不够名单', '学号', '原始下标']
return Success, Roster, Failure
def Filter(datafram):
"""
成功的名单中需要学号,如无学号需要归到待定位置
:param datafram: [name, ID, index]
:return:
"""
Success, Roster = [], []
for value in datafram.values:
if value[1] and value[0] and 1 < len(value[0]) < 5:
Success.append(value)
else:
Roster.append(value)
Success, Roster = pd.DataFrame(Success), pd.DataFrame(Roster)
Success.columns, Roster.columns = ['成功名单', '学号', '原始下标'], ['数据有误名单', '学号', '原始下标']
return Success, Roster
# ID= re.search("[0-9]{10}", string )
if __name__ == "__main__":
'''需满足的时间设定'''
Time_condition = '0:40:00'
'''------------------------------------------------------------------------------------------------------------'''
files = []
index = 0
for index, file in enumerate(os.listdir()): #
if file.endswith('.xlsx'):
files.append(file)
else:
pass
for file in files:
print(file[:2])
if file[:3] == '$导出' or file[:2] == '~$':
files.remove(file)
print(files)
for file in files:
print(f"加载文件 -- {file}")
data = pd.read_excel(file, engine='openpyxl')
'''找到文件的【用户昵称(入会昵称) 首次入会时间 最后退会时间 入会次数 累计参会时长 身份】'''
for i, row in data.iterrows():
if list(row) == ['用户昵称(入会昵称)', '首次入会时间', '最后退会时间', '入会次数', '累计参会时长', '身份']:
index = i
break
# 用iloc方法,使用行列的位置对数据框进行切片。支持布尔切片。
data.columns = data.iloc[index]
data = pd.DataFrame(data.iloc[index + 1:])
Rowlenth = len(data)
Name_StudentId = data["用户昵称(入会昵称)"]
Time = data["累计参会时长"]
Flag = Time > Time_condition
Name_StudentId = list(
map(lambda string, flag, index: get_name_StudentID(string, flag, index),
Name_StudentId, Flag,
range(len(Name_StudentId))))
Name_StudentId = pd.DataFrame(Name_StudentId)
Name_StudentId.columns = ['腾讯会议用户昵称', '腾讯会议用户学号', '入会昵称', '入会学号', '是否满足条件',
'原始下标']
Success, Roster, Failure = Result(Name_StudentId)
'''验证最终人数是否全部处理完毕'''
Resultlenth = len(Success) + len(Roster) + len(Failure)
Success.loc[0] = [f'人数:{Rowlenth}', '', f"处理:{Resultlenth}", ]
if Rowlenth != Resultlenth:
print("数据处理异常,请再次核验")
'''创建一个新的excel'''
write = pd.ExcelWriter(f"$导出{file[:-5]}.xlsx")
Success.to_excel(write, sheet_name='成功名单')
Roster.loc[len(Roster)] = [
'数据有误的人符合发放规则,但由于学号或名字有误,或其他因素,无法找到对应数据,请到原始数据下标查找原始下标对应的数据,联系对应人员',
'', '']
Roster.to_excel(write, sheet_name='待定名单')
Failure.to_excel(write, sheet_name='失败名单')
data['原始下标'] = range(len(data))
data.to_excel(write, sheet_name='原始数据下标查找表')
Name_StudentId.to_excel(write, sheet_name='处理数据')
write.save()
print(f"已经生成导出文件 $导出{file[:-5]}.xlsx")
print("->->->-> Demo 运行完毕!")