读取xes文件
直接读取
from pm4py.objects.log.importer.xes import importer as xes_importer
# Load the whole XES file into memory; the quoted string is the path to the file.
log = xes_importer.apply('<path_to_xes_file.xes>')
将xes中的数据按时间戳排序后读取
from pm4py.objects.log.importer.xes import importer as xes_importer
# Select the ITERPARSE importer variant and switch on its TIMESTAMP_SORT
# parameter so that the log is read with its data ordered by timestamp.
variant = xes_importer.Variants.ITERPARSE
parameters = {
    variant.value.Parameters.TIMESTAMP_SORT: True,
}
log = xes_importer.apply(
    '<path_to_xes_file>',  # the quoted string is the path to the XES file
    parameters=parameters,
    variant=variant,
)
访问xes文件
读入后的日志可以像列表一样按下标访问:日志是轨迹(trace)的列表,每条轨迹是事件(event)的列表,每个事件的数据形式类似字典。因此整个文件可以理解为一个二维数组([轨迹下标][该轨迹中的事件下标]),第一维的长度是轨迹数量,第二维的长度是对应轨迹中的事件数量
(下方代码使用的数据集为BPIC15_1.xes)
# Alias the imported log; the examples below index it like a list of traces.
event_log = log
# Print the first trace of the log (its trace-level attributes plus its events).
print(event_log[0])
'''
{'attributes': {'Includes_subCases': 'N', 'concept:name': '2760925', 'Responsible_actor': '4901428', 'endDate': datetime.datetime(2014, 6, 10, 14, 13, 27, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))), 'caseStatus': 'G', 'parts': 'Inrit/Uitweg,Inrit/Uitweg', 'last_phase': 'Buiten behandeling gelaten', 'case_type': '557669', 'startDate': datetime.datetime(2010, 10, 5, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))), 'requestComplete': 'TRUE', 'IDofConceptCase': '2760933'}, 'events': [{'question': '5-10-2010 0:00:00', 'dateFinished': '2010-10-07 14:57:22', 'dueDate': datetime.datetime(2010, 10, 10, 14, 43, 8, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))), 'action_code': '01_HOOFD_010', 'activityNameEN': 'register submission date request', 'planned': datetime.datetime(2010, 10, 8, 14, 43, 8, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))), 'time:timestamp': datetime.datetime(2010, 10, 5, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))), 'monitoringResource': '560894', 'org:resource': '560872', 'activityNameNL': 'registratie datum binnenkomst aanvraag', 'concept:name': '01_HOOFD_010', 'lifecycle:transition': 'complete'}, '..', {'monitoringResource': '560894', 'org:resource': '560872', 'activityNameNL': 'fase aanvraag ontvangen', 'concept:name': '01_HOOFD_015', 'question': 'EMPTY', 'dateFinished': '2010-10-07 14:57:22', 'action_code': '01_HOOFD_015', 'activityNameEN': 'phase application received', 'lifecycle:transition': 'complete', 'time:timestamp': datetime.datetime(2010, 10, 7, 14, 57, 14, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200)))}]}
'''
# Print the first event of the first trace: the first index selects the trace,
# the second index selects the event inside that trace.
print(event_log[0][0])
'''
{'question': '5-10-2010 0:00:00', 'dateFinished': '2010-10-07 14:57:22', 'dueDate': datetime.datetime(2010, 10, 10, 14, 43, 8, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))), 'action_code': '01_HOOFD_010', 'activityNameEN': 'register submission date request', 'planned': datetime.datetime(2010, 10, 8, 14, 43, 8, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))), 'time:timestamp': datetime.datetime(2010, 10, 5, 0, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=7200))), 'monitoringResource': '560894', 'org:resource': '560872', 'activityNameNL': 'registratie datum binnenkomst aanvraag', 'concept:name': '01_HOOFD_010', 'lifecycle:transition': 'complete'}
'''
# `attributes` holds the named data stored at that level; printing its keys
# lists the attribute names of the first trace. The log as a whole and each
# trace have `attributes`; a single event does not.
print(dict(event_log[0].attributes).keys())
'''
dict_keys(['Includes_subCases', 'concept:name', 'Responsible_actor', 'endDate', 'caseStatus', 'parts', 'last_phase', 'case_type', 'startDate', 'requestComplete', 'IDofConceptCase'])
'''
# A single event can be converted to a dict directly to inspect its field names.
print(dict(event_log[0][0]).keys())
'''
dict_keys(['question', 'dateFinished', 'dueDate', 'action_code', 'activityNameEN', 'planned', 'time:timestamp', 'monitoringResource', 'org:resource', 'activityNameNL', 'concept:name', 'lifecycle:transition'])
'''
# A field's value can be read directly by indexing the event with the field name.
print(event_log[0][0]["concept:name"])
'''
01_HOOFD_010
'''
实例
本实例的目标是统计数据集中的一系列信息:
targets = [
'数据集',
'Case数量',
'Activity数量',
'Event数量',
'日志开始时间',
'日志结束时间',
'Case内最大Event数量',
'Case内最小Event数量'