importosfrom email importmessage_from_fileimportreimporthashlibimportjsonimportpymssqldefparseEmail(file):
with open(file) as f:
name=f.name
msg=message_from_file(f)
target_text=msg.get_payload()[0].get_payload()#Timestamp: 2020-07-30 00:00:00 (UTC)
#Koality_AutoDQ4_OnePipeline_OfficeForms
timestamp = re.findall("Timestamp: (\d{4}-\d{1,2}-\d{1,2})",target_text)[0]
dataset= re.findall("Dataset: .+_.+_.+_(.+_*\w+)
unique_dataset_name= re.findall("Dataset: (.+_.+_.+_.+)
metrics= re.findall("Metrics[:=]\n*(.+\n*.+)\n*<=*\n*h", target_text)[0]
metrics= metrics.replace('\n', '').replace('=','').replace(':', '')#Koality_AutoDQ4_OA-OXO_Teams_DoD, whose dataset is Teams_DoD instaed of DoD
if 'Teams_' inunique_dataset_name:
dataset= 'Teams_' +dataset
incident_source_text= target_text[target_text.find('Incident List'): target_text.find('RootCause')].replace('=', '').replace('\n', '')
incidents_str= incident_source_text[:incident_source_text.find('
incdient_groups= re.findall("(\w+:\w+),", incidents_str)
incident_dict={}for incident inincdient_groups:
k, v= incident.split(':')
incident_dict[k]=v
incident_json=json.dumps(incident_dict)
hashkey=toHash(incident_json)returntimestamp, dataset, metrics, incident_json, hashkey, unique_dataset_namedeftoHash(text):
hs=hashlib.md5()
hs.update(text.encode())returnhs.hexdigest().upper()definsertIntoDB(conn,table, results):
cur=conn.cursor()
count=0for row inresults:
rowDict= {'table': table,'timestamp':row[0],'dataset':row[1],'metric':row[2],'json':row[3],'hashkey':row[4],'unique_dataset_name': row[5]}
sql= """insert into [{table}]
([WorkloadName], [MetricName], [DimensionCombination_JsonForDb_Hash], [Timestamp], [DimensionCombination_JsonForDb], [UniqueDatasetName], [MetricValue], [ExpectedMetricValue])
VALUES ('{dataset}', '{metric}','{hashkey}','{timestamp}', '{json}', '{unique_dataset_name}', 0, 0);""".format(**rowDict)try:
cur.execute(sql)print('Inserted a row......')except:print(row[1], row[5])
count+= 1
print("Successfully inserted {:d} rows....".format(count))
cur.close()
conn.close()returnTruedefmain():print("Starting script......")
emailFolder= r"C:\Work\IncidentEmails"results=[]for filePath inos.listdir(emailFolder):
fullPath= emailFolder + "\\" +filePath
timestamp, dataset, metrics, incident_json, hashkey, unique_dataset_name=parseEmail(fullPath)
results.append([timestamp, dataset, metrics, incident_json, hashkey, unique_dataset_name])
conn= pymssql.connect('server', 'usr@server', 'pwd', 'db', autocommit=True)
insertIntoDB(conn,'KenshoAutoDQAlert', results)if __name__ == "__main__":
main()