import pandas as pd
import mymod
import excelformat2
import re
from datetime import datetime
from fuzzywuzzy import fuzz
date= mymod. datestr( )
writer= pd. ExcelWriter( r'C:\Users\XinXinbuX280\Desktop\阿达木三期p\output\阿达木三期异常有临床意义LB匹配MHAE{}.xlsx' . format ( date) )
file = r'C:\Users\XinXinbuX280\Desktop\阿达木三期p\output\阿达木三期异常有临床意义LB{}.xlsx' . format ( date)
file_raw= r'C:\Users\XinXinbuX280\Desktop\阿达木三期p\rawdata\WIBP2018004_阿达木III期_数据库冻结后_Datasets_XLS_Site-All_zh-CN_20210918.xlsx'
file_match= r'C:\Users\XinXinbuX280\Desktop\阿达木三期p\rawdata\LB-AE匹配规则.xlsx'
matchfile= pd. read_excel( r'C:\Users\XinXinbuX280\Desktop\阿达木三期p\rawdata\LB-AE匹配规则.xlsx' )
key= list ( matchfile[ '检查项' ] )
value= list ( matchfile[ 'key' ] )
match= dict ( zip ( key, value) )
for i in match. keys( ) :
match[ i] = match[ i] . split( ',' )
ae= pd. read_excel( file_raw, 'AE' )
lb= pd. read_excel( file )
mh= pd. read_excel( file_raw, 'MH' )
ae[ 'AEENDAT' ] = ae[ 'AEENDAT' ] . fillna( '2100-01-01' )
mh[ 'MHENDAT' ] = mh[ 'MHENDAT' ] . fillna( '2100-01-01' )
for i in range ( 1 , len ( ae) ) :
ae. loc[ i, [ 'AESTDAT' ] ] = re. sub( 'UNK' , '01' , ae[ 'AESTDAT' ] [ i] )
ae. loc[ i, [ 'AEENDAT' ] ] = re. sub( 'UNK' , '01' , ae[ 'AEENDAT' ] [ i] )
print ( ae[ 'AESTDAT' ] [ i] )
for i in range ( 1 , len ( ae) ) :
ae. loc[ i, 'AESTDAT' ] = datetime. strptime( ae[ 'AESTDAT' ] [ i] , '%Y-%m-%d' )
ae. loc[ i, 'AEENDAT' ] = datetime. strptime( ae[ 'AEENDAT' ] [ i] , '%Y-%m-%d' )
for i in range ( 1 , len ( mh) ) :
if mh[ 'MHSTDAT' ] [ i] [ 0 ] == 'U' :
mh. loc[ i, [ 'MHSTDAT' ] ] = '1900-01-01'
if mh[ 'MHENDAT' ] [ i] [ 0 ] == 'U' :
mh. loc[ i, [ 'MHENDAT' ] ] = '2100-01-01'
if mh[ 'MHSTDAT' ] [ i] [ 0 ] != 'U' and mh[ 'MHENDAT' ] [ i] [ 0 ] != 'U' :
mh. loc[ i, [ 'MHSTDAT' ] ] = re. sub( 'UNK' , '01' , mh[ 'MHSTDAT' ] [ i] )
mh. loc[ i, [ 'MHENDAT' ] ] = re. sub( 'UNK' , '01' , mh[ 'MHENDAT' ] [ i] )
print ( mh[ 'MHSTDAT' ] [ i] )
for i in range ( 1 , len ( mh) ) :
mh. loc[ i, 'MHSTDAT' ] = datetime. strptime( mh[ 'MHSTDAT' ] [ i] , '%Y-%m-%d' )
mh. loc[ i, 'MHENDAT' ] = datetime. strptime( mh[ 'MHENDAT' ] [ i] , '%Y-%m-%d' )
aeterm= [ ]
aest= [ ]
aeed= [ ]
aeno= [ ]
mhterm= [ ]
mhst= [ ]
mhed= [ ]
mhno= [ ]
match_result= [ ]
for i in range ( len ( lb) ) :
lb. loc[ i, [ '检查日期' ] ] = datetime. strptime( lb[ '检查日期' ] [ i] , '%Y-%m-%d' )
aeterm. append( '#匹配失败' )
aest. append( ' ' )
aeed. append( ' ' )
aeno. append( ' ' )
mhterm. append( '#匹配失败' )
mhst. append( ' ' )
mhed. append( ' ' )
mhno. append( ' ' )
match_result. append( '#匹配失败' )
for i in range ( len ( lb) ) :
if lb[ '表单名称' ] [ i] != 'LB_OTH' :
for m in match[ lb[ '检查项目' ] [ i] ] :
print ( m)
for n in range ( 1 , len ( ae) ) :
if lb[ '筛选号' ] [ i] == ae[ 'SUBJID' ] [ n] and ( lb[ '检查日期' ] [ i] - ae[ 'AESTDAT' ] [ n] ) . days>= 0 and ( lb[ '检查日期' ] [ i] - ae[ 'AEENDAT' ] [ n] ) . days <= 0 :
if len ( re. findall( m, ae[ 'AETERM' ] [ n] ) ) > 0 :
aeterm[ i] = ae[ 'AETERM' ] [ n]
aest[ i] = ae[ 'AESTDAT' ] [ n]
aeed[ i] = ae[ 'AEENDAT' ] [ n]
aeno[ i] = ae[ 'AESEQ' ] [ n]
for i in range ( len ( lb) ) :
if lb[ '表单名称' ] [ i] != 'LB_OTH' :
for m in match[ lb[ '检查项目' ] [ i] ] :
print ( m)
for n in range ( 1 , len ( mh) ) :
if lb[ '筛选号' ] [ i] == mh[ 'SUBJID' ] [ n] and ( lb[ '检查日期' ] [ i] - mh[ 'MHSTDAT' ] [ n] ) . days>= 0 and ( lb[ '检查日期' ] [ i] - mh[ 'MHENDAT' ] [ n] ) . days <= 0 :
if len ( re. findall( m, mh[ 'MHTERM' ] [ n] ) ) > 0 :
mhterm[ i] = mh[ 'MHTERM' ] [ n]
mhst[ i] = mh[ 'MHSTDAT' ] [ n]
mhed[ i] = mh[ 'MHENDAT' ] [ n]
mhno[ i] = mh[ 'MHSEQ' ] [ n]
for i in range ( len ( lb) ) :
if lb[ '表单名称' ] [ i] == 'LB_OTH' :
ratiodic= { }
for n in range ( 1 , len ( ae) ) :
print ( lb[ '检查日期' ] [ i] , ae[ 'AESTDAT' ] [ n] , ae[ 'AEENDAT' ] [ n] )
if ( lb[ '检查日期' ] [ i] - ae[ 'AESTDAT' ] [ n] ) . days>= 0 and ( lb[ '检查日期' ] [ i] - ae[ 'AEENDAT' ] [ n] ) . days <= 0 and lb[ '筛选号' ] [ i] == ae[ 'SUBJID' ] [ n] :
ratio= fuzz. ratio( lb[ '备注' ] [ i] , ae[ 'AETERM' ] [ n] )
ratiodic[ ratio] = n
if len ( ratiodic) > 0 and max ( ratiodic. keys( ) ) >= 50 :
num= ratiodic[ max ( ratiodic. keys( ) ) ]
aeterm[ i] = ae[ 'AETERM' ] [ num]
aest[ i] = ae[ 'AESTDAT' ] [ num]
aeed[ i] = ae[ 'AEENDAT' ] [ num]
aeno[ i] = ae[ 'AESEQ' ] [ num]
for i in range ( len ( lb) ) :
if lb[ '表单名称' ] [ i] == 'LB_OTH' :
ratiodic= { }
for n in range ( 1 , len ( mh) ) :
if ( lb[ '检查日期' ] [ i] - mh[ 'MHSTDAT' ] [ n] ) . days>= 0 and ( lb[ '检查日期' ] [ i] - mh[ 'MHENDAT' ] [ n] ) . days <= 0 and lb[ '筛选号' ] [ i] == mh[ 'SUBJID' ] [ n] :
ratio= fuzz. ratio( lb[ '备注' ] [ i] , mh[ 'MHTERM' ] [ n] )
ratiodic[ ratio] = n
if len ( ratiodic) > 0 and max ( ratiodic. keys( ) ) >= 50 :
num= ratiodic[ max ( ratiodic. keys( ) ) ]
mhterm[ i] = mh[ 'MHTERM' ] [ num]
mhst[ i] = mh[ 'MHSTDAT' ] [ num]
mhed[ i] = mh[ 'MHENDAT' ] [ num]
mhno[ i] = mh[ 'MHSEQ' ] [ num]
for i in range ( len ( lb) ) :
if lb[ '表单名称' ] [ i] == 'LB_OTH' and aeterm[ i] == '#匹配失败' :
ratiodic= { }
for n in range ( 1 , len ( ae) ) :
print ( lb[ '检查日期' ] [ i] , ae[ 'AESTDAT' ] [ n] , ae[ 'AEENDAT' ] [ n] )
if ( lb[ '检查日期' ] [ i] - ae[ 'AESTDAT' ] [ n] ) . days>= 0 and ( lb[ '检查日期' ] [ i] - ae[ 'AEENDAT' ] [ n] ) . days <= 0 and lb[ '筛选号' ] [ i] == ae[ 'SUBJID' ] [ n] :
ratio= fuzz. ratio( lb[ '检查项目' ] [ i] , ae[ 'AETERM' ] [ n] )
ratiodic[ ratio] = n
if len ( ratiodic) > 0 and max ( ratiodic. keys( ) ) >= 50 :
num= ratiodic[ max ( ratiodic. keys( ) ) ]
aeterm[ i] = ae[ 'AETERM' ] [ num]
aest[ i] = ae[ 'AESTDAT' ] [ num]
aeed[ i] = ae[ 'AEENDAT' ] [ num]
aeno[ i] = ae[ 'AESEQ' ] [ num]
for i in range ( len ( lb) ) :
if lb[ '表单名称' ] [ i] == 'LB_OTH' and mhterm[ i] == '#匹配失败' :
ratiodic= { }
for n in range ( 1 , len ( mh) ) :
if ( lb[ '检查日期' ] [ i] - mh[ 'MHSTDAT' ] [ n] ) . days>= 0 and ( lb[ '检查日期' ] [ i] - mh[ 'MHENDAT' ] [ n] ) . days <= 0 and lb[ '筛选号' ] [ i] == mh[ 'SUBJID' ] [ n] :
ratio= fuzz. ratio( lb[ '检查项目' ] [ i] , mh[ 'MHTERM' ] [ n] )
ratiodic[ ratio] = n
if len ( ratiodic) > 0 and max ( ratiodic. keys( ) ) >= 50 :
num= ratiodic[ max ( ratiodic. keys( ) ) ]
mhterm[ i] = mh[ 'MHTERM' ] [ num]
mhst[ i] = mh[ 'MHSTDAT' ] [ num]
mhed[ i] = mh[ 'MHENDAT' ] [ num]
mhno[ i] = mh[ 'MHSEQ' ] [ num]
lb. insert( len ( lb. columns) , '不良事件名称' , aeterm)
lb. insert( len ( lb. columns) , '不良事件序号' , aeno)
lb. insert( len ( lb. columns) , '不良事件开始日期' , aest)
lb. insert( len ( lb. columns) , '不良事件结束日期' , aeed)
lb. insert( len ( lb. columns) , '既往病史名称' , mhterm)
lb. insert( len ( lb. columns) , '既往病史序号' , mhno)
lb. insert( len ( lb. columns) , '既往病史开始日期' , mhst)
lb. insert( len ( lb. columns) , '既往病史结束日期' , mhed)
for i in range ( len ( lb) ) :
if lb[ '不良事件名称' ] [ i] == '#匹配失败' and lb[ '既往病史名称' ] [ i] == '#匹配失败' :
match_result[ i] = '#匹配失败'
if lb[ '不良事件名称' ] [ i] != '#匹配失败' and lb[ '既往病史名称' ] [ i] == '#匹配失败' :
match_result[ i] = '与AE匹配'
if lb[ '不良事件名称' ] [ i] == '#匹配失败' and lb[ '既往病史名称' ] [ i] != '#匹配失败' :
match_result[ i] = '与MH匹配'
if lb[ '不良事件名称' ] [ i] != '#匹配失败' and lb[ '既往病史名称' ] [ i] != '#匹配失败' :
match_result[ i] = '与AE、MH匹配'
lb. insert( len ( lb. columns) , '匹配结果' , match_result)
for i in range ( len ( lb) ) :
lb. loc[ i, [ '检查日期' ] ] = datetime. strftime( lb[ '检查日期' ] [ i] , '%Y-%m-%d' )
if lb[ '不良事件名称' ] [ i] != '#匹配失败' :
lb. loc[ i, [ '不良事件开始日期' ] ] = datetime. strftime( lb[ '不良事件开始日期' ] [ i] , '%Y-%m-%d' )
lb. loc[ i, [ '不良事件结束日期' ] ] = datetime. strftime( lb[ '不良事件结束日期' ] [ i] , '%Y-%m-%d' )
if lb[ '既往病史名称' ] [ i] != '#匹配失败' :
lb. loc[ i, [ '既往病史开始日期' ] ] = datetime. strftime( lb[ '既往病史开始日期' ] [ i] , '%Y-%m-%d' )
lb. loc[ i, [ '既往病史结束日期' ] ] = datetime. strftime( lb[ '既往病史结束日期' ] [ i] , '%Y-%m-%d' )
lb. to_excel( writer, index= False , sheet_name= 'AEMH匹配到LB' )
writer. save( )
outfile= r'C:\Users\XinXinbuX280\Desktop\阿达木三期p\output\阿达木三期异常有临床意义LB匹配MHAE{}.xlsx' . format ( date)
excelformat2. reset_col( outfile)
excelformat2. reset_format( outfile)