Python 批处理 Excel 文件实现数据的提取

最新推荐文章于 2024-08-22 09:00:00 发布

KerryMo

最新推荐文章于 2024-08-22 09:00:00 发布

阅读量2.9k

点赞数

分类专栏： Python

19 篇文章 0 订阅

订阅专栏

 
 import  
 re 

 
 import  
 xlrd 

 
 f1  
 =  
 open 
 ( 
 "v9_c8_a3_a16.txt" 
 , 
 "w" 
 ) 

 
 f2  
 =  
 open 
 ( 
 "a9_not_c8a3a16.txt" 
 , 
 "w" 
 ) 

 
 f3  
 =  
 open 
 ( 
 "c8_not_v9a3a16.txt" 
 , 
 "w" 
 ) 

 
 f4  
 =  
 open 
 ( 
 "a3_not_v9c8a16.txt" 
 , 
 "w" 
 ) 

 
 f5  
 =  
 open 
 ( 
 "a16_not_v9c8a3.txt" 
 , 
 "w" 
 ) 

 
 def  
 read( 
 file 
 , sheet_index 
 = 
 0 
 ): 

 
      
 workbook  
 =  
 xlrd.open_workbook( 
 file 
 ) 

 
      
 sheet  
 =  
 workbook.sheet_by_index(sheet_index) 

 
      
 print 
 ( 
 "工作表名称:" 
 , sheet.name,  
 "行数:" 
 , sheet.nrows,  
 "列数:" 
 , sheet.ncols) 

 
      
 data  
 =  
 [] 

 
      
 for  
 i  
 in  
 range 
 ( 
 0 
 , sheet.nrows): 

 
          
 data.append(sheet.row_values(i)) 

 
 return  
 data

 
 def  
 red(text): 

 
      
 with  
 open 
 (text,  
 'r' 
 ) as f: 

 
          
 file  
 =  
 f.read() 

 
          
 regexp  
 =  
 r 
 'MGG_\d{5}' 

 
          
 pat  
 =  
 re. 
 compile 
 (regexp) 

 
          
 MGG_all  
 =  
 re.findall(pat,  
 file 
 ) 

 
          
 Mgg_unique  
 =  
 set 
 (MGG_all) 

 
 return  
 Mgg_unique

 
 v9  
 =  
 read(r 
 'zhu.xlsx' 
 ) 

 
 c8  
 =  
 read(r 
 'liu.xlsx' 
 ) 

 
 a3  
 =  
 red(r 
 'ATG3.csv' 
 ) 

 
 a16  
 =  
 red(r 
 'ATG16.csv' 
 ) 

 
 def  
 reg(data): 

 
          
 regexp  
 =  
 r 
 'MGG_\d{5}' 

 
          
 pat  
 =  
 re. 
 compile 
 (regexp) 

 
          
 MGG_all  
 =  
 re.findall(pat,  
 str 
 (data)) 
 #需为string格式 

 
          
 Mgg_unique  
 =  
 set 
 (MGG_all) 

 
 return  
 Mgg_unique

 
 def  
 vps9(): 

 
 return  
 reg(v9)

 
 def  
 cdk8(): 

 
 return  
 reg(c8)

 
 def  
 Atg3(): 

 
 return  
 reg(a3)

 
 def  
 Atg16(): 

 
 return  
 reg(a16)

 
 def  
 Mgg1_Mgg2(): 

 
      
 v9  
 =  
 vps9() 

 
      
 c8  
 =  
 cdk8() 

 
      
 a3  
 =  
 Atg3() 

 
      
 a16  
 =  
 Atg16() 

 
      
 v9_c8_a3_a16  
 =  
 v9&c8&a3&a16 

 
      
 v9_not_c8a3a16  
 =  
 v9 
 - 
 (c8|a3|a16) 

 
      
 c8_not_v9a3a16  
 =  
 c8 
 - 
 (v9|a3|a16) 

 
      
 a3_not_v9c8a16  
 =  
 a3 
 - 
 (v9|c8|a16) 

 
      
 a16_not_v9c8a3  
 =  
 a16 
 - 
 (v9|a3|c8) 

 
 return  
 v9_c8_a3_a16, v9_not_c8a3a16, c8_not_v9a3a16, a3_not_v9c8a16,a16_not_v9c8a3

 
 def  
 message(): 

 
      
 v9_c8_a3a16, v9_not_c8a3a16, c8_not_v9a3a16, a3_not_v9c8a16, a16_not_v9c8a3  
 =  
 Mgg1_Mgg2() 

 
      
 with  
 open 
 ( 
 'magnaporthe.txt' 
 , 
 'r' 
 ) as f: 

 
          
 file  
 =  
 f.read() 

 
          
 infile  
 =  
 file 
 .split( 
 '>' 
 ) 

 
          
 for  
 m  
 in  
 infile: 

 
              
 for  
 i  
 in  
 v9_c8_a3a16: 

 
                      
 f1.write(i 
 + 
 ' ' 
 + 
 m) 

 
              
 for  
 i2  
 in  
 v9_not_c8a3a16: 

 
                      
 f2.write(i2 
 + 
 ' ' 
 + 
 m ) 

 
              
 for  
 i3  
 in  
 c8_not_v9a3a16: 

 
                      
 f3.write(i3 
 + 
 ' ' 
 + 
 m ) 

 
              
 for  
 i4  
 in  
 a3_not_v9c8a16: 

 
                      
 f4.write(i4 
 + 
 ' ' 
 + 
 m ) 

 
              
 for  
 i5  
 in  
 a16_not_v9c8a3: 

 
                      
 f5.write(i5 
 + 
 ' ' 
 + 
 m ) 

 
# Run the extraction. The call is unguarded at module level, so it also
# runs whenever this file is imported.
message()

关注

专栏目录