#-*- coding:utf-8 -*-
'''
Extraction starts from 11c.
'''
import os
import re

import numpy as np  # not used in the visible part of the script; kept as-is

# Directory release suffix; also used as the first column of the merged
# output written at the end of the script.
year = '17A'
# Folder holding the downloaded directory files (note the trailing slash).
ss = "./data/edmd/"

# Give the index file a .txt extension.  The rename fails on a re-run
# because the file has already been renamed; that case is deliberately
# ignored.  If the file is missing altogether, the open() below raises.
try:
    os.rename(ss + "/EDMDI1." + year, ss + "/EDMDI1.txt")
except OSError:
    pass

# An index line starts with a three-character marker column (three spaces,
# "X" plus two spaces, or one non-word character plus two spaces) that is
# followed by the six-letter upper-case tag captured here.
p1 = re.compile(r"^(?:\s{3}|X\s{2}|\W\s{2})([A-Z]{6})\s.+\n")

# Tags in the order they appear in the index file.
list_tag = []
with open(ss + "/EDMDI1.txt") as f1:
    for line in f1:
        list_tag.extend(p1.findall(line))
print(list_tag)

# Every pattern is compiled once here rather than once per message file.
# The original code ran the same expression several times per line with the
# capture group moved around; the expressions below carry all capture groups
# at once, which matches exactly the same lines and captures the same text.

# Plain segment row, e.g. "00010 UNH Message header M 1".
# Groups: position, segment tag, segment name, status (C/M), repeat count.
_SEGMENT_ROW = re.compile(
    r"^(\d{5})\s{3}([A-Z]{3})(.+)([CM])\s{3}(\d*)\s{1,}\|{0,}\n")

# Segment-group header row,
# e.g. "00050 ---- Segment group 1 ------------------ C 9----------------+".
# Groups: position, group number, status (C/M), repeat count.
_GROUP_HEADER_ROW = re.compile(
    r"^(\d{5}).+Segment\sgroup\s(\d*).+([CM])\s{3}(\d*).+\n")

# Segment row that ends one or more groups (trailing "+", "+|", "++|", ...),
# e.g. "00280 RNG Range details C 1---------------+|".
# Groups: position, segment tag, status (C/M), repeat count.
_GROUP_END_ROW = re.compile(
    r"^(\d{5})\s{3}([A-Z]{3}).+([CM])\s{3}(\d*)\-+\+{1,10}\|{0,20}\n")

# Segment row ending with exactly one "+" followed by up to ten "|".
_SINGLE_CLOSE = re.compile(
    r"^\d{5}\s{3}[A-Z]{3}.+[CM]\s{3}\d*\-+\+\|{0,10}\n")

# _GROUP_OPENERS[d] matches a group header whose line ends with "+" followed
# by exactly d "|" characters; the captured group number goes into slot d.
_GROUP_OPENERS = [
    re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*\-+\+"
               + r"\|" * depth + r"\n")
    for depth in range(7)
]

# _MULTI_CLOSERS[n - 2] matches a row ending with exactly n "+" (n = 2..7),
# optionally followed by "|" characters.
_MULTI_CLOSERS = [
    re.compile(r"^\d{5}.+[CM]\s{3}\d*\-+\+{%d}\|{0,20}\n" % plus_count)
    for plus_count in range(2, 8)
]


def _innermost_open_slot(levels, current):
    """Return the index just before the first free (0) slot of *levels*.

    The result is -1 when slot 0 itself is free.  *current* is returned
    unchanged when no slot is free.
    """
    if 0 in levels:
        return levels.index(0) - 1
    return current


def _parent_field(levels, pos):
    """Return the parent column ("SG<n>,") for the slot index *pos*."""
    parent = levels[pos] if pos != -1 else 0
    return "SG" + str(parent) + ","


def _convert_line(line, levels, pos):
    """Convert one line of a message file.

    *levels* holds the segment-group number stored per nesting slot (0 means
    the slot is free) and is updated in place.  *pos* is the slot index used
    for the parent column.

    Returns a tuple ``(text, pos)``: the text to append to the output file
    (possibly empty) and the updated slot index.
    """
    pieces = []

    # NOTE(review): the two row kinds below use the slot index left over
    # from earlier lines, whereas plain segment rows recompute it first.
    # After a nested group has been closed this looks like it can yield
    # "SG0" as parent -- kept as in the original, confirm against real data.
    header = _GROUP_HEADER_ROW.match(line)
    if header:
        position, group_no, status, repeat = header.groups()
        pieces.append("\n" + position + "," + _parent_field(levels, pos)
                      + group_no + "," + status + "," + repeat)

    group_end = _GROUP_END_ROW.match(line)
    if group_end:
        position, tag, status, repeat = group_end.groups()
        pieces.append("\n" + position + "," + _parent_field(levels, pos)
                      + tag + "," + status + "," + repeat)

    # --- maintain the nesting state ---------------------------------------
    # A single trailing "+" frees the innermost occupied slot.
    if _SINGLE_CLOSE.match(line):
        pos = _innermost_open_slot(levels, pos)
        levels[pos] = 0

    # A group header stores its number in the slot selected by the number
    # of trailing "|"; if that slot is already occupied it is freed instead.
    for depth, opener in enumerate(_GROUP_OPENERS):
        opened = opener.match(line)
        if opened:
            levels[depth] = opened.group(1) if levels[depth] == 0 else 0

    # n trailing "+" free the n innermost slots.  Negative indices wrap to
    # the tail of the list, exactly as in the original code.
    for extra, closer in enumerate(_MULTI_CLOSERS, 1):
        if closer.match(line):
            pos = _innermost_open_slot(levels, pos)
            for slot in range(pos - extra, pos + 1):
                levels[slot] = 0

    # Plain segment row: the parent slot is recomputed before it is written.
    row = _SEGMENT_ROW.match(line)
    if row:
        position, tag, name, status, repeat = row.groups()
        pos = _innermost_open_slot(levels, pos)
        pieces.append("\n" + position + "," + _parent_field(levels, pos)
                      + tag + "," + name + "," + status + "," + repeat)

    return "".join(pieces), pos


# Convert every message file named in the index to <tag>_w.txt under new/.
for tag in list_tag:
    filename_r = ss + '%s.txt' % tag
    try:
        os.rename(ss + '%s_D.%s' % (tag, year), filename_r)
    except OSError:
        # On a re-run the source has already been renamed: keep going when
        # the .txt file is there.  Otherwise stop, as the original did on
        # any rename failure.
        if not os.path.exists(filename_r):
            break

    filename_w = ss + '/new/%s_w.txt' % tag

    # Fresh nesting state for every message file.
    listgr = [0] * 10
    pos = -1

    # 'w' truncates a previous result; this replaces the original
    # "remove the file if it exists, then open for append" sequence.
    with open(filename_r) as fr, open(filename_w, 'w') as w2:
        for line in fr:
            text, pos = _convert_line(line, listgr, pos)
            w2.write(text)
# Merge all per-message results into one file named after the release,
# prefixing every line with the release identifier.
# NOTE(review): the merged file is opened in append mode, so running the
# script twice for the same release appends the rows a second time --
# confirm whether that is intended.
with open(ss + '/new/%s.txt' % year, 'a') as f2_w:
    for i, tag in enumerate(list_tag):
        # Raises if a per-message file is missing, as the original did.
        with open(ss + '/new/%s_w.txt' % tag) as f2_r:
            for line in f2_r:
                f2_w.write(year + ',' + line)
        print("--%i--is ok" % i)
321 #if __name__ == '__main__':
322
323
"""
Special cases



"""