python读取文件并修改_python文件操作,读取,修改,合并

# -*- coding: utf-8 -*-
"""Extract the segment tables of the message files listed in an index file.

Original author's note: extraction starts from "11c".
"""

import os
import re

import numpy as np

# Release suffix carried by the input files; also prefixed to every merged row.
year = '17A'

# Directory that holds the index file and the per-message files.
ss = "./data/edmd/"

# Give the index file a .txt name.  Best effort: the rename fails when the
# file is missing or was already renamed by an earlier run, and the script
# carries on with whatever EDMDI1.txt is present.
try:
    os.rename(ss + "/EDMDI1." + year, ss + "/EDMDI1.txt")
except OSError:
    pass

# An index line names a message when a six-capital-letter tag follows a
# three-character prefix: three blanks, "X" plus two blanks, or one
# non-word character plus two blanks.
p1 = re.compile(r"^(?:\s{3}|X\s{2}|\W\s{2})([A-Z]{6})\s.+\n")

# Collect the tags in file order; `with` closes the index file, which the
# earlier version left open.
with open(ss + "/EDMDI1.txt") as f1:
    list_tag = [tag for line in f1 for tag in p1.findall(line)]

print(list_tag)

# Row layouts of the segment table inside a message file.  Every pattern is
# anchored at the start of the line and requires the trailing newline, so a
# final line without one is never recognised.  The patterns are compiled once
# here rather than once per message.

# Plain segment row -> position, tag, description, status (C/M), repeats.
_SEGMENT_ROW = re.compile(
    r"^(\d{5})\s{3}([A-Z]{3})(.+)([CM])\s{3}(\d*)\s{1,}\|{0,}\n")
# "Segment group N" header row -> position, group number, status, repeats.
_GROUP_ROW = re.compile(
    r"^(\d{5}).+Segment\sgroup\s(\d*).+([CM])\s{3}(\d*).+\n")
# Segment row ending in dashes, one or more "+", then optional "|"
# -> position, tag, status, repeats (the description is not captured).
_CLOSING_ROW = re.compile(
    r"^(\d{5})\s{3}([A-Z]{3}).+([CM])\s{3}(\d*)\-+\+{1,10}\|{0,20}\n")
# Segment row ending in exactly one "+": closes a single group.
_CLOSES_ONE = re.compile(
    r"^\d{5}\s{3}[A-Z]{3}.+[CM]\s{3}\d*\-+\+\|{0,10}\n")
# Group header rows indexed by nesting depth: "+" followed by `depth` pipes.
_GROUP_OPENERS = [
    re.compile(r"^\d{5}.+Segment\sgroup\s(\d*).+[CM]\s{3}\d*\-+\+"
               + r"\|" * depth + r"\n")
    for depth in range(7)
]
# Rows ending in 2..7 "+" signs: they clear `extra` levels in addition to the
# deepest open one.
_CLOSES_MANY = [
    (extra,
     re.compile(r"^\d{5}.+[CM]\s{3}\d*\-+\+{%d}\|{0,20}\n" % (extra + 1)))
    for extra in range(1, 7)
]


def _deepest_open_level(levels, current):
    """Return the index just before the first empty (0) slot of *levels*.

    The result is -1 when slot 0 is empty; *current* is returned unchanged
    when no slot is empty.
    """
    for index, value in enumerate(levels):
        if value == 0:
            return index - 1
    return current


def _parent_label(levels, pos):
    """Return the parent column: "SG" plus the group at *pos*, or "SG0"."""
    owner = levels[pos] if pos != -1 else 0
    return "SG" + str(owner)


def _table_rows(lines):
    """Yield one comma-separated row for each recognised table line.

    Group headers and closing segment rows yield five fields (position,
    parent, group number or tag, status, repeats); plain segment rows yield
    six, with the description between tag and status.
    """
    levels = [0] * 10   # group number open at each nesting depth, 0 = none
    pos = -1            # index of the deepest open group, -1 = top level

    for line in lines:
        found = _GROUP_ROW.match(line)
        if found:
            number, group, status, repeats = found.groups()
            yield ",".join(
                (number, _parent_label(levels, pos), group, status, repeats))

        found = _CLOSING_ROW.match(line)
        if found:
            number, tag, status, repeats = found.groups()
            yield ",".join(
                (number, _parent_label(levels, pos), tag, status, repeats))

        # From here on the nesting state is updated for the following rows.
        if _CLOSES_ONE.match(line):
            pos = _deepest_open_level(levels, pos)
            levels[pos] = 0

        for depth, opener in enumerate(_GROUP_OPENERS):
            found = opener.match(line)
            if found:
                # Toggle: an empty slot takes the new group number, an
                # occupied slot is cleared.
                levels[depth] = found.group(1) if levels[depth] == 0 else 0

        for extra, closer in _CLOSES_MANY:
            if closer.match(line):
                pos = _deepest_open_level(levels, pos)
                # Negative indices wrap to the end of the list, as before.
                for k in range(pos - extra, pos + 1):
                    levels[k] = 0

        # NOTE(review): the published listing lost its indentation, so it is
        # unclear whether this refresh ran for every line or only for plain
        # segment rows.  Running it for every line leaves plain-row output
        # the same either way and lets a group that follows a closed sibling
        # report its real parent instead of SG0 -- confirm against the
        # original script.
        pos = _deepest_open_level(levels, pos)

        found = _SEGMENT_ROW.match(line)
        if found:
            number, tag, description, status, repeats = found.groups()
            yield ",".join((number, _parent_label(levels, pos), tag,
                            description, status, repeats))


for tag in list_tag:
    try:
        os.rename(ss + '%s_D.%s' % (tag, year), ss + '%s.txt' % tag)
    except OSError:
        # NOTE(review): a failed rename stops the whole loop, so the
        # remaining tags are not converted; confirm `break` is intended
        # rather than `continue`.
        break

    filename_r = ss + '%s.txt' % tag
    filename_w = ss + '/new/%s_w.txt' % tag

    # Mode 'w' replaces any output left by an earlier run (previously done
    # by deleting the file and then appending).
    with open(filename_r) as fr, open(filename_w, 'w') as w2:
        for row in _table_rows(fr):
            # Each row is preceded by its newline, so the file starts with
            # an empty line and has no trailing newline.
            w2.write("\n" + row)

# Merge every per-message table into one file, prefixing each row with the
# release tag.  The merged file is opened in append mode, so re-running the
# script adds the rows again.
with open(ss + '/new/%s.txt' % year, 'a') as f2_w:
    for i, tag in enumerate(list_tag):
        with open(ss + '/new/%s_w.txt' % tag) as f2_r:
            for line in f2_r:
                # The per-message files have no trailing newline, so the last
                # row of one file used to be glued to the first row of the
                # next; terminate every row explicitly.  (The leading blank
                # line of each file still yields a bare "<year>," row.)
                f2_w.write(year + ',' + line.rstrip("\n") + "\n")
        print("--%i--is ok" % i)

# if __name__ == '__main__':

"""
Special cases
"""

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值