答:
# _*_ coding:utf-8 _*_
import re

# Compiled once at import time instead of on every call. Group 1 is an
# optional run of digits at the point where the match starts; group 2 is the
# phrase that begins with "受到" and ends with "影响" ("affected by ...").
# A unicode literal with an escaped backslash is used (rather than a raw
# byte string that is decoded afterwards) so the same source works on both
# Python 2 and Python 3.
_MATH_LINE_RE = re.compile(u'(\\d*).*(受到.*影响)')


def get_math_line(source, encoding='gb2312'):
    """Search one line of text for the "受到...影响" phrase.

    Args:
        source: The line to inspect. Byte strings are decoded with
            ``encoding`` first; already-decoded text is searched as-is.
        encoding: Codec used to decode byte input. Defaults to ``'gb2312'``,
            the encoding the original implementation hard-coded.

    Returns:
        A ``(digits, phrase)`` tuple of text strings when the pattern is
        found, where ``digits`` may be the empty string, or ``None`` when
        the line does not contain the phrase.

    Raises:
        UnicodeDecodeError: If ``source`` is a byte string that is not valid
            in ``encoding``.
    """
    # The input file's encoding has to be known: decode bytes exactly once
    # here so that the regex always operates on text.
    if isinstance(source, bytes):
        source = source.decode(encoding)

    result = _MATH_LINE_RE.search(source)
    if result is None:
        return None
    return result.group(1), result.group(2)
# Walk source.txt line by line as raw bytes. Lines that get_math_line()
# recognises are reported on stdout; every other line is appended to
# dest.txt with its leading/trailing '\n' characters stripped.
with open('source.txt', 'rb') as src_file, open('dest.txt', 'w') as dst_file:
    for line in src_file.readlines():
        val = get_math_line(line)
        if not val:
            # No match: keep the line in the destination file.
            dst_file.write(line.strip('\n'))
            continue
        digits, phrase = val
        # A parenthesised single argument prints the same text as the bare
        # Python 2 print statement does.
        print("%s 匹配到 %s" % (digits.encode('utf-8'), phrase.encode('utf-8')))