本脚本主要利用正则表达式去除每行开头的行号(形如“12:”的前缀),从 in.txt 读取,结果写入 data.txt。
import re
# Name of the output file (receives the text with line numbers removed).
OUTPUT_PATH = 'data.txt'
# Name of the input file (a listing whose lines start with "N:" prefixes).
INPUT_PATH = 'in.txt'

# A 1-4 digit line number followed by ':' at the very start of a line.
# Leading blanks are captured so they can be put back unchanged. Anchoring
# with '^' keeps later "N:" text inside the line (e.g. "case 1:") intact.
LINE_NUMBER_PREFIX = re.compile(r'^([ \t]*)[0-9]{1,4}:')


def strip_line_number(line_str):
    """Return ``line_str`` without its leading "N:" line-number prefix.

    Only the prefix itself is removed: whitespace before the number and
    everything after the colon (including the line ending) are kept. A line
    that does not start with such a prefix is returned unchanged.
    """
    return LINE_NUMBER_PREFIX.sub(r'\1', line_str, count=1)


def strip_line_numbers(lines):
    """Yield every line of ``lines`` with its line-number prefix removed.

    Every input line produces exactly one output line, so the first line is
    processed like any other and lines without a prefix are passed through
    rather than dropped.
    """
    for line_str in lines:
        yield strip_line_number(line_str)


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Copy ``input_path`` to ``output_path`` with line numbers stripped.

    Both files are opened with the platform default text encoding. The input
    is opened first so that a missing input file does not truncate an
    existing output file, and ``with`` closes both files even on error.
    """
    with open(input_path) as file_object, open(output_path, 'w') as output:
        output.writelines(strip_line_numbers(file_object))


if __name__ == '__main__':
    main()
效果:
处理前
1: import java.io.BufferedReader;
2: import java.io.FileInputStream;
3: import java.io.IOException;
4: import java.io.InputStreamReader;
5:
6: /**
7: *
8: * @author
9: *
10: *
11: */
处理后
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
/**
*
* @author
*
*
*/