#!D:\Python32
# -*- coding: utf-8-*-
# 过滤JAVA程序中的注释
# 如果字符串中有注释符号的话会有问题。
import os
import re
import io
# 改这个目录!!!
top_dir = "E:\\work2\\";
# 状态
S_INIT = 0;
S_SLASH = 1;
S_BLOCK_COMMENT = 2;
S_BLOCK_COMMENT_DOT = 3;
S_LINE_COMMENT = 4;
S_STR = 5;
S_STR_ESCAPE = 6;
def trim_dir(path):
print("dir:" + path);
for root, dirs, files in os.walk(path):
for name in files:
trim_file(os.path.join(root, name))
#for name in dirs:
#trim_dir(os.path.join(root, name))
def trim_file(path):
print("file:" + path);
if re.match(r".*?\.(java|c|cpp|h)$", path):
print("process");
else:
print("ignore");
return;
bak_file = path + ".bak";
try:
os.rename(path, bak_file);
except:
print "bak except",bak_file;
fp_src = open(bak_file);
fp_dst = open(path, 'w');
state = S_INIT;
for line in fp_src.readlines():
for c in line:
if state == S_INIT:
if c == '/':
state = S_SLASH;
elif c == '"':
state = S_STR;
fp_dst.write(c);
else:
fp_dst.write(c);
elif state == S_SLASH:
if c == '*':
state = S_BLOCK_COMMENT;
elif c == '/':
state = S_LINE_COMMENT;
else:
fp_dst.write('/');
fp_dst.write(c);
state=S_INIT;
elif state == S_BLOCK_COMMENT:
if c == '*':
state = S_BLOCK_COMMENT_DOT;
elif state == S_BLOCK_COMMENT_DOT:
if c == '/':
state = S_INIT;
elif c=='*':
state=S_BLOCK_COMMENT_DOT; #再次碰到*号还是要继续状态,否则会出错
else:
state = S_BLOCK_COMMENT;
elif state == S_LINE_COMMENT:
if c == '\n':
state = S_INIT;
fp_dst.write(c);
elif state == S_STR:
if c == '\\':
state = S_STR_ESCAPE;
elif c == '"':
state = S_INIT;
fp_dst.write(c);
elif state == S_STR_ESCAPE:
# 这里未完全实现全部序列,如\oNNN \xHH \u1234 \U12345678,但没影响
state = S_STR;
fp_dst.write(c);
fp_src.close();
fp_dst.close();
#os.remove(bak_file);
trim_dir(top_dir);
需要说明的是这段程序来自:http://blog.csdn.net/codearhat/article/details/6852483#comments
但是里面有两个问题,会引起错误,现在经验证可以在我的项目中使用,但是不保证完全没有错误,有错误希望和我联系,也可以和原作者联系