上一篇中提到了提取数据文件的操作;对于提取出的文件,我们还需要做一些预处理。这里要处理的是 Solidity 源码中的注释和空行:Solidity 和大部分语言一样,使用"//"(单行注释)和"/* */"(多行注释)两种注释形式;删除注释之后,还需要去掉因此留下的空白(空行)。
(1) Handle single-line comments ("//") in a Solidity source file
def Handle_single_comment(lines, i):
    """Strip a trailing ``//`` comment from ``lines[i]`` in place.

    The code that precedes the comment is kept (with trailing whitespace
    trimmed) and terminated with a newline. A line that holds nothing but
    a comment becomes the empty string ``''``.

    Args:
        lines: mutable list of source lines; only ``lines[i]`` is touched.
        i: index of the line to process.

    Returns:
        None. The list is modified in place.

    NOTE: the search is a plain ``str.find``, so a ``//`` inside a string
    literal (e.g. ``"http://..."``) is also treated as a comment start.
    """
    index = lines[i].find("//")
    if index == -1:
        return  # no single-line comment on this line
    # Keep only the code in front of the comment marker.
    code = lines[i][:index].rstrip()
    # Re-attach the line ending so the following line is not glued onto
    # this one; a comment-only line collapses to '' instead.
    lines[i] = code + '\n' if code else ''
(2) Handle block comments ("/* */") in a Solidity source file
# Shared scanner state: True while the scan is inside a "/* ... */" comment
# that was opened on an earlier line and has not been closed yet.
flag = False


def Handle_document_comment(lines, i):
    """Remove ``/* ... */`` comment text from ``lines[i]`` in place.

    The module-level ``flag`` carries the "inside a block comment" state
    from one line to the next, so lines must be fed in file order.

    Args:
        lines: mutable list of source lines; only ``lines[i]`` is touched.
        i: index of the line to process.

    Returns:
        -1: the whole line lies inside a block comment and should be
            deleted by the caller.
        -2: a comment opens on this line and is not closed on it; the code
            in front of the opener has been kept.
         0: no open block comment remains on this line (any complete
            comments on it have already been removed).

    NOTE: matching uses plain ``str.find``, so comment markers inside
    string literals are not recognised as literals.
    """
    global flag
    removed = False  # did this call cut any comment text out of the line?
    while True:
        line = lines[i]
        if flag:
            # Inside a comment opened on an earlier line: look for its end.
            end = line.find("*/")
            if end == -1:
                return -1  # should delete this
            flag = False
            lines[i] = line[end + 2:]  # keep what follows the comment
            removed = True
            continue  # continue looking for further comments

        start = line.find("/*")
        slashes = line.find("//")
        # A "/*" that appears after "//" is part of a single-line comment,
        # not the start of a block comment.
        if start == -1 or -1 < slashes < start:
            if removed and not line.strip():
                # Only comment text was on this line: mark it as empty.
                lines[i] = ''
            return 0  # not found

        end = line.find("*/", start + 2)
        if end == -1:
            # Only the opening marker is on this line.
            flag = True
            head = line[:start].rstrip()
            lines[i] = head + '\n' if head else ''
            return -2

        # Complete comment on one line: cut out just the comment span.
        lines[i] = line[:start] + line[end + 2:]
        removed = True
(3) Remove comments from a file
def RemoveComment(file):
    """Read ``file``, strip its comments, print and return the result.

    Each line is first passed to ``Handle_document_comment``; lines that
    lie entirely inside a block comment are deleted, and lines with no open
    block comment are then passed to ``Handle_single_comment``.

    The file on disk is not modified here; pass the returned list to
    ``writeResult`` to persist it.

    Args:
        file: path of the source file to read.

    Returns:
        The list of processed lines (comment-only lines may remain as
        empty strings ``''``).
    """
    global flag
    # Start every file outside of a block comment, so state left over from
    # a previous file that ended mid-comment cannot leak into this one.
    flag = False

    with open(file, "r") as f:
        lines = f.readlines()

    i = 0
    while i < len(lines):
        ret = Handle_document_comment(lines, i)
        if ret == -1:
            # -1 means "inside a block comment", so flag must still be set.
            if not flag:
                print("There must be some wrong")
            # Drop the line; the next line slides into index i.
            del lines[i]
            continue
        if ret == 0:
            Handle_single_comment(lines, i)
        # ret == -2: comment opened on this line, nothing more to do here.
        i += 1

    # At last print the handled result.
    Output(lines)
    return lines
(4) Write the result back to the file
def writeResult(file, lines):
    """Write ``lines`` to ``file``, skipping entries that are empty strings.

    Args:
        file: path of the file to (over)write.
        lines: iterable of strings; each non-empty entry is written as-is,
            so entries are expected to carry their own line endings.

    Returns:
        None.
    """
    # The context manager closes the file even if a write fails.
    with open(file, "w") as f:
        # Empty strings mark removed lines; skip them.
        f.writelines(line for line in lines if line != '')
(5) Print the result
def Output(lines):
    """Print every non-empty entry of ``lines`` on its own output line.

    Args:
        lines: iterable of strings; entries equal to ``''`` are skipped.

    Returns:
        None.
    """
    for line in lines:
        if line == '':
            continue  # removed line, nothing to show
        # Entries read from a file already end with a newline; drop it so
        # print() does not emit an extra blank line after each entry.
        print(line.rstrip('\n'))
以上步骤可以去除源代码文件中的注释("//"和"/* */")以及删除注释后留下的空行。
完整的可运行代码见:https://github.com/Messi-Q/SmartContract-Detection-Based-DeepLearning/blob/master/tools/remove_comment.py