按要求，把一大段注释里的字母筛选出来。
from myTool import *
def extract_letters(text):
    """Return only the ASCII letters (a-z, A-Z) of ``text``, in order.

    Every other character -- digits, punctuation, whitespace, non-ASCII
    characters -- is dropped.
    """
    # Explicit range checks rather than str.isalpha(), which would also
    # accept non-ASCII letters.
    return ''.join(
        char for char in text
        if 'a' <= char <= 'z' or 'A' <= char <= 'Z'
    )


if __name__ == '__main__':
    # get_content is presumably provided by the star-import from myTool.
    html = get_content('ocr.html')

    # Elements 37..1256 of the fetched content hold the text to filter.
    # NOTE(review): presumably these are line indices of the page source --
    # confirm against myTool.get_content.
    # Slice + join replaces repeated string concatenation; unlike per-index
    # access, a slice does not raise IndexError on shorter content.
    text = ''.join(html[37:1257])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(text)

    ans = extract_letters(text)
    print(ans)
get_content() 是为了方便读取网页内容而写的辅助函数。
结果是equality