#_*_ coding:utf-8 _*_
importurllib2importre#import sys
# reload(sys)
# sys.setdefaultencoding('utf-8')
classTool:
removeImg= re.compile(r‘
‘)
removeAddr= re.compile(r‘|‘)
replaceLine= re.compile(r‘
|
|
|‘)
replaceTD= re.compile(r‘
‘)replacePara= re.compile(r‘
‘)replaceBR= re.compile(r‘
|
‘)
removeExtraTag= re.compile(r‘<.>‘)defreplace(self,text):
text= re.sub(self.removeImg,"",text)
text= re.sub(self.removeAddr,"",text)
text= re.sub(self.replaceLine,"\n",text)
text= re.sub(self.replaceTD,"\t",text)
text= re.sub(self.replacePara,"\n"+" ",text)
text= re.sub(self.replaceBR,"\n",text)
text= re.sub(self.removeExtraTag,"",text)re