#!/usr/bin/python
#coding=utf8
"""
This class is used to extract text from a string content, mostly used
when we need to extract what we want from a downloaded html page
"""
__author__ = """jemygraw@gmail.com"""


class TextUtil:
    """Cursor-style extractor for text found between two marker strings.

    Typical use: call ``selectText(start, end)`` to locate the next region
    delimited by the two markers, then ``extractText()`` to read what lies
    between them. Each successful selection advances an internal cursor, so
    repeated ``selectText`` calls walk forward through the content.

    Attributes:
        content: the string being searched.
        start_index: position from which the next ``selectText`` searches.
        end: highest position at which an end marker may begin.
        from_index: position where the current selection's start marker begins.
        end_index: position where the current selection's end marker begins.
        start_flag / end_flag: markers of the current selection.
    """

    def __init__(self, content):
        self.content = content
        self.start_index = 0
        self.end = len(content)
        # Start with an empty selection so extractText() is safe to call
        # before any selectText() has succeeded.
        self.from_index = 0
        self.end_index = 0
        self.start_flag = ""
        self.end_flag = ""

    def setStart(self, start):
        """Set the position from which the next selectText() searches."""
        self.start_index = start

    def setEnd(self, end):
        """Set the highest position at which an end marker may begin."""
        self.end = end

    def selectText(self, start, end):
        """Select the next region delimited by the markers *start* and *end*.

        The search for *start* begins at the internal cursor; *end* is then
        searched for after the start marker. On success the selection is
        recorded, the cursor moves just past the end marker, and True is
        returned. On failure the previous selection and cursor are left
        untouched and False is returned.
        """
        from_index = self.content.find(start, self.start_index)
        if from_index == -1:
            return False

        end_index = self.content.find(end, from_index + len(start))
        if end_index == -1 or end_index > self.end:
            return False

        # Commit the markers together with the indexes so that a failed
        # lookup can never leave a half-updated selection behind.
        self.start_flag = start
        self.end_flag = end
        self.from_index = from_index
        self.end_index = end_index
        self.start_index = end_index + len(end)
        return True

    def extractText(self, fromFlag=None, toFlag=None):
        """Return text from the current selection.

        With no arguments, returns everything between the selection's start
        and end markers. Otherwise:

        * *fromFlag* - the result starts right after the first occurrence of
          this marker found at or after the beginning of the selected text;
          when None, the result starts at the beginning of the selected text.
        * *toFlag* - the result stops right before the first occurrence of
          this marker found after the starting point; when None, the result
          stops where the selection's end marker begins.

        The marker searches are not limited to the selected region. An empty
        string is returned when a requested marker cannot be found.
        """
        begin = self.from_index + len(self.start_flag)
        if fromFlag is None and toFlag is None:
            return self.content[begin:self.end_index]

        if fromFlag is not None:
            found = self.content.find(fromFlag, begin)
            if found == -1:
                return ""
            begin = found + len(fromFlag)

        if toFlag is None:
            stop = self.end_index
        else:
            stop = self.content.find(toFlag, begin)
            if stop == -1:
                return ""
        return self.content[begin:stop]

    def deselectText(self):
        """Drop the current selection and rewind the cursor to position 0."""
        self.from_index = 0
        self.end_index = 0
        self.start_index = 0
        # Clear the markers as well so the object matches a fresh instance.
        self.start_flag = ""
        self.end_flag = ""