1. 去除标点
def removeBianDian(self, word):
    """Strip ASCII and CJK punctuation characters from ``word``.

    Args:
        word: Text to clean. A byte string is decoded as UTF-8 first;
            any other value is handed to ``re.sub`` unchanged.

    Returns:
        The text with every run of the listed punctuation characters
        removed. Whitespace, letters and digits are left untouched.
    """
    # ``bytes`` is an alias of ``str`` on Python 2, so this check keeps the
    # original Python 2 behaviour while also working on Python 3, where
    # text ``str`` objects have no ``decode`` method.
    if isinstance(word, bytes):
        word = word.decode("utf8")
    # Same two character sets as before, written as a unicode literal and
    # without the redundant backslashes in front of '.', '!', '/' and ')',
    # which are invalid string escapes on Python 3.
    # NOTE(review): '?' and ')' appear twice in the second set, which may
    # mean full-width variants were lost in transcoding -- confirm against
    # the upstream source.
    pattern = u"[.!/_,$%^*(+\"']+|[+——!,。??、~@·#¥%……&*(:))-]+"
    return re.sub(pattern, u"", word)
2. 全角转半角
def strQ2B(self,ustring):
"""全角转半角"""
if isinstance(ustring,str):
ustring = ustring.decode("utf8")
rstring = ""
for uchar in ustring:
inside_code=ord(uchar)
if inside_code == 12288:
inside_code = 32
elif (inside_code >= 65281 and inside_code <= 65374):