你用的是哪个版本的 Python?line 是字符串(str)吗?
请参考文档:https://docs.python.org/2/library/stdtypes.html#str.translate
Unicode 处理起来比较麻烦,我查了 Stack Overflow 才找到下面的办法:
import unicodedata
import sys
# Python 2 spells these ``unichr``/``xrange``; Python 3 folded them into
# ``chr``/``range``.  Pick whichever pair exists so the table builds on both.
try:
    _to_char, _code_points = unichr, xrange
except NameError:
    _to_char, _code_points = chr, range

# Translation table: the ordinal of every code point whose Unicode general
# category starts with "P" (punctuation), each mapped to None so that
# ``translate`` deletes it.  ``sys.maxunicode`` is itself a valid code
# point, hence the ``+ 1`` on the upper bound.
tbl = dict.fromkeys(
    i for i in _code_points(sys.maxunicode + 1)
    if unicodedata.category(_to_char(i)).startswith('P')
)


def remove_punctuation(text):
    """Return *text* with every Unicode punctuation character removed.

    *text* must be a text string (``unicode`` on Python 2, ``str`` on
    Python 3): the dict-based table in ``tbl`` is only accepted by the
    text-string flavour of ``translate``, not by byte strings.
    """
    return text.translate(tbl)
# Usage: simply call remove_punctuation(line).
# Alternatively, a more compact version using the third-party `regex` module:
import regex as re
def remove_punctuation(text):
    r"""Return *text* with every run of Unicode punctuation removed.

    Relies on the name ``re`` being bound to the ``regex`` package (this
    file does ``import regex as re``); the pattern uses the ``\p{P}``
    Unicode property class, which the standard-library ``re`` module does
    not understand.
    """
    # The original ``ur"..."`` prefix is a syntax error on Python 3; an
    # escaped backslash in a plain ``u""`` literal yields the identical
    # pattern text and parses on both Python 2 and Python 3.3+.
    return re.sub(u"\\p{P}+", "", text)