# 去除标点 — remove punctuation
# Replace every run of whitespace/punctuation in each `content` value with a
# single space and store the result under `clean` (`test` is presumably a
# pandas DataFrame -- confirm against the caller).
#   * first class:  whitespace and ASCII punctuation
#   * second class: CJK / full-width punctuation, including the colons
# The previous pattern had a literal "::" *after* the second class, so CJK
# punctuation was only replaced when immediately followed by two colons; the
# colons belong inside the class.
test['clean'] = test.content.apply(
    lambda text: re.sub(
        r'[\s+\.\!\/_,$%^*(+\"\')]+|[+——()?【】“”!,。?、~@#¥%……&*()::]+',
        " ",
        text,
    )
)
# 取出中文 — keep only the Chinese characters
# Delete every character outside U+4E00..U+9FA5 from each `content` value,
# leaving only the ideographs in that range, and store the result under `ch`.
chapter['ch'] = chapter.content.apply(
    lambda text: re.sub(r'[^\u4e00-\u9fa5]', "", text)
)
# 取两个字符中间的内容(返回list) — extract the text between two delimiter characters; returns a list
# Capture whatever sits between '共' and '页' in the text of the <td> found
# with align='left' (`soup` is presumably a BeautifulSoup document -- confirm).
# findall with a single group returns a list of the captured strings.
r = re.compile(".*共(.*)页.*").findall(
    soup.find('td', align='left').text
)