import html
def get_article_content(url, timeout=10):
    """Fetch a page and return its HTML with character references unescaped.

    The page is requested with a desktop Firefox ``User-Agent`` header, the
    raw response body is decoded as UTF-8, and the result is passed through
    ``html.unescape`` so that references such as ``&amp;`` or ``&#39;`` become
    the characters they stand for.

    :param url: address of the page to download.
    :param timeout: seconds to wait for the server before giving up; passed
        straight to ``requests.get``. Without a timeout a stalled server
        would block the caller indefinitely.
    :return: the decoded page source as ``str`` with HTML character
        references unescaped.
    :raises requests.exceptions.RequestException: if the request fails or
        times out.
    :raises UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    # Imported locally so this function does not depend on a module-level
    # ``import requests`` being present.
    import requests

    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0'}
    response = requests.get(url, headers=header, timeout=timeout)
    # Decode the raw bytes explicitly instead of relying on the encoding
    # guessed from the response headers.
    real_html = response.content.decode('utf8')
    return html.unescape(real_html)
html模块中有个方法unescape,可以处理html中转义字符