对 requests 请求返回的页面内容进行解码,直接调用即可。
def code_conversion(self, response):
    """Decode the body of a ``requests`` response into text.

    The encoding is guessed from the raw bytes with ``chardet``. When no
    encoding can be guessed, the body is treated as a gzip stream that was
    not decompressed yet: it is decompressed and the encoding is guessed
    again from the decompressed bytes (falling back to the GB family when
    that guess also fails).

    :param response: response object returned by ``requests``; only its
        ``content`` attribute (the raw body bytes) is read.
    :return: the decoded text, or ``''`` when the body is empty or cannot
        be decoded. Failures are printed, never raised.
    """
    raw = response.content
    if not raw:
        # Nothing to detect or decode (content is b'' or None); calling
        # chardet.detect(None) would raise instead of returning ''.
        return ''

    encoding = chardet.detect(raw)['encoding']
    if encoding is None:
        # Undetectable bytes: assume a still-compressed gzip body.
        try:
            raw = gzip.decompress(raw)
        except Exception as exc:  # best effort: report and give up
            print(exc)
            print('使用压缩文件转换编码时出现了问题')
            return ''
        # Guess again on the real payload instead of assuming GB2312.
        encoding = chardet.detect(raw)['encoding'] or 'GB2312'

    if str(encoding).upper() == 'GB2312':
        # Pages labelled/detected as GB2312 routinely contain GBK or
        # GB18030 characters; the strict GB2312 codec combined with
        # 'ignore' would silently drop them. GB18030 is a superset.
        encoding = 'GB18030'

    try:
        return raw.decode(str(encoding), 'ignore')
    except Exception as exc:  # e.g. LookupError for an unknown codec name
        print(exc)
        print('编码格式出现了问题,需要转换的编码为', encoding)
        return ''