废话不多说,直接开干!
用到的知识点:
response.encoding = 'gb2312' # 指定 requests 用 gb2312 来解码响应的字节内容,这样 response.text 得到的就是正确的中文(unicode)字符串。
看完整代码
# coding=gb2312
import requests
from lxml import etree
if __name__ == '__main__':
    # Fetch one listing page and print its HTML with the Chinese text
    # decoded correctly.
    url = 'https://pic.netbian.com/4kmeinv/'
    headers = {
        # Optional browser-like User-Agent; left disabled, as in the original.
        # "User-Agent": "Mozilla/5.0(Windows NT 10.0;Win64;x64;rv: 96.0) Gecko/20100101 Firefox/96.0"
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of printing an error page as if it
    # were the listing.
    response.raise_for_status()
    # Set the codec explicitly: response.text is decoded using
    # response.encoding, so this must be assigned before .text is read.
    response.encoding = 'gb2312'
    page_text = response.text
    print(page_text)

    # --- Disabled experiment: parse image URLs and names (alt attribute) ---
    # tree = etree.HTML(page_text)
    # li_list = tree.xpath('//div[@class="slist"]/ul/li')
    # for li in li_list:
    #     img_src = 'https://pic.netbian.com'+li.xpath('./a/img/@src')[0]
    #     img_name = li.xpath('./a/img/@alt')[0]+'.jpg'
    #     # Attempted workaround for garbled Chinese text in the name
    #     img_name = img_name.encode('raw_unicode_escape')
    #     ddd = 'cosplay\\u052d\\ufffd\\ufffd \\u027a\\ufffd\\ufffd\\ufffd\\ufffd\\ufffd\\u013a\\ufffd cos\\ufffd\\ufffd\\u016e4k\\ufffd\\ufffd\\u05bd.jpg'
    #     ddd = ddd.encode('gbk')
    #     print(ddd)
    #     print(img_name)
运行后,中文可以正常显示了。