1 需求
爬取百度首页的信息,在请求中添加 headers 参数,并处理返回数据的乱码问题。
2 代码实现
import requests
import re
# Request headers: send a desktop Firefox User-Agent string with the request.
headers = {"user-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'}
url = 'https://www.baidu.com/'


def repair_mojibake(text: str) -> str:
    """Undo a mistaken ISO-8859-1 decode of UTF-8 or GBK encoded text.

    The text is re-encoded as ISO-8859-1 to recover the original bytes,
    which are then decoded as UTF-8 and, if that fails, as GBK.

    Args:
        text: Text that may have been decoded with the wrong charset.

    Returns:
        The re-decoded text, or ``text`` unchanged when it cannot be
        encoded as ISO-8859-1 or the recovered bytes are neither valid
        UTF-8 nor valid GBK.
    """
    try:
        raw = text.encode('ISO-8859-1')
    except UnicodeEncodeError:
        # Contains characters outside Latin-1, so it was not produced by an
        # ISO-8859-1 decode; leave it as it is.
        return text

    # Try the stricter codec first: valid UTF-8 is rarely valid by accident.
    for codec in ('utf-8', 'gbk'):
        try:
            return raw.decode(codec)
        except UnicodeDecodeError:
            continue
    return text


if __name__ == "__main__":
    # The timeout keeps the script from waiting forever on a stalled server.
    res = requests.get(url=url, headers=headers, timeout=10).text
    res = repair_mojibake(res)
    print(res)