# Level 1 (第1关): requests basics
import requests
def get_html(url: str) -> str:
    """Fetch a page with a desktop-browser User-Agent and return its HTML.

    The request headers are built inside the function; ``url`` is the only
    argument the caller supplies.

    :param url: address of the page to request
    :return: the response body decoded as UTF-8
    """
    # ***************** Begin ******************** #
    # Request headers: identify as desktop Chrome on Windows.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/"
               "537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}
    # Issue a single GET that actually carries the headers. Previously the
    # headers were never sent and the page was downloaded twice.
    response = requests.get(url, headers=headers)
    # Force UTF-8 on the response whose text is returned, so the setting
    # takes effect on the result instead of on a discarded response.
    response.encoding = 'utf-8'
    # Decoded page source.
    html = response.text
    # ***************** End ******************** #
    return html
# Level 2 (第2关): requests advanced
import requests
def get_html(url: str) -> "tuple[str, requests.Session]":
    """Fetch a page through a ``requests`` session and return both.

    The request headers (a mobile User-Agent plus a Cookie) are built inside
    the function; ``url`` is the only argument the caller supplies.

    :param url: address of the page to request
    :return: ``(html, sess)`` where ``html`` is the page source obtained via
        the session and ``sess`` is the session that made the request
    """
    # ***************** Begin ******************** #
    # Request headers: mobile Chrome User-Agent plus a Cookie header.
    # NOTE(review): the Cookie value looks like a captured login session
    # (it includes a BDUSS token) -- consider loading it from configuration
    # instead of keeping it in source; confirm with the owner.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/'
                      '537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Mobile Safari/537.36',
        "Cookie": "BAIDUID=53B7CC4BFCDC39D2EF625C13D285429D:FG=1; BIDUPSID=53B7CC4BFCDC39D2EF625C13D285429D; "
                  "PSTM=1591665716; BD_UPN=12314753; BDUSS=2N2ajRYZnI2cVlZN1FRemlWNU9FV1lSZFM3SnZBS0dvRW44WFRCUTRWck1mUVpmR"
                  "VFBQUFBJCQAAAAAAAAAAAEAAAAoKJzNMTIyMzM4ODQ1uNW41QAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
                  "AAAAAAAAAAAAMzw3l7M8N5eS; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; sug=3; sugstore=1; ORIGIN=0; bdime=0; "
                  "H_PS_PSSID=1456_31672_32139_31253_32046_32230_31708_32295_26350_22160; delPer=0; BD_CK_SAM=1; PSINO=6; "
                  "H_PS_645EC=3b86vFCd303Aw0wmqvkcAGpfxU4oXfwYcs6jRd1RnxihTsvhfqaVB%2BIoeBs; BDSVRTM=0",
    }
    # Create the session and fetch the page with the session's GET.
    # Previously the headers were never sent, the page was requested four
    # times (including an empty POST), and the returned text came from a
    # request made outside the session.
    sess = requests.session()
    # Headers are passed per request, so the returned session keeps its
    # default headers; cookies set by the server are stored on the session.
    response = sess.get(url, headers=headers)
    # Page source from the session request.
    html = response.text
    # ****************** End ********************* #
    return html, sess