Python requests 是一个常用的 HTTP 请求库,可以方便地向网站发送 HTTP 请求,并获取响应结果。
一、导入requests包
二、设置url(网址,例:http://www.baidu.com)
三、requests发送请求get/post(设置配置参数)【也可只设置url,但可能会出错】例:
import requests
# Target URL to request.
url_path = "http://www.baidu.com"
# Request headers: a mobile Safari User-Agent string, plus 'Connection: close'
# to ask that the connection not be kept alive after the response.
User_Agent = {
    'User-Agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1",
    'Connection': 'close',
}
# Raw cookie string in "name=value;" form, as copied from a browser.
coo = " balh_server_custom_tw=https://bili.tuturu.top;" \
      " balh_server_custom_hk=https://bili-proxy.98e.org;" \
      " balh_server_custom=https://bili.tuturu.top;"
coos = coo.split(";")
# Store the cookies as a dict of name -> value.
# - Segments without "=" are skipped: the trailing ";" yields an empty segment
#   that would otherwise become a bogus {"": ""} cookie.
# - strip() removes the whitespace around each pair so names carry no leading space.
# - split("=", 1) splits on the first "=" only, so values containing "=" stay intact.
cookies_dict = dict(
    pair.strip().split("=", 1) for pair in coos if "=" in pair
)
# Proxy settings: both http and https traffic go through the same HTTP proxy.
proxies = {
    "http": "http://111.40.62.176:9091",
    "https": "http://111.40.62.176:9091",
}
# Request arguments:
#   url     - address to request
#   headers - request headers
#   cookies - cookies sent with the request
#   proxies - proxy servers to route the request through
#   verify  - False skips TLS certificate verification (insecure; demo use only)
#   timeout - seconds to wait before giving up; without it requests can block
#             forever when the proxy or host does not answer
# Response attributes:
#   .text    - body decoded to str
#   .content - body as raw bytes
reopen = requests.get(
    url=url_path,
    headers=User_Agent,
    cookies=cookies_dict,
    proxies=proxies,
    verify=False,
    timeout=10,
).text
print(reopen)
四、可使用re正则表达式或xpath获取标签内容,例:
'''由上得到的reopen'''
import re
# Non-greedy pattern capturing the src attribute of the <img> inside each <li>.
xe = r'<li>.*?<img src="(.*?)" alt.*?</li>'
# re.S lets '.' match newlines, so an <li> spanning several lines still matches.
img_src = re.findall(xe, reopen, re.S)
# Print the list of captured src values (the original printed the undefined
# name `img_sre`, which raised NameError).
print(img_src)
# 或
from lxml import etree
# Parse the fetched HTML text into an element tree.
er = etree.HTML(reopen)
# Absolute XPath selecting the href attribute of one specific <a>; the path
# is tied to the page's exact layout and must be adjusted per site.
data = er.xpath("/html/body/div[3]/div/div/ul[1]/li[2]/span[2]/a/@href")
# Print the XPath result (the original printed the undefined name `date`,
# which raised NameError).
print(data)