# 01request.测试 — basic request test
import requests
class RequestSpider(object):
    """Fetch the Baidu home page and inspect the request/response pair.

    The HTTP GET is issued in ``__init__``; ``run`` only reads attributes of
    the stored response object.
    """

    def __init__(self):
        """Send the GET request and keep the response on ``self.response``."""
        url = "https://www.baidu.com/"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
        }
        self.response = requests.get(url=url, headers=headers)

    def run(self):
        """Print the cookies attached to the request, then those on the response."""
        # The next four lookups are not used further; they are kept to show
        # which attributes the response object exposes.
        data = self.response.content                      # raw body
        requests_headers = self.response.request.headers  # headers of the sent request
        response_headers = self.response.headers          # headers of the response
        code = self.response.status_code                  # status code

        # NOTE: ``_cookies`` is a private attribute of the prepared request.
        requests_cookie = self.response.request._cookies
        print(requests_cookie)

        # Print the response cookies here; the original printed the request
        # cookies a second time.
        response_cookie = self.response.cookies
        print(response_cookie)
# Constructing the spider sends the request; run() then prints its cookies.
spider = RequestSpider()
spider.run()
# 02request.百度网址拼接 — building a Baidu search URL from params
import requests
# Search Baidu for a keyword, passing the query through ``params`` instead of
# concatenating it into the URL by hand, and save the result page to disk.
url = "https://www.baidu.com/s?"
headers = {
    # Same well-formed User-Agent as the other scripts in this file (the
    # previous value contained typos such as "Window NT" and "ApleWebKit").
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
}
# Query-string parameters; requests encodes them into the URL.
params = {
    'wd': '美女'
}
response = requests.get(url=url, headers=headers, params=params)
# bytes.decode() defaults to UTF-8.
data = response.content.decode()
with open('baidu.html', 'w', encoding='utf-8') as f:
    f.write(data)
# POST usage, for reference only. The arguments are placeholders, so this is
# kept as a comment rather than executable code (it raised NameError before):
# requests.post(url=url, data=<form fields>, json=<JSON body>)
# 03request.json处理 — handling a JSON response
import requests
import json
# Call the GitHub "user" endpoint (no credentials are passed) and print the
# "message" field of the decoded JSON body.
url = "https://api.github.com/user"
response = requests.get(url)
# Response.json() parses the body, so the json module is not needed here.
data = response.json()
message = data['message']
print(message)
# 04内网认证 — intranet authentication
import requests
# Template for requesting a page that requires HTTP authentication.
# Fill in ``url``, ``user`` and ``pwd`` before running; with an empty URL the
# request cannot be sent.
url = ""
# Unused placeholder kept from the original template.
data = {
}
# Credential placeholders. These names were previously undefined, so building
# the auth tuple raised NameError.
user = ""
pwd = ""
# A (username, password) tuple is the requests shorthand for HTTP Basic auth.
auth = (user, pwd)
response = requests.get(url, auth=auth)
# 05request.ip代理 — IP proxy
import requests
# Send a request through an HTTP proxy and print the resulting status code.
url = "http://baidu.com"
headers = {
    # Same well-formed User-Agent as the other scripts in this file (the
    # previous value contained typos such as "Mozil" and "Widows NT").
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
}
# Keys are the scheme of the *target* URL. The proxy URL itself must carry a
# scheme, so "http://" is now included.
proxy = {'http': 'http://115.223.7.110:80'}
respones = requests.get(url=url, headers=headers, proxies=proxy)
print(respones.status_code)
# 06verify.忽略证书访问 — skipping certificate verification
import requests
# Fetch a page while skipping TLS certificate verification and save it.
url = "https://www.12306.cn/"
headers = {
    # Same User-Agent as the other scripts in this file (the previous value
    # had truncated version numbers, e.g. "Safari/37.36").
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
}
# SECURITY: verify=False disables certificate checking, so the server's
# identity is not validated. Only acceptable for experiments like this one.
response = requests.get(url=url, headers=headers, verify=False)
# bytes.decode() defaults to UTF-8.
data = response.content.decode()
with open('03-ssl.html', 'w', encoding='utf-8') as f:
    f.write(data)
# 07cookie.字典推导式 — cookies via a dict comprehension
import requests
# Request a members-only page by replaying a browser "Cookie" header, converted
# to the dict form that requests expects, and save the returned HTML.
member_url = "https://www.yaozh.com/member/"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
}
# SECURITY: a raw cookie header containing session identifiers is hard-coded
# here. It should be loaded from outside the source file.
cookies = 'acw_tc=2f624a3816063084872425645e1a39db08d805ba42e9362646c61150b84459; PHPSESSID=5d55asfp4jq8qjljpdncefv42; Hm_lvt_65968db3ac154c3089d7f9a4cbb98c94=1606308489; _ga=GA1.2.358237203.1606308489; _gid=GA1.2.81185910.1606308489; _gat=1; Hm_lpvt_65968db3ac154c3089d7fa4cbb98c94=606308591; yaozh_logintime=1606308608; yaozh_user=1011508%09%E8%8B%8F%7%A9%86%E5%86%B0%E7%99%BD%E6%9C%88%E6%99%A8; yaozh_userId=1011508; yaozh_jobstatus=kptt67UcJie6zKnFSe2JyYnoaSZ5ZmmpSdg26qb21rg66flM6bh5%2BscZJwbIVN7fBLDecc6%2BVM%2FSJHz78b0dmDdKRtmnCH0Jqq1ZemzNL2C34c39eb300BDcD84ff78fcbFa0e16Hg5%2FXm2iVb4eVm5FpamVwZphwU27UcJmeW6vKpZeFoNWblZebWZZrlZqYkWttZXBjmXJTbt4%3Dded04249ebd3f7f8fdf580ddf057dc78; db_w_auth=835368%09%E8%8B%8F%E7%A9%86%E5%86%B0%E7%99%BD%E6%9C%88%E6%99%A8; UtzD_f52b_saltkey=TYzr7923; UtzD_f52b_lastvisit=1606305009; UtzD_f52b_lastact=1606308609%09uc.php%09; UtzD_f52b_auth=44539GcljwyxnF28hO62fM7%2BiB5r6H7G7FwkTDCk4rpBIsMb%2Br6V%2Fa%2FCAe4n31ipRHhVvcRN%2B20L6QceoqsP5KEAFik; yaozh_uidhas=1; yaozh_mlogin=1606308611; acw_tc=2f624a3816063084872425645e1a39db08d80ba42e9362646c61150b84459'
# The header is a list of "name=value" pairs separated by "; ".
cookies_list = cookies.split('; ')
# partition('=') splits on the FIRST "=" only, so a value that itself contains
# "=" is kept whole, and a pair without "=" maps to '' instead of raising.
# A name that appears twice keeps its last value, as before.
cook_dict = {
    name: value
    for name, _, value in (cookie.partition('=') for cookie in cookies_list)
}
respone = requests.get(url=member_url, headers=headers, cookies=cook_dict)
# bytes.decode() defaults to UTF-8.
data = respone.content.decode()
with open("药智网.html", 'w', encoding="utf-8") as f:
    f.write(data)
# 08session.账号登录获取cookie — logging in with a session to obtain cookies
import requests
# Log in with a Session so the cookies set by the login response are reused
# for the following request to the members-only page, then save that page.
member_url = "https://www.yaozh.com/member/"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36'
}
login_url = 'https://www.yaozh.com/login'
# SECURITY: account credentials are hard-coded below. They should be read from
# the environment or a prompt, not stored in source.
login_from_data = {
    'username': '苏穆冰白月晨',
    'pwd': '119000sr',
    # presumably a per-page form token copied from the login page — TODO confirm
    'formhash': '2E94DD8BE',
    # NOTE(review): '%F%' is not a valid percent-escape; this looks like a
    # damaged URL-encoded back-link. Confirm the intended value.
    'backurl': '%F%www.yaozh.com',
}
# The context manager closes the session's connections when the block exits.
with requests.Session() as session:
    # The login response is not inspected; the POST is made for the cookies
    # the session stores from it.
    login_response = session.post(url=login_url, data=login_from_data, headers=headers)
    data = session.get(url=member_url, headers=headers).content.decode()
with open("药智网.html", 'w', encoding="utf-8") as f:
    f.write(data)