GET请求类型:
代码总览
import requests
# Template for a fully-configured GET request. Every value below is a
# placeholder to be filled in (or dropped) before running.

# requests requires an explicit scheme; a bare "www.xxx.com" raises
# requests.exceptions.MissingSchema.
url = "https://www.xxx.com"

# Query-string parameters appended to the URL.
params = {
}

# Request headers. Fill in real values or delete the keys you do not need.
headers = {
    "Cookie": "",
    "Host": "",
    "User-Agent": "",
}

# Must be named `verify` because the call below passes verify=verify
# (the original `verif` spelling caused a NameError).
verify = True

# Proxy mapping, e.g. {"http": "http://host:port"}; empty means no proxy.
proxies = {
}

# (username, password) pair passed to requests.get as auth=.
auth = ("username", "password")

# Passed to requests.get as timeout=.
timeout = 10

r = requests.get(
    url,
    params=params,
    headers=headers,
    verify=verify,
    proxies=proxies,
    auth=auth,
    timeout=timeout,
)

# Choose the encoding used by r.text, then read the body as a string.
r.encoding = 'utf-8'
theData = r.text
步骤说明
1.首先导包
import requests
2.设置目标链接
# The scheme is mandatory: requests raises MissingSchema for a bare host name.
url = "https://www.xxx.com"
3.设置参数,以智联招聘为例如下:
# Example query-string parameters (taken from a Zhaopin job search request).
# All values are strings, exactly as they appear in the query string.
params = dict(
    start='90',
    pageSize='90',
    cityId='489',
    workExperience='-1',
    education='-1',
    companyType='-1',
    employmentType='-1',
    jobWelfareTag='-1',
    kw='python',
    kt='3',
)
4.设置请求头,以知乎为例
# Example request headers (captured from a Zhihu browser session).
# NOTE(review): this looks like a real session cookie; replace it with your
# own value and avoid publishing live credentials.
headers = {
    # The cookie value itself contains double quotes, so the literal is
    # single-quoted; the original double-quoted form was a SyntaxError.
    "Cookie": '_zap=91db937d-1873-427a-a1e4-4f3e593b2ef7; d_c0="AKClLCx9PQ-PTn9bTvkL29POnonke-d9iBo=|1554604491"; q_c1=6580f817fd2146d5b49e38ddf7437c72|1554604493000|1554604493000; __gads=ID=1a21c798fac2ab1a:T=1554604811:S=ALNI_MYsqJyrVqokxJWUR_evB1TKOg0GRA; _ga=GA1.2.454841891.1557395609; _xsrf=jdiREBIxqH39ZiyFYETcQSjnSgDGsSKv; capsion_ticket="2|1:0|10:1570887992|14:capsion_ticket|44:MjMzNDRhZWJjYWIwNDM1NDk5YmFkMzk3NjFhOWM5NjI=|52cb7d2eb90cd49772824a8295f0025c8fa0f9dd0808e39cc7b34bedf64817b5"; r_cap_id="ODNlZjMxNjg4MDAxNDk5Y2FlNzA0NzgxMWVmZDQyMzM=|1570887993|0f0d14769158d71b868e9c1a1c4f8ae41c0cdc27"; cap_id="MWUzYTBkYWMwZmMxNGJlMzlmN2EyOWRkZGU3YTc2MWQ=|1570887993|e0c6c5870b829f6e9f5cbcff5b47cda6079c2255"; l_cap_id="OTNiZjhmN2Y1NzczNGE3YjhjNmQzZGQ1YmIyMTBlM2M=|1570887993|60125ff0d64929826091bea0b065aba686f33d4c"; z_c0=Mi4xbEVybENRQUFBQUFBb0tVc0xIMDlEeGNBQUFCaEFsVk5QU2VQWGdDbUVLcGpNOVRZTTQ2NUZPMmRxRjlqV1A1QkpR|1570887997|dea232239aa826755bf8427e8f02f15f6d39e9e2; tst=r; tgw_l7_route=e9ff3200fd05d0af15498c125aecf1a1; Hm_lvt_98beee57fd2ef70ccdd5ca52b9740c49=1570887971,1570887976,1570954311; Hm_lpvt_98beee57fd2ef70ccdd5ca52b9740c49=1570954311',
    # The stray inner single quotes were removed; they would otherwise be
    # sent as part of the User-Agent header value.
    "User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
}
5.证书验证设置
经常会出现网页错误提示信息为:您的连接不是私密连接,这一般是因为网站证书没有被信任。将 requests.get 的 verify 参数设置为 False 可以跳过证书验证(默认为 True)。
# Must be spelled `verify`: the request call passes verify=verify, so the
# original `verif` name left `verify` undefined (NameError).
# False skips TLS certificate verification; use it only for testing.
verify = False
关闭证书验证后,每次请求都会输出证书验证相关的警告,可使用 logging 包将这些警告重定向到日志系统,使其不再直接输出:
import logging

# Route messages issued through the `warnings` module into the logging
# system instead of printing them directly.
logging.captureWarnings(capture=True)
6.设置代理IP
# Proxy mapping passed to requests.get as proxies=: one entry per URL scheme.
proxies = dict(
    http='http://159.224.13.29:61366',
    https='https://159.224.13.29:61366',
)
7.身份验证,使用auth设置用户名和密码来自动完成认证
# (username, password) pair passed to requests.get as auth=.
auth = (
    "username",
    "password",
)
8.设置超时时间(单位:秒),超过该时间未收到响应则抛出超时异常
# Passed to requests.get as timeout=; requests documents this value as seconds.
timeout = 10
9.发起请求
# Send the GET request, passing each previously configured option by keyword.
r = requests.get(
    url,
    params=params,
    headers=headers,
    verify=verify,
    proxies=proxies,
    auth=auth,
    timeout=timeout,
)
10.指定响应内容的编码,并以字符串形式读取响应内容
# Set the encoding that r.text uses to decode the response body.
r.encoding = 'utf-8'
# Read the response body as a str.
theData = r.text
POST请求类型
import requests
# The scheme is mandatory: requests raises MissingSchema for a bare host name.
r = requests.post("https://www.xxx.com")
# Print the response body as text.
print(r.text)