因为 requests 底层基于 urllib3,其请求流程与标准库 urllib(Python 2 中为 urllib2)一脉相承,所以先看一下 urllib2 的请求过程。
import urllib2
def load_baidu_data():
    """Fetch the Baidu home page and return its raw body.

    Builds a ``urllib2.Request`` for ``http://www.baidu.com``, opens it with
    ``urllib2.urlopen`` and reads the whole response.

    Returns:
        The response body exactly as produced by ``response.read()``.

    Raises:
        Any exception raised by ``urllib2.urlopen`` or ``read()`` (for
        example ``urllib2.URLError``) propagates to the caller.
    """
    url = 'http://www.baidu.com'
    request = urllib2.Request(url)
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        # Release the underlying connection even when read() fails.
        response.close()
if __name__ == "__main__":
    # Script entry point: fetch the page and print the returned body.
    print(load_baidu_data())
Windows 环境下,Python 2 中 urllib2 的 GET 请求。
import urllib2
def user_defined():
    """Fetch the Baidu home page through an explicitly built opener and print it.

    Steps: build a request carrying a custom ``User-Agent`` header, create an
    ``urllib2.HTTPHandler``, build an opener from that handler, send the
    request with ``opener.open`` and print the body.

    Returns:
        None. The response body is printed to standard output.

    Raises:
        Any exception raised by ``opener.open`` or ``read()`` propagates.
    """
    url = "http://www.baidu.com"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"}
    request = urllib2.Request(url, headers=headers)

    handler = urllib2.HTTPHandler()
    opener = urllib2.build_opener(handler)

    response = opener.open(request)
    try:
        # Single-argument print(...) behaves the same as the print statement
        # on Python 2.
        print(response.read())
    finally:
        # Release the underlying connection even when read() fails.
        response.close()
if __name__ == "__main__":
    # Script entry point: run the custom-opener demo.
    user_defined()
实际的过程是:创建处理器 HTTPHandler,根据处理器构建自定义的 opener,再调用 opener.open() 发送请求。
那如果需要使用代理(proxy)呢?
import urllib2
def proxy_opener():
    """Fetch the Baidu home page through a proxy-aware opener and print it.

    Steps: build a request carrying a custom ``User-Agent`` header, create an
    ``urllib2.ProxyHandler`` from the ``proxy`` mapping, build an opener from
    that handler, send the request with ``opener.open`` and print the body.

    Returns:
        None. The response body is printed to standard output.

    Raises:
        Any exception raised by ``opener.open`` or ``read()`` propagates.
    """
    url = "http://www.baidu.com"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"}
    request = urllib2.Request(url, headers=headers)

    # NOTE(review): ":" looks like a redacted "host:port" placeholder --
    # confirm and fill in a real proxy address before running.
    proxy = {"http": ":"}
    proxy_handler = urllib2.ProxyHandler(proxy)
    opener = urllib2.build_opener(proxy_handler)

    response = opener.open(request)
    try:
        print(response.read())
    finally:
        # Release the underlying connection even when read() fails.
        response.close()
if __name__ == "__main__":
    # Script entry point: run the proxy demo.
    proxy_opener()
实际的过程是:创建具有代理功能的处理器 ProxyHandler,根据处理器构建自定义的 opener,再调用 opener.open() 发送请求。
web认证
import urllib2
def web_auth_open():
    """Request a URL with HTTP basic authentication and print the body.

    Steps: create an ``HTTPPasswordMgrWithDefaultRealm``, register the
    credentials for ``url`` under the default realm (``None``), wrap the
    manager in an ``HTTPBasicAuthHandler``, build an opener from that handler,
    send the request with ``opener.open`` and print the body.

    Returns:
        None. The response body is printed to standard output.

    Raises:
        Any exception raised while building or opening the request, or by
        ``read()``, propagates.
    """
    # NOTE(review): the URL, user and password are empty strings here --
    # presumably redacted placeholders; fill them in before running.
    url = ''
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"}
    request = urllib2.Request(url, headers=headers)

    pwd_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    pwd_manager.add_password(None, uri=url, user='', passwd='')

    web_handler = urllib2.HTTPBasicAuthHandler(pwd_manager)
    web_opener = urllib2.build_opener(web_handler)

    response = web_opener.open(request)
    try:
        print(response.read())
    finally:
        # Release the underlying connection even when read() fails.
        response.close()
if __name__ == "__main__":
    # Script entry point: run the basic-auth demo.
    web_auth_open()
实际的过程是:创建密码管理器,创建具有认证功能的处理器 HTTPBasicAuthHandler,根据处理器构建自定义的 opener,再调用 opener.open() 发送请求。
cookie
import urllib2
import urllib
import cookielib
def cookie_open():
    """Log in with a cookie-aware opener, then fetch, print and return a page.

    Steps: create a ``cookielib.CookieJar``, wrap it in an
    ``HTTPCookieProcessor``, build an opener from that handler, send the login
    request (with the url-encoded form as request data), then send the data
    request through the same opener so both requests share the jar.

    Returns:
        The body of the data response as produced by ``read()``. The same
        value is also printed to standard output.

    Raises:
        Any exception raised while building or opening either request, or by
        ``read()``, propagates.
    """
    # NOTE(review): both URLs and the form values are empty strings here --
    # presumably redacted placeholders; fill them in before running.
    login_url = ''
    data_url = ""
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
               }
    formdata = {
        "email": "",
        "password": ""
    }
    encoded_form = urllib.urlencode(formdata)

    login_request = urllib2.Request(login_url, headers=headers, data=encoded_form)
    data_request = urllib2.Request(data_url, headers=headers)

    cookie_jar = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(cookiejar=cookie_jar)
    cookie_opener = urllib2.build_opener(cookie_handler)

    # The login response body is not needed; close it right away so the
    # connection is not leaked.
    cookie_opener.open(login_request).close()

    data_response = cookie_opener.open(data_request)
    try:
        data = data_response.read()
    finally:
        # Release the underlying connection even when read() fails.
        data_response.close()

    print(data)
    # The script entry point binds the result of this call, so hand the body
    # back instead of implicitly returning None.
    return data
if __name__ == "__main__":
    # Script entry point: run the cookie demo and keep its result in `data`.
    data = cookie_open()
实际的过程是:创建 cookie_jar,创建具有 cookie 功能的处理器 HTTPCookieProcessor,根据处理器构建自定义的 opener,再调用 opener.open() 发送请求。
忽略 SSL 证书验证
import urllib2
import ssl
def ssl_load_data():
    """Download an HTTPS page without certificate verification and save it.

    Builds a request carrying a custom ``User-Agent`` header, opens it with an
    unverified SSL context and writes the raw response body to a file.

    Returns:
        None. The response body is written to the output file.

    Raises:
        Any exception raised by ``urllib2.urlopen``, ``read()`` or the file
        operations propagates.
    """
    url = "https://www.12306.cn/mormhweb/"
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"}
    request = urllib2.Request(url, headers=headers)

    # NOTE(review): ssl._create_unverified_context() is a private helper that
    # turns certificate verification off. That is the point of this demo, but
    # it should not be copied into code that talks to untrusted networks.
    context = ssl._create_unverified_context()

    response = urllib2.urlopen(request, context=context)
    try:
        body = response.read()
    finally:
        # Release the underlying connection even when read() fails.
        response.close()

    # NOTE(review): the output path is an empty string -- presumably a
    # redacted placeholder; fill it in before running.
    # Binary mode keeps the downloaded bytes intact; text mode would rewrite
    # newlines on Windows.
    with open("", "wb") as f:
        f.write(body)
if __name__ == "__main__":
    # Script entry point: run the unverified-HTTPS download demo.
    ssl_load_data()