1.POST请求
from urllib import request, parse
# Request headers for the lagou.com job-search call.
# NOTE(review): the Cookie values are hard-coded session data (presumably
# copied from a browser session) and will likely need refreshing — confirm.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
    ),
    "Referer": "https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=",
    # One "name=value" pair per entry, joined into a single Cookie header value.
    "Cookie": "; ".join((
        "JSESSIONID=ABAAAECAAFDAAEH91C93136CA6BAC0D91666C8966AA9FBB",
        "WEBTJ-ID=20200309120608-170bd78b1c0848-0e085422ff4061-4313f6a-2073600-170bd78b1c1b93",
        "_ga=GA1.2.614559968.1583726769",
        "_gid=GA1.2.141853249.1583726769",
        "user_trace_token=20200309120607-f2ebb3e0-e795-4fa3-8a7f-0dcb74e528fb",
        "LGSID=20200309120607-0799029e-482e-4b6e-8d6a-1edef023cecc",
        "LGUID=20200309120607-b431645f-9030-4fe8-ae7f-92e164830870",
        "Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1583726769",
        "index_location_city=%E4%B8%8A%E6%B5%B7",
        "sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22170bd8e1bf918c-0942f423a0a9dc-4313f6a-2073600-170bd8e1bfa5af%22%2C%22%24device_id%22%3A%22170bd8e1bf918c-0942f423a0a9dc-4313f6a-2073600-170bd8e1bfa5af%22%7D",
        "sajssdk_2015_cross_new_user=1",
        "X_HTTP_TOKEN=b2638d431b6c17859878273851e91b7b8a10121280",
        "_gat=1",
        "Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1583728791",
        "TG-TRACK-CODE=index_search",
        "LGRID=20200309123954-7bf08a1f-e65c-4f29-8195-40920be6f474",
        "SEARCH_ID=8796b71ed7d347da81750d8d0a6b02c7",
    )),
}
# Form fields for the search; they are url-encoded and sent as the POST body.
data = {
    "first": "true",
    "pn": "1",
    "kd": "python",
}
url = "https://www.lagou.com/jobs/positionAjax.json?city=%E4%B8%8A%E6%B5%B7&needAddtionalResult=false"

# NOTE: despite its name, `resp` is the outgoing Request object; the name is
# kept so the module-level bindings stay the same.
resp = request.Request(
    url,
    headers=headers,
    data=parse.urlencode(data).encode("utf-8"),
    method="POST",
)

# Use the response as a context manager so the connection is closed even if
# reading or decoding the body fails.
with request.urlopen(resp) as content:
    print(content.read().decode("utf-8"))
2.使用代理IP
from urllib import request
url = "http://httpbin.org/ip"

# First request: sent directly with the default opener, for comparison.
req1 = request.Request(url)
with request.urlopen(req1) as resp1:
    print(resp1.read())

# Second request: the same URL, routed through an HTTP proxy.
# NOTE(review): the proxy address carries no port — confirm which port the
# proxy actually listens on.
handler = request.ProxyHandler({"http": "47.92.82.189"})
opener = request.build_opener(handler)
req = request.Request(url)
# Context managers close each response once its body has been printed.
with opener.open(req) as resp:
    print(resp.read())
3.Cookie的使用
from urllib import request, parse
from http.cookiejar import CookieJar
# zhihu_url = "https://www.zhihu.com/question/38632401/answer/1060250796"
# headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"}
# req = request.Request(url=zhihu_url,headers=headers)
# resp = request.urlopen(req)
# with open("zhihu.html","w",encoding='utf-8') as fp:
# fp.write(resp.read().decode('utf-8'))
# Step 1: settings for the login request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
    ),
}
# NOTE(review): account credentials are hard-coded in the source; consider
# loading them from the environment or a config file instead.
data = dict(email="970138074@qq.com", password="pythonspider")
login_url = "http://www.renren.com/PLogin.do"
def get_opener():
    """Build a URL opener that records cookies in a fresh in-memory jar.

    Each call creates a new ``CookieJar``, so openers returned by separate
    calls do not share cookies.

    Returns:
        The opener produced by ``request.build_opener`` with an
        ``HTTPCookieProcessor`` bound to the new jar.
    """
    # Step 2: create the cookie jar.
    cookie_jar = CookieJar()
    # Step 3: wrap the jar in a handler so it takes part in request processing.
    handler = request.HTTPCookieProcessor(cookie_jar)
    # Step 4: build the opener that callers use to send the login request.
    opener = request.build_opener(handler)
    return opener
def login_renren(opener):
    """Send the login form to ``login_url`` through *opener*.

    The module-level ``data`` dict is url-encoded as the request body and the
    module-level ``headers`` are attached. The response body is not used.

    Args:
        opener: Object with an ``open(request)`` method, e.g. the opener
            returned by ``get_opener()``; with that opener, cookies set by
            the server are collected by its cookie handler.

    Returns:
        None.
    """
    req = request.Request(
        url=login_url,
        headers=headers,
        data=parse.urlencode(data).encode("utf-8"),
    )
    # Only the request itself matters here, so close the response right away
    # instead of leaving the connection open.
    with opener.open(req):
        pass
def visit_profile(opener):
    """Fetch the profile page through *opener* and save it to ``renren.html``.

    Args:
        opener: Object with an ``open(request)`` method, e.g. the opener
            returned by ``get_opener()`` after ``login_renren`` has run.

    Returns:
        None. The decoded page is written to ``renren.html`` in the current
        working directory.
    """
    # Step 5: request the profile page.
    dapeng_url = "http://www.renren.com/880151247/profile"
    req = request.Request(url=dapeng_url, headers=headers)
    # Read the whole body inside the context manager so the response is
    # closed, and so a failed fetch never leaves an empty output file behind.
    with opener.open(req) as resp:
        page = resp.read().decode("utf-8")
    with open("renren.html", "w", encoding="utf-8") as fp:
        fp.write(page)
if __name__ == "__main__":
    # The same opener is used for both calls: log in first, then fetch the
    # profile page with whatever cookies the login produced.
    opener = get_opener()
    login_renren(opener)
    visit_profile(opener)
4.Cookie的保存与导入
from urllib import request
from http.cookiejar import MozillaCookieJar
# Cookies are read from "cookie.txt" in the current working directory.
cookie_jar = MozillaCookieJar("cookie.txt")
# ignore_discard=True also loads cookies flagged to be discarded (session
# cookies). Expired cookies are a separate switch: ignore_expires=True.
cookie_jar.load(ignore_discard=True)
for cookie in cookie_jar:
    print(cookie)

handler = request.HTTPCookieProcessor(cookie_jar)
opener = request.build_opener(handler)
# The response body is not used; close the response instead of leaking it.
with opener.open("http://httpbin.org/cookies") as rep:
    pass
# To write the jar back to cookie.txt afterwards:
# cookie_jar.save(ignore_discard=True)