Contents
HTTP GET 请求
# Fetch a Baidu search-result page with an HTTP GET request and save the raw
# response body to disk as an HTML file.
from urllib import parse, request

url='http://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=baidu&wd='
keyword='你好'
# Percent-encode the non-ASCII keyword (UTF-8 by default) so it is legal in a URL.
# parse.quote is the documented location of this function; request.quote only
# works because urllib.request happens to import it internally.
keyword_code = parse.quote(keyword)
url_all = url + keyword_code
# Build the Request object; with no data payload urllib sends it as a GET.
reg = request.Request(url_all)
# urlopen returns a file-like response object; the with-block closes the
# connection even if read() raises.
with request.urlopen(reg) as response:
    data = response.read()
# Binary mode: the body is written exactly as received, without decoding.
# The with-block guarantees the file is closed even if write() fails.
with open(r'D:/pythoncode/crawler/1.html','wb') as fhandle:
    fhandle.write(data)
HTTP POST 请求
用于注册、登录等操作。
# Submit form fields with an HTTP POST request and save the raw response body
# to disk as an HTML file.
from urllib import parse, request

url='http://www.iqianyue.com/mypost/'
# NOTE(review): the account name and password below are hard-coded in plain
# text and sent over plain HTTP - acceptable only for a throwaway demo account.
# urlencode converts the mapping into a percent-encoded ASCII string. urlopen
# requires POST data as bytes (a str would raise TypeError), hence .encode().
postdata = parse.urlencode({'name':'ceo@iqianyue.com','password':'aA123456'}).encode('utf-8')
# Supplying a data payload makes urllib issue a POST instead of a GET.
reg = request.Request(url, data=postdata)
# Send a browser-like User-Agent header instead of urllib's default one.
reg.add_header( 'User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36')
# The with-block closes the HTTP response even if read() raises.
with request.urlopen(reg) as response:
    data = response.read()
# Binary mode: the body is written exactly as received; the with-block
# guarantees the file is closed even if write() fails.
with open(r'D:\pythoncode\crawler\2.html','wb') as fh:
    fh.write(data)