# 封装函数如下:
from urllib import request,parse
from urllib.error import HTTPError,URLError
#保存cookie的方法
from http import cookiejar
# Cookie-aware session wrapper
class session(object):
    """Minimal HTTP session that routes requests through one cookie-aware opener.

    Each instance owns a single ``CookieJar`` and an opener wired to it via
    ``HTTPCookieProcessor``. ``get`` and ``post`` hand that opener to the
    module-level ``get`` / ``post`` helpers.

    Attributes:
        cookies: The ``http.cookiejar.CookieJar`` attached to the opener.
        opener: The ``urllib.request.OpenerDirector`` built around the jar.
    """

    def __init__(self):
        # Keep the jar on the instance so callers can inspect or clear the
        # cookies collected by this session.
        self.cookies = cookiejar.CookieJar()
        # The processor is the handler that connects the jar to the opener.
        handler = request.HTTPCookieProcessor(self.cookies)
        self.opener = request.build_opener(handler)

    def get(self, url, headers=None):
        """Request ``url`` without form data using this session's opener.

        Args:
            url: URL to request.
            headers: Optional dict of request headers.

        Returns:
            Whatever the module-level ``get`` helper returns.
        """
        # Inside a method, ``get`` resolves to the module-level function,
        # not to this method.
        return get(url, headers, self.opener)

    def post(self, url, form, headers=None):
        """Send ``form`` to ``url`` using this session's opener.

        Args:
            url: URL to request.
            form: Form fields to submit.
            headers: Optional dict of request headers.

        Returns:
            Whatever the module-level ``post`` helper returns.
        """
        return post(url, form, headers, self.opener)
# Wrapper for requests without form data
def get(url, headers=None, opener=None):
    """Request ``url`` without form data by delegating to ``urlrequests``.

    Args:
        url: URL to request.
        headers: Optional dict of request headers, forwarded unchanged.
        opener: Optional opener object, forwarded unchanged.

    Returns:
        The value returned by ``urlrequests``.
    """
    # Everything after ``url`` is passed by keyword: the second positional
    # parameter of ``urlrequests`` is ``form``, so a positional ``headers``
    # would be mistaken for form data.
    forwarded = {'headers': headers, 'opener': opener}
    return urlrequests(url, **forwarded)
# Wrapper for requests that carry form data
def post(url, form, headers=None, opener=None):
    """Send ``form`` to ``url`` by delegating to ``urlrequests``.

    Args:
        url: URL to request.
        form: Form fields, passed as the ``form`` argument of ``urlrequests``.
        headers: Optional dict of request headers, forwarded unchanged.
        opener: Optional opener object, forwarded unchanged.

    Returns:
        The value returned by ``urlrequests``.
    """
    result = urlrequests(url, form, opener=opener, headers=headers)
    return result
def urlrequests(url, form=None, headers=None, opener=None):
    """Fetch ``url`` and return the raw response body.

    When ``form`` is non-empty it is URL-encoded as UTF-8 and attached as the
    request body (urllib then issues a POST); otherwise the request carries
    no body.

    Args:
        url: URL to request.
        form: Optional mapping (or sequence of pairs) of form fields.
        headers: Optional dict of request headers. When ``None``, a default
            desktop-browser ``User-Agent`` header is used.
        opener: Optional object exposing ``open(req)`` (for example the
            result of ``urllib.request.build_opener``). When ``None``,
            ``urllib.request.urlopen`` is used.

    Returns:
        The response body as ``bytes``. If an ``HTTPError`` or ``URLError``
        is raised, the error is printed and ``b''`` is returned.
    """
    if headers is None:
        headers = {
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
        }

    form_bytes = None
    if form:
        # urlencode yields a str; the request body must be bytes.
        form_bytes = parse.urlencode(form, encoding='utf-8').encode('utf-8')

    html_bytes = b''
    try:
        # Build the request once so the form body is kept on every path.
        req = request.Request(url, data=form_bytes, headers=headers)
        # Send through the caller's opener when one is given; otherwise fall
        # back to the module-level default opener.
        open_url = opener.open if opener is not None else request.urlopen
        # The context manager closes the response even if read() fails.
        with open_url(req) as response:
            html_bytes = response.read()
    except (HTTPError, URLError) as err:
        # Best-effort by design: report the failure and return b''.
        print(err)
    return html_bytes
# 调用方式如下:
from 上面的代码 import session
import json
# Login endpoint and form captured from the browser: fill in the account and
# password on the login page, open the developer tools, click "log in", then
# copy the URL and form fields of the ``login`` request.
url='http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2018721441132'
form = {
    # The account value is masked on purpose.
    'email': '***********',
    'icode': '',
    'origURL': 'http://www.renren.com/home',
    'domain': 'renren.com',
    'key_id': '1',
    'captcha_type': 'web_login',
    'password': '95cb2a1d59b918e0d16ab5d3535fb40103e4b546e651a3e3c99b91876927c78a',
    'rkey': 'a7bccfbafd7ee702247450942dff5611',
    'f': 'http%3A%2F%2Fwww.renren.com%2F966927992'
}
s = session()
html = s.post(url, form)
# An empty body would otherwise surface as an opaque JSON decoding error.
if not html:
    raise SystemExit('login request failed: empty response')
res_dict = json.loads(html.decode('utf-8'))
# A response without 'homeUrl' would otherwise surface as a bare KeyError.
home_url = res_dict.get('homeUrl')
if not home_url:
    raise SystemExit('login response has no homeUrl: %r' % (res_dict,))
# Fetch the home page through the same session object.
html = s.get(home_url)
print(html.decode('utf-8'))