from urllib import request, parse
from http import cookiejar
# Module-level default cookie file name. NOTE(review): nothing in the visible
# code reads this global; main() defines its own local `filename`.
filename = 'cookie.txt'
class Spider(object):
    """Crawler that logs in through a form and reuses the cookies afterwards.

    Typical first run: ``create_cookiejar`` -> ``create_openor`` ->
    ``get_login_page`` -> ``first_login`` (saves the cookies to disk).
    Later runs: ``get_cookie_from_file`` -> ``create_openor`` ->
    ``get_data_with_cookie``.
    """

    # Headers sent with every request (a browser-like User-Agent string).
    headers = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    }

    def __init__(self, login_url=None, url=None, filename=None, login_page_url=None):
        """
        :param login_url: URL the login form is submitted to,
            e.g. 'http://www.renren.com/PLogin.do'
        :param url: URL of the target profile page,
            e.g. 'http://www.renren.com/966969502/profile'
        :param filename: name of the cookie file
        :param login_page_url: URL of the login page,
            e.g. 'http://www.renren.com/PLogin.do'
        """
        self.login_url = login_url
        self.url = url
        self.filename = filename
        self.login_page_url = login_page_url
        # HTTP status code of the last first_login() call; None until then.
        self.status = None

    def create_cookiejar(self):
        """
        Create a MozillaCookieJar bound to ``self.filename`` so that it can
        later be saved to that file.

        :return: the new, empty cookie jar
        """
        return cookiejar.MozillaCookieJar(filename=self.filename)

    def create_openor(self, cookie_jar):
        """
        Build a urllib opener whose cookie handling is backed by ``cookie_jar``.

        :param cookie_jar: cookie jar used for the opener's requests/responses
        :return: the opener
        """
        handler = request.HTTPCookieProcessor(cookie_jar)
        return request.build_opener(handler)

    def get_login_page(self, opener):
        """
        Visit the login page first to pick up part of the cookies.

        :param opener: return value of ``create_openor``
        :return: None
        """
        login_get_request = request.Request(url=self.login_page_url, headers=self.headers)
        # The body is not needed; the request is made only for its cookies.
        # Close the response so the connection is not leaked.
        with opener.open(login_get_request):
            pass

    def first_login(self, opener, cookie_jar, email='你的email', password='你的密码'):
        """
        Submit the login form (first login) to obtain the full set of cookies,
        then save them to the cookie jar's file.

        :param opener: return value of ``create_openor``
        :param cookie_jar: cookie jar backing ``opener``; must have a filename
        :param email: account e-mail sent in the form (placeholder by default)
        :param password: account password sent in the form (placeholder by default)
        :return: HTTP status code of the login response (also kept in ``self.status``)
        """
        form = {
            'email': email,
            'password': password
        }
        data = parse.urlencode(form).encode('utf-8')
        first_login_request = request.Request(url=self.login_url, headers=self.headers, data=data)
        with opener.open(first_login_request) as first_login_response:
            self.status = first_login_response.getcode()
        # Persist every cookie obtained, including session and expired ones.
        cookie_jar.save(ignore_discard=True, ignore_expires=True)
        return self.status

    def get_cookie_from_file(self):
        """
        Load the cookies previously saved to ``self.filename``.

        :return: a MozillaCookieJar populated from the file
        """
        full_cookie = cookiejar.MozillaCookieJar()
        full_cookie.load(self.filename, ignore_discard=True, ignore_expires=True)
        print(full_cookie)
        return full_cookie

    def get_data_with_cookie(self, opener, output_path='./renren_personal_center.html'):
        """
        Fetch ``self.url`` with the opener's cookies and write the page to disk.

        :param opener: opener built from a cookie jar that holds the login cookies
        :param output_path: file the UTF-8 decoded page is written to
        :return: None
        """
        personcenter_request = request.Request(url=self.url, headers=self.headers)
        with opener.open(personcenter_request) as personcenter_response:
            content = personcenter_response.read().decode('utf-8')
        with open(output_path, 'w', encoding='utf-8') as fp:
            fp.write(content)
def main():
    """Fetch the profile page using cookies saved by an earlier login.

    Expects 'cookie.txt' to exist already. To create it, run the first-login
    flow once: create_cookiejar() -> create_openor(jar) ->
    get_login_page(opener) -> first_login(opener=opener, cookie_jar=jar).
    """
    spider = Spider(
        login_url='http://www.renren.com/PLogin.do',
        url='http://www.renren.com/966969502/profile',
        filename='cookie.txt',
        login_page_url='http://www.renren.com/PLogin.do',
    )
    # Reuse the cookies stored on disk and request the page with them.
    saved_cookies = spider.get_cookie_from_file()
    cookie_opener = spider.create_openor(saved_cookies)
    spider.get_data_with_cookie(cookie_opener)
# Run the crawl only when this file is executed as a script.
if __name__ == "__main__":
    main()
# print(cookie)
# print(personcenter_response)
# loginurl = 'http://renren.com'
# s = requests.Session()
# r = s.get(url=loginurl, allow_redirects=True)
#
# datas = {'email': '18813388480', 'password': 'zm636057893'}
# headers = {
# 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:61.0) Gecko/20100101 Firefox/61.0',
#
# }
# url = 'http://www.renren.com/PLogin.do'
# response = s.post(url=url, data=datas, allow_redirects=True)
# cookie_dict = requests.utils.dict_from_cookiejar(s.cookies)
# print(cookie_dict)
# print(response.text)