python模拟登录豆瓣
原理:
——–使用 requests 的会话(session)模拟浏览器:先请求豆瓣登录页,再向同一地址提交登录表单
环境:
——–开发软件:pycharm
——–运行环境:mac;python2.7.10;
——–requests 用于http请求;HTMLParser用于解析html数据
注意:
——–如果登录次数过多,豆瓣会要求输入验证码;这时程序会自动将验证码图片(picture.jpg)下载到当前目录,需要手动打开该图片,并在终端输入图中的验证码完成验证!
代码块
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
from HTMLParser import HTMLParser
class DouBanClient(object):
    """Small Douban client that logs in through the site's web login form.

    One ``requests`` session is kept on the instance so that cookies received
    while fetching the login page are sent back with the login POST.
    """

    def __init__(self):
        # A single session shared by every request this client makes.
        self.session = requests.session()
        # Browser-like headers attached to every request of the session.
        # NOTE(review): 'Host' is pinned here, so it is also sent when the
        # captcha image is downloaded -- confirm that is fine if the image is
        # ever served from a different host.
        headers = {
            'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36",
            'Referer': "https://www.douban.com/",
            'Host': "www.douban.com",
        }
        self.session.headers.update(headers)

    def login(self, form_email, form_password, source="index_nav"):
        """Submit the login form, solving a captcha interactively if present.

        The login page is fetched first and scanned with ``MyParser``. When it
        contains a captcha, the image is saved as ``picture.jpg`` in the
        current directory and the user is prompted to type the solution.

        Args:
            form_email: Value sent as the ``form_email`` form field.
            form_password: Value sent as the ``form_password`` form field.
            source: Value sent as the ``source`` form field.

        Returns:
            The ``requests`` response object of the login POST. The response
            body is also printed, as before.
        """
        url = "https://www.douban.com/accounts/login"
        login_page = self.session.get(url)

        # Look for a captcha image / captcha id in the login page.
        parser = MyParser()
        parser.feed(login_page.text)
        parser.close()

        # Fields common to both the captcha and the no-captcha case.
        post_data = {
            'source': source,
            'form_email': form_email,
            'form_password': form_password,
        }

        if parser.captcha_url:
            # Save the captcha image locally so the user can open it by hand.
            image = self.session.get(parser.captcha_url)
            with open('picture.jpg', 'wb') as image_file:
                image_file.write(image.content)
            print(parser.captcha_id)
            post_data['captcha-solution'] = raw_input("验证码:")
            post_data['captcha-id'] = parser.captcha_id
            # Debug dump of the payload; the password is masked so it never
            # ends up on the console or in captured logs.
            print(dict(post_data, form_password='***'))

        out = self.session.post(url, data=post_data)
        print(out.content)
        return out
class MyParser(HTMLParser):
    """HTML parser that extracts the captcha fields from a login page.

    After feeding a page:
      * ``captcha_url`` is the ``src`` of the ``<img id="captcha_image">`` tag,
      * ``captcha_id`` is the ``value`` of the ``<input name="captcha-id">`` tag.
    Both stay ``None`` when the corresponding tag is not present.
    """

    def __init__(self):
        # Explicit base-class call (rather than super()) keeps this working
        # regardless of how the HTMLParser base class is implemented.
        HTMLParser.__init__(self)
        self.captcha_url = None
        self.captcha_id = None

    @staticmethod
    def _attr(attrs, name):
        """Return the value of the first attribute called *name*, else None.

        *attrs* is the list of ``(name, value)`` pairs that HTMLParser passes
        to ``handle_starttag``.
        """
        return next((value for key, value in attrs if key == name), None)

    def handle_starttag(self, tag, attrs):
        """HTMLParser hook, called for every opening tag in the document."""
        if tag == 'img' and self._attr(attrs, 'id') == 'captcha_image':
            self.captcha_url = self._attr(attrs, 'src')
        if tag == 'input' and self._attr(attrs, 'name') == 'captcha-id':
            self.captcha_id = self._attr(attrs, 'value')
# Interactive entry point: ask for credentials and attempt a login.
if __name__ == "__main__":
    # Local import: only the interactive entry point needs getpass.
    import getpass

    print("test")
    client = DouBanClient()
    email = raw_input("邮箱:")
    # Read the password without echoing it on the terminal.
    pwd = getpass.getpass("密码:")
    client.login(email, pwd)