模拟登录之前,先手动登录一次,用 HttpFox 抓包获得请求头 headers、需要向登录网址 POST 的表单数据 post_data,以及提交地址 post_url。
agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0'
headers = {
'Host':'www.douban.com',
'Referer':'https://www.douban.com/',
'User-Agent':agent
}
post_data = {
'redir':'https://www.douban.com',
'source':None,
'form_email':*****,
'form_password':*****,
'remember':'on'
}
需要验证码登录时的 post_data:
post_data = {'redir':'https://www.douban.com',
'source':None,
'form_email':*****,
'form_password':*****,
'remember':'on',
'captcha-solution':*****,
'captcha-id':*****
}
以下为代码:
# _*_coding:utf-8_*_
import requests
from PIL import Image
import cookielib
import re,os
# Browser-like User-Agent string sent with every request.
agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0'

# Request headers shared by all requests made through `session`.
headers = {
    'Host': 'www.douban.com',
    'Referer': 'https://www.douban.com/',
    'User-Agent': agent,
}

# A single shared session whose cookies are persisted to the local file
# 'DoubanCookies' so a later run can reuse an earlier login.
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='DoubanCookies')
try:
    session.cookies.load(ignore_discard=True)
except IOError:
    # A missing cookie file raises IOError, and cookielib.LoadError (bad file
    # contents) is a subclass of IOError, so this covers both "no saved
    # cookies yet" cases without hiding KeyboardInterrupt or real bugs.
    print("DoubanCookies未加载")
# Fetch the captcha
def get_captcha():
    """Download the login captcha and ask the user to type its solution.

    Fetches the login page, extracts the hidden ``captcha-id`` field, saves
    the matching captcha image to ``Douban_captcha.jpg`` in the current
    directory, tries to display it, and reads the answer from stdin.

    Returns:
        A ``(captcha_id, captcha)`` tuple: the id scraped from the login page
        and the text the user typed.

    Raises:
        IndexError: if the login page contains no ``captcha-id`` field. The
            caller in this file treats a failure here as "no captcha needed",
            so this behaviour is kept on purpose.
    """
    url = "https://www.douban.com/accounts/login"
    response = session.get(url, headers=headers).text
    pattern = re.compile(r'<input type="hidden" name="captcha-id" value="(.*?)"/>', re.S)
    item = re.findall(pattern, response)
    # Deliberately unguarded: an empty match list must raise IndexError.
    captcha_id = item[0]

    captcha_url = "https://www.douban.com/misc/captcha?id=" + captcha_id + "&size=s"
    r = session.get(captcha_url, headers=headers)
    # The `with` block closes the file; no explicit close() is needed.
    with open('Douban_captcha.jpg', 'wb') as f:
        f.write(r.content)

    try:
        im = Image.open('Douban_captcha.jpg')
        try:
            im.show()
        finally:
            # Release the image handle even when no viewer could be started.
            im.close()
    except Exception:
        # Best effort only: if the image cannot be opened or shown, tell the
        # user where the file is so they can open it by hand.
        print("请到%s手动打开Douban_captcha.jpg" % os.path.abspath('Douban_captcha.jpg'))

    captcha = raw_input("请输入captcha:\n> ")
    return captcha_id, captcha
def islogin(profile_url='https://www.douban.com/people/152474925/'):
    """Check whether the shared session is currently logged in.

    The check requests a personal profile page and looks for markup matched
    by the pattern below; the original author uses their own profile page to
    decide whether the login succeeded.

    Args:
        profile_url: Profile page to probe. Defaults to the profile URL that
            was previously hard-coded, so existing callers are unaffected.

    Returns:
        The list of strings captured by the pattern. An empty list (falsy)
        means the expected markup was not found, which the callers treat as
        "not logged in".
        NOTE(review): the captured ``<span>`` text is presumably the account's
        display name -- confirm against the live page.
    """
    res = session.get(profile_url, headers=headers).text
    pattern = re.compile(r'<a target="_blank" href=.*?class="bn-more">.*?<span>(.*?)</span>', re.S)
    return pattern.findall(res)
def Login(email=None, password=None):
    """Log in to Douban with the shared session and save the cookies.

    If the login page offers a captcha, the user is asked to solve it and the
    solution is posted together with the credentials; otherwise only the
    credentials are posted. The HTTP status code of the login request is
    printed, a success message is printed when ``islogin()`` confirms the
    login, and the session cookies are written to disk in either case.

    Args:
        email: Account e-mail. When None, it is read from stdin.
        password: Account password. When None, it is read with
            ``getpass.getpass`` so it is not echoed to the terminal.
    """
    # Local import keeps this block self-contained (getpass is stdlib).
    import getpass

    # Credentials are never hard-coded; prompt for whatever was not supplied.
    if email is None:
        email = raw_input("请输入豆瓣账号:\n> ")
    if password is None:
        password = getpass.getpass("请输入豆瓣密码:\n> ")

    login_url = 'https://www.douban.com/accounts/login'
    post_data = {
        'redir': 'https://www.douban.com',
        # NOTE(review): requests appears to omit None-valued form fields --
        # confirm that an absent 'source' is what the server expects.
        'source': None,
        'form_email': email,
        'form_password': password,
        'remember': 'on',
    }

    # Only the captcha lookup is guarded. get_captcha() raises IndexError when
    # the login page has no captcha-id field, which means no captcha is
    # required. Any other failure (network error, etc.) is allowed to surface
    # instead of silently triggering a second login attempt.
    try:
        captcha_id, captcha = get_captcha()
    except IndexError:
        success_message = "不需要验证码登录成功!"
    else:
        post_data["captcha-solution"] = captcha
        post_data["captcha-id"] = captcha_id
        success_message = "验证码登录成功!"

    login_page = session.post(login_url, data=post_data, headers=headers)
    print(login_page.status_code)
    if islogin():
        print(success_message)

    # Persist cookies (including session cookies) for the next run.
    session.cookies.save(ignore_discard=True, ignore_expires=True)
if __name__ == "__main__":
    # Probe the login state once and reuse the result: each islogin() call is
    # a full HTTP request, and a second call could return an empty list and
    # make the [0] lookup fail.
    items = islogin()
    if items:
        print("加载cookie登陆成功")
        print(items[0])
    else:
        Login()