Python 模拟登录豆瓣

模拟登录之前,首先要在浏览器中手动登录一次,并通过 HttpFox 抓包获得三样东西:请求头信息 headers、需要向登录网址 POST 的表单数据 post_data,以及提交地址 post_url。

# User-Agent string sent with every request (a Firefox 47 on Windows identity).
agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0'

# Request headers attached to every call made against www.douban.com.
headers = {
    'Host': 'www.douban.com',
    'Referer': 'https://www.douban.com/',
    'User-Agent': agent,
}


不需要验证码时的post_data

post_data = {
        'redir':'https://www.douban.com',
        'source':None,
        'form_email':*****,
        'form_password':*****,
        'remember':'on'
    }

需要验证码登录时的post_data:

post_data = {
        'redir':'https://www.douban.com',
        'source':None,
        'form_email':*****,
        'form_password':*****,
        'remember':'on
        'captcha-solution':*****
        'captcha-id':*****
    }

以下为代码:

# _*_coding:utf-8_*_
import requests
from PIL import Image
import cookielib
import re,os

# User-Agent string sent with every request (a Firefox 47 on Windows identity).
agent = 'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0'

# Request headers attached to every call made against www.douban.com.
headers = {
    'Host': 'www.douban.com',
    'Referer': 'https://www.douban.com/',
    'User-Agent': agent,
}

# A single shared session whose cookies are persisted in the file
# 'DoubanCookies' (relative to the current working directory), so a
# previous successful login can be reused on the next run.
session = requests.session()
session.cookies = cookielib.LWPCookieJar(filename='DoubanCookies')

try:
    session.cookies.load(ignore_discard=True)
except IOError:
    # A missing or unreadable cookie file is expected on the first run.
    # cookielib.LoadError subclasses IOError, so a corrupt file lands here
    # too; anything else (e.g. Ctrl-C) is no longer silently swallowed.
    print("DoubanCookies未加载")

#获取验证码
def get_captcha():
    """Fetch the login captcha, show it to the user and read the answer.

    Downloads the login page, extracts the hidden ``captcha-id`` field,
    saves the matching captcha image as ``Douban_captcha.jpg`` in the
    current working directory, tries to open it in an image viewer, and
    then prompts on stdin for the text shown in the image.

    Returns:
        A ``(captcha_id, captcha)`` tuple: the id scraped from the login
        page and the answer typed by the user.

    Raises:
        IndexError: if the login page contains no ``captcha-id`` field.
            Callers use this to detect that no captcha is required.
    """
    url = "https://www.douban.com/accounts/login"
    response = session.get(url, headers=headers).text
    pattern = re.compile(r'<input type="hidden" name="captcha-id" value="(.*?)"/>', re.S)
    item = re.findall(pattern, response)
    # Deliberately unguarded: an empty match list raises IndexError, which
    # is the documented "no captcha on this page" signal.
    captcha_id = item[0]
    captcha_url = "https://www.douban.com/misc/captcha?id=" + captcha_id + "&size=s"
    r = session.get(captcha_url, headers=headers)
    # The with-statement closes the file; no explicit close() is needed.
    with open('Douban_captcha.jpg', 'wb') as f:
        f.write(r.content)

    try:
        im = Image.open('Douban_captcha.jpg')
        im.show()
        im.close()
    except Exception:
        # Best effort only: if the image cannot be opened or displayed,
        # tell the user where the file is. KeyboardInterrupt/SystemExit
        # are intentionally not caught.
        print("请到%s手动打开Douban_captcha.jpg" % os.path.abspath('Douban_captcha.jpg'))

    captcha = raw_input("请输入captcha:\n>  ")
    return captcha_id, captcha

def islogin():
    """Probe a profile page to decide whether the session is logged in.

    Returns:
        The list of ``<span>`` texts captured by the pattern below. The
        list is empty (falsy) when the page does not contain the expected
        markup, which callers treat as "not logged in".
    """
    # A personal profile page serves as the logged-in probe.
    profile_url = 'https://www.douban.com/people/152474925/'
    page_html = session.get(profile_url, headers=headers).text
    return re.findall(
        r'<a target="_blank" href=.*?class="bn-more">.*?<span>(.*?)</span>',
        page_html,
        re.S
    )


def Login():
    login_url = 'https://www.douban.com/accounts/login'
    post_data = {
        'redir':'https://www.douban.com',
        'source':None,
        'form_email':******,
        'form_password':*******,
        'remember':'on'
    }

    try:
        # 需要验证码登录
        items = get_captcha()
        post_data["captcha-solution"] = items[1]
        post_data["captcha-id"] = items[0]
        login_page = session.post(login_url, data=post_data, headers=headers)
        login_code = login_page.text
        print login_page.status_code
        #print login_code
        if islogin():
            print "验证码登录成功!"

    except:
        # 不需要验证码登录
        login_page = session.post(login_url, data=post_data, headers=headers)
        login_code = login_page.text
        print login_page.status_code
        #print login_code
        if islogin():
            print "不需要验证码登录成功!"

    session.cookies.save(ignore_discard=True, ignore_expires=True)


if __name__ == "__main__":
    # Probe once and reuse the result: every islogin() call is a network
    # round-trip, and a second call could return a different answer.
    logged_in_items = islogin()
    if logged_in_items:
        print("加载cookie登陆成功")
        print(logged_in_items[0])
    else:
        Login()




评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值