python模拟登录豆瓣

python模拟登录豆瓣

原理:
——–模拟浏览器登录
环境:
——–开发软件:pycharm
——–运行环境:mac;python2.7.10;
——–requests 用于发送 HTTP 请求;HTMLParser 用于解析 HTML 数据
注意:

——–如果登录次数过多,豆瓣会要求输入验证码;这时程序会自动将验证码图片下载到当前目录下(picture.jpg),需要手动打开该图片,并在终端输入验证码进行验证!

代码块

#!/usr/bin/env python
# -*- coding:utf-8 -*-

import requests
from HTMLParser import HTMLParser

class DouBanClient(object):
    """Minimal Douban client that logs in through the web login form.

    One ``requests`` session is kept for the lifetime of the client so that
    cookies received while fetching the login page are sent again with the
    login POST.
    """

    LOGIN_URL = "https://www.douban.com/accounts/login"
    # Local file the captcha image is written to so the user can open it.
    CAPTCHA_FILE = 'picture.jpg'

    def __init__(self):
        """Create the HTTP session and install browser-like default headers."""
        self.session = requests.session()
        # No 'Host' header is pinned here: a session-wide Host would also be
        # sent on requests (or redirects) to other hosts, where it is wrong.
        # The HTTP layer derives the correct Host from each request URL.
        headers = {
            'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36",
            'Referer': "https://www.douban.com/",
        }
        self.session.headers.update(headers)

    def _save_captcha(self, captcha_url):
        """Download the captcha image and write it to ``CAPTCHA_FILE``."""
        img = self.session.get(captcha_url)
        with open(self.CAPTCHA_FILE, 'wb') as fh:
            fh.write(img.content)

    def login(self, form_email, form_password, source="index_nav"):
        """Submit the login form, asking the user for a captcha if one is shown.

        The login page is fetched first and scanned for a captcha image. When
        one is present it is saved to ``CAPTCHA_FILE`` in the current
        directory and the user is prompted on stdin for its solution.

        Args:
            form_email: Value sent as the ``form_email`` form field.
            form_password: Value sent as the ``form_password`` form field.
            source: Value sent as the ``source`` form field.

        Returns:
            The ``requests`` response object of the login POST.
        """
        url = self.LOGIN_URL
        r = self.session.get(url)

        # Scan the login page for the captcha image URL and captcha id.
        mp = MyParser()
        mp.feed(r.text)
        mp.close()

        # Fields common to both the captcha and the no-captcha case.
        post_data = {
            'source': source,
            'form_email': form_email,
            'form_password': form_password,
        }

        if mp.captcha_url:
            self._save_captcha(mp.captcha_url)

            print(mp.captcha_id)
            post_data['captcha-solution'] = raw_input("验证码:")
            post_data['captcha-id'] = mp.captcha_id

            # Debug output; the password is masked so it never reaches stdout.
            print(dict(post_data, form_password='***'))

        out = self.session.post(url, data=post_data)
        print(out.content)
        return out

class MyParser(HTMLParser):
    """HTML parser that extracts the captcha image URL and captcha id.

    After a document has been fed, ``captcha_url`` holds the ``src`` of the
    ``<img id="captcha_image">`` tag and ``captcha_id`` holds the ``value``
    of the ``<input name="captcha-id">`` tag. Each attribute is ``None``
    when the tag was not seen or lacks that attribute.
    """

    def __init__(self):
        # HTMLParser is initialised explicitly via the base class.
        HTMLParser.__init__(self)
        self.captcha_id = None
        self.captcha_url = None

    @staticmethod
    def _first_value(pairs, wanted):
        """Return the value of the first attribute named *wanted*, or None."""
        return next((value for name, value in pairs if name == wanted), None)

    def handle_starttag(self, tag, attrs):
        """Record captcha data from matching ``img`` / ``input`` start tags.

        Called by HTMLParser for every start tag; *attrs* is a list of
        ``(name, value)`` pairs.
        """
        lookup = self._first_value
        if tag == 'img':
            if lookup(attrs, 'id') == 'captcha_image':
                self.captcha_url = lookup(attrs, 'src')
        elif tag == 'input':
            if lookup(attrs, 'name') == 'captcha-id':
                self.captcha_id = lookup(attrs, 'value')


if __name__ == "__main__":
    # Interactive entry point: ask for credentials and attempt a login.
    # getpass is imported here because only this entry point needs it.
    import getpass

    print("test")
    client = DouBanClient()
    email = raw_input("邮箱:")
    # getpass reads the password without echoing it on the terminal.
    pwd = getpass.getpass("密码:")
    client.login(email, pwd)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值