Python urllib2 + cookielib:处理验证码,模拟登录人人网

又重新学了一下 Python 的 urllib2,参考了 http://www.cnpythoner.com/post/30.html 和 http://www.pythoner.com/65.html,写了个登录人人网的脚本。脚本能够处理需要验证码的情况(会把验证码图片以 jpg 格式写到本地);抓取主页新鲜事的正则貌似有点问题,先不管了。

# -*- coding: utf-8 -*-

import urllib
import urllib2
import cookielib
import re

import config

class Renren(object):
    """Small Renren client: logs in with a cookie-aware opener and prints feed items.

    The login response body is cached in ``web_content`` so that
    ``viewnewinfo()`` can parse it without issuing another request.
    """

    # Patterns are compiled once at class level; they stay reachable through
    # ``self.<name>`` exactly as when they were assigned in ``__init__``.
    requestToken_pattern = re.compile(r"get_check:'([-0-9]*)'")
    rtk_pattern = re.compile(r"get_check_x:'([a-zA-Z0-9]+)'")
    _ruid_pattern = re.compile(r"'ruid':'(\d+)'")
    _failCode_pattern = re.compile(r"&failCode=(\d+)")
    _h3_pattern = re.compile(r'<h3>(.*?)</h3>')      # one feed entry
    _author_pattern = re.compile(r'<a.+>(.+)</a>:')  # author inside an entry
    _content_pattern = re.compile(r'</a>(.+)\s')     # text inside an entry

    def __init__(self):
        self.operate = ''  # last response object (body not yet read)
        self.requestToken = self.rtk = ''
        self.icode = ''  # captcha text typed by the user
        self.is_login = False
        self.web_content = ''  # body of the last login response

        # All requests go through one opener so that cookies set during
        # login are sent with later requests.
        self.cj = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        urllib2.install_opener(self.opener)

    def login(self, email='', password='', origURL=''):
        """Post the credentials to ``config.LOGINURL`` until login succeeds or fails.

        When the redirect URL carries failCode 512 the captcha image is
        saved to ``icode.jpg``, the user is asked to type it, and the login
        is retried with the extra ``icode`` field.

        Returns True when logged in (including when this instance was
        already logged in), False on any other failure.
        """
        postdata = {
            'email': email,
            'password': password,
            'origURL': origURL,
        }

        print('Login...')

        while not self.is_login:
            self.operate = self._get_response(config.LOGINURL, postdata)
            cur_url = self.operate.geturl()
            self.web_content = self.operate.read()

            ruid = self._ruid_pattern.search(self.web_content)
            if ruid:
                self.is_login = True
                print(u"用户  %s %s" % (ruid.group(1), config.FAILCODE['-1']))
                return True

            failCode = self._failCode_pattern.search(cur_url)
            if not failCode:
                print('无法获得错误代码')
                return False

            definate_failCode = failCode.group(1)
            if definate_failCode not in config.FAILCODE:
                print('未知错误')
                return False

            print(config.FAILCODE[definate_failCode])
            if definate_failCode != '512':
                return False

            # Captcha required: save the image, ask the user, then retry.
            self._get_icode_img()
            # The prompt is printed separately because passing a non-ASCII
            # unicode prompt to raw_input() can raise UnicodeEncodeError on
            # an interactive Python 2 terminal.
            print(u"请输入验证码: ")
            self.icode = raw_input()
            postdata['icode'] = self.icode

        return True

    def _get_response(self, url, data=None):
        """Open ``url`` with the cookie-aware opener; POST when ``data`` is given."""
        if data is not None:
            req = urllib2.Request(url, urllib.urlencode(data))
        else:
            req = urllib2.Request(url)
        return self.opener.open(req)

    def _get_requestToken(self, data):
        """Extract ``requestToken`` and ``rtk`` from page text ``data``.

        A token whose pattern is not found is reset to '' instead of
        raising AttributeError on the missing match.
        """
        token_match = self.requestToken_pattern.search(data)
        rtk_match = self.rtk_pattern.search(data)
        self.requestToken = token_match.group(1) if token_match else ''
        self.rtk = rtk_match.group(1) if rtk_match else ''

    def _get_icode_img(self):
        """Download the captcha image and store it as ``icode.jpg``."""
        icode_img = self._get_response(config.ICODEURL).read()
        self._write_file('icode.jpg', icode_img)

    def _write_file(self, filename, data):
        """Write ``data`` to ``filename`` in binary mode, reporting the outcome."""
        try:
            # ``with`` closes the handle even when the write itself fails.
            with open(filename, 'wb') as output_file:
                output_file.write(data)
        except IOError:
            print("写文件失败!")
        else:
            print(u'文件 %s 写入完成!' % filename)

    #-------------------------------------------------------
    def viewnewinfo(self):
        """Print friends' status updates found in the cached login page."""
        self.__caiinfo()

    def _parse_newinfo(self, page):
        """Return ``(author, text)`` pairs for every ``<h3>`` entry naming an author.

        Entries without an author match are skipped; an entry whose text
        pattern does not match yields an empty text.
        """
        entries = []
        for block in self._h3_pattern.findall(page):
            authors = self._author_pattern.findall(block)
            if not authors:
                continue
            texts = self._content_pattern.findall(block)
            entries.append((authors[0], texts[0] if texts else ''))
        return entries

    def __caiinfo(self):
        """Collect feed entries from ``web_content`` and print them."""
        # Only the cached body is parsed: the response stream was already
        # consumed by login(), and ``operate`` is '' before any login.
        print('friend newinfo:')
        for username, info in self._parse_newinfo(self.web_content):
            print('%s 说 %s' % (username, info))
            print('----------------------------------------------')


if __name__ == "__main__":
    my_account = Renren()
    # login() returns False on failure; in that case the cached page is the
    # failed-login response, so there is no feed to parse.
    if my_account.login(config.EMAIL, config.PASSWORD, ''):
        my_account.viewnewinfo()

config模块代码如下

# -*- coding: utf-8 -*-

# Endpoint that receives the login form POST.
LOGINURL = r'http://www.renren.com/PLogin.do'
# Endpoint that serves the captcha image.
# NOTE(review): 'Math.random()' is sent literally as the rnd value; it looks
# copied from the site's JavaScript -- confirm whether a real random number
# is expected here.
ICODEURL = r'http://icode.renren.com/getcode.do?t=login&rnd=Math.random()'

# Placeholder credentials; replace with a real account before running.
EMAIL = r'你的邮箱'
PASSWORD = r'你的密码'

# FailCode via "login-v6.js"
# Maps the failCode value (as a string) to the message shown to the user.
# The '-1' entry is the message printed after a successful login.
FAILCODE = {
            '-1': u'登录成功',
            '0': u'登录系统错误,请稍后尝试',
            '1': u'您的用户名和密码不匹配',
            '2': u'您的用户名和密码不匹配',
            '4': u'您的用户名和密码不匹配',
            '8': u'请输入帐号,密码',
            '16': u'您的账号已停止使用',
            '32': u'帐号未激活,请激活帐号',
            '64': u'您的帐号需要解锁才能登录',
            '128': u'您的用户名和密码不匹配',
            '512': u'请您输入验证码',  # login() asks for the captcha on this code
            '4096': u'登录系统错误,稍后尝试',
}



评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值