又重新学了一下 Python 的 urllib2,参考 http://www.cnpythoner.com/post/30.html 和 http://www.pythoner.com/65.html,写了个登录人人网的脚本。脚本能够处理需要验证码的情况:会把验证码图片(icode.jpg)写到本地,再提示输入验证码。抓取主页新鲜事的正则貌似有点问题,先不管了。
# -*- coding: utf-8 -*-
import urllib
import urllib2
import cookielib
import re
import config
class Renren(object):
    """Minimal renren.com client: cookie-based login plus a feed scraper.

    Written for Python 2 (``urllib2`` / ``cookielib`` / ``raw_input``).
    Every ``print`` uses the single-argument parenthesised form, which the
    Python 2 print statement accepts unchanged.
    """

    def __init__(self):
        """Create the cookie-aware opener and compile the token patterns."""
        self.operate = ''  # last response object (body not necessarily read)
        self.requestToken = self.rtk = ''
        self.icode = ''  # captcha text typed by the user
        self.is_login = False
        self.web_content = ''  # body of the last page fetched by login()
        self.cj = cookielib.CookieJar()
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        # Also install the opener globally so plain urllib2.urlopen() calls
        # share the same cookie jar.
        urllib2.install_opener(self.opener)
        self.requestToken_pattern = re.compile(r"get_check:'([-0-9]*)'")
        self.rtk_pattern = re.compile(r"get_check_x:'([a-zA-Z0-9]+)'")

    def login(self, email='', password='', origURL=''):
        """Log in by POSTing the credentials to ``config.LOGINURL``.

        Success is detected by a ``'ruid':'<digits>'`` marker in the returned
        page. On failure the ``failCode`` query parameter of the final URL is
        looked up in ``config.FAILCODE`` and its message printed. Fail code
        ``'512'`` (captcha required) saves the captcha image to ``icode.jpg``,
        prompts for its text and retries; any other failure gives up.

        Args:
            email: account e-mail.
            password: account password.
            origURL: value sent as the ``origURL`` form field.

        Returns:
            True when logged in (including when this instance was already
            logged in), False otherwise.
        """
        postdata = {
            'email': email,
            'password': password,
            'origURL': origURL,
        }
        ruid_pattern = re.compile(r"'ruid':'(\d+)'")
        failCode_pattern = re.compile(r"&failCode=(\d+)")
        print('Login...')
        while not self.is_login:
            self.operate = self._get_response(config.LOGINURL, postdata)
            cur_url = self.operate.geturl()
            self.web_content = self.operate.read()
            ruid = ruid_pattern.search(self.web_content)
            if ruid:
                self.is_login = True
                print(u"用户 %s %s" % (ruid.group(1), config.FAILCODE['-1']))
                return True

            failCode = failCode_pattern.search(cur_url)
            if not failCode:
                print('无法获得错误代码')
                return False

            definate_failCode = failCode.group(1)  # the failCode as a string
            if definate_failCode not in config.FAILCODE:
                print('未知错误')
                return False

            print(config.FAILCODE[definate_failCode])
            if definate_failCode != '512':
                return False

            # Captcha required: fetch the image, ask the user, then retry
            # the same POST with the extra 'icode' field.
            self._get_icode_img()
            self.icode = raw_input(u"请输入验证码: ")
            postdata['icode'] = self.icode

        # Only reached when the instance was already logged in on entry.
        return True

    def _get_response(self, url, data=None):
        """Open *url* through the cookie-aware opener and return the response.

        A GET is issued when *data* is None; otherwise *data* (a mapping) is
        URL-encoded and sent as the POST body.
        """
        if data is not None:
            req = urllib2.Request(url, urllib.urlencode(data))
        else:
            req = urllib2.Request(url)
        return self.opener.open(req)

    def _get_requestToken(self, data):
        """Extract ``requestToken`` and ``rtk`` from page text *data*.

        A token whose marker is not present in *data* is reset to the empty
        string instead of failing with AttributeError on a missing match.
        """
        token_match = self.requestToken_pattern.search(data)
        rtk_match = self.rtk_pattern.search(data)
        self.requestToken = token_match.group(1) if token_match else ''
        self.rtk = rtk_match.group(1) if rtk_match else ''

    def _get_icode_img(self):
        """Download the captcha image and save it as ``icode.jpg``."""
        icode_img = self._get_response(config.ICODEURL).read()
        self._write_file('icode.jpg', icode_img)

    def _write_file(self, filename, data):
        """Write the byte string *data* to *filename* in binary mode.

        IOError is reported on stdout rather than propagated (best effort).
        """
        try:
            # `with` closes the handle even if the write fails; write() is
            # the right call for one blob (writelines() iterates the data).
            with open(filename, 'wb') as output_file:
                output_file.write(data)
        except IOError:
            print("写文件失败!")
        else:
            print(u'文件 %s 写入完成!' % filename)

    # -------------------------------------------------------
    def viewnewinfo(self):
        """Print friends' status updates found in the last fetched page."""
        self.__caiinfo()

    def __caiinfo(self):
        """Scrape ``self.web_content`` and print one line per status update.

        Each ``<h3>...</h3>`` fragment is searched for an author and a
        content part. Fragments without an author are skipped; a fragment
        whose content part cannot be extracted is printed with empty content
        instead of raising IndexError.
        """
        h3patten = re.compile('<h3>(.*?)</h3>')  # one feed entry
        apatten = re.compile('<a.+>(.+)</a>:')  # author
        cpatten = re.compile(r'</a>(.+)\s')  # content
        # login() has already read the response body into web_content, so
        # the response object is not read again here.
        infocontent = self.web_content
        print('friend newinfo:')
        for m in h3patten.findall(infocontent):
            username = apatten.findall(m)
            if not username:
                continue
            info = cpatten.findall(m)
            content = info[0] if info else ''
            # Same output as the Python 2 statement `print a, '说', b`.
            print(' '.join([username[0], '说', content]))
            print('----------------------------------------------')
if __name__ == "__main__":
    # Script entry point: log in with the credentials from the config module.
    my_account = Renren()
    # Only scrape the feed after a successful login; otherwise the page held
    # by the client is the login-failure page, not the home feed.
    if my_account.login(config.EMAIL, config.PASSWORD, ''):
        my_account.viewnewinfo()
config 模块(config.py)的代码如下:
# -*- coding: utf-8 -*-
# Settings for the Renren login script: endpoints, credentials and the
# table that maps login fail codes to user-facing messages.

# Endpoint that receives the login form POST.
LOGINURL = r'http://www.renren.com/PLogin.do'
# Endpoint that serves the captcha image shown on the login page.
ICODEURL = r'http://icode.renren.com/getcode.do?t=login&rnd=Math.random()'

# Account credentials -- replace the placeholders with real values.
EMAIL = r'你的邮箱'
PASSWORD = r'你的密码'

# Message shared by every "credentials rejected" fail code.
_CREDENTIALS_MISMATCH = u'您的用户名和密码不匹配'

# FailCode via "login-v6.js"
# Keys are the failCode values as strings; '-1' is the success message.
FAILCODE = {
    '-1': u'登录成功',
    '0': u'登录系统错误,请稍后尝试',
    '1': _CREDENTIALS_MISMATCH,
    '2': _CREDENTIALS_MISMATCH,
    '4': _CREDENTIALS_MISMATCH,
    '8': u'请输入帐号,密码',
    '16': u'您的账号已停止使用',
    '32': u'帐号未激活,请激活帐号',
    '64': u'您的帐号需要解锁才能登录',
    '128': _CREDENTIALS_MISMATCH,
    '512': u'请您输入验证码',
    '4096': u'登录系统错误,稍后尝试',
}