这几天一直在学习用 Python 爬取网页,今天试着写了一个小程序,模拟登录人人网,并且提供了发送文字状态的函数。登录时已经可以处理需要验证码的情况,就拿来练手吧。(字符串处理的部分比较麻烦,因为不敢用正则表达式 QAQ)
代码:
# -*- coding: utf-8 -*-
import cookielib
import json
import os
import sys
import tempfile
import urllib
import urllib2

from bs4 import BeautifulSoup
### Interpreter setup (Python 2): re-import sys and switch the process-wide
### default string encoding to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
##
## Shared cookie state: one CookieJar for the whole module and the urllib2
## handler that wraps it. get() and post() build their openers from
## `cookproc`; RenRen.getpar() reads the `id` cookie out of `cookiejar`.
cookiejar= cookielib.CookieJar()
cookproc= urllib2.HTTPCookieProcessor(cookiejar)
###
###get the page
def get(url,headers=False):
    """Fetch *url* with an HTTP GET and return the response body as text.

    The request goes through an opener built on the module-level cookie
    processor, and that opener is also installed globally with
    urllib2.install_opener so later plain urllib2.urlopen calls share the
    same cookies.

    url     -- address to request.
    headers -- optional dict of extra request headers; any falsy value
               (the default False) sends no extra headers.

    Returns the body decoded as UTF-8, or as GBK with undecodable bytes
    dropped when the body is not valid UTF-8.
    """
    if headers:
        # Headers must be passed by keyword: the second positional argument
        # of urllib2.Request is the request body, which would turn this GET
        # into a POST and send no headers at all.
        req=urllib2.Request(url,headers=headers)
    else:
        req=urllib2.Request(url)
    opener=urllib2.build_opener(cookproc)
    urllib2.install_opener(opener)
    # .read() is required: the response object itself has no decode() method.
    page=urllib2.urlopen(req).read()
    try:
        page=page.decode('utf-8')
    except UnicodeDecodeError:
        # Only a decoding failure should trigger the GBK fallback; anything
        # else (e.g. KeyboardInterrupt) must propagate.
        page=page.decode('gbk','ignore')
    return page
def post(url,postdata,headers=False):
    """Send *postdata* to *url* as a form-encoded POST and return the body as text.

    The request goes through an opener built on the module-level cookie
    processor, and that opener is also installed globally with
    urllib2.install_opener so later plain urllib2.urlopen calls share the
    same cookies.

    url      -- address to post to.
    postdata -- mapping (or sequence of pairs) of form fields; it is passed
                through urllib.urlencode, so values must be given raw,
                not already percent-encoded.
    headers  -- optional dict of extra request headers; any falsy value
                (the default False) sends no extra headers.

    Returns the body decoded as UTF-8, or as GBK with undecodable bytes
    dropped when the body is not valid UTF-8.
    """
    postdata=urllib.urlencode(postdata)
    if headers:
        req=urllib2.Request(url,postdata,headers)
    else :
        req=urllib2.Request(url,postdata)
    opener=urllib2.build_opener(cookproc)
    urllib2.install_opener(opener)
    page=urllib2.urlopen(req).read()
    try:
        page=page.decode('utf-8')
    except UnicodeDecodeError:
        # Only a decoding failure should trigger the GBK fallback; a bare
        # except here would also swallow KeyboardInterrupt and real bugs.
        page=page.decode('gbk','ignore')
    return page
class RenRen:
    """Small renren.com client: logs in on construction and can post a text status.

    Creating an instance performs network I/O immediately: it asks the site
    whether a captcha is required (check), optionally prompts for it
    (getCode), and then submits the login form (login).

    Relies on the module-level helper post() and on the shared cookie state
    (cookiejar / cookproc) defined in this file.
    """
    email =""   # login e-mail, set by __init__
    pwd =""     # login password, set by __init__
    _id =""     # value of the `id` cookie, filled in by getpar()
    name =""    # display name scraped from the profile page by getpar()
    icode =""   # captcha text typed by the user; empty when none was requested
    _rtk =""    # token scraped by getpar(), sent as `_rtk` when publishing
    _tok =""    # token scraped by getpar(), sent as `requestToken` when publishing

    def __init__(self,email,pwd):
        """Store the credentials, run the captcha check and log in."""
        self.email=email
        self.pwd=pwd
        self.check()
        self.login()

    def check(self):
        """Ask the site whether this account needs a captcha and fetch one if so."""
        postdata={
            '_rtk' :self._rtk,
            'email' :self.email
        }
        url='http://www.renren.com/ajax/ShowCaptcha'
        reply=post(url,postdata)
        # post() returns text, so comparing against the integer 1 could never
        # be true and the captcha was never requested. Compare the text form.
        # NOTE(review): assumes the endpoint answers with a bare "1" when a
        # captcha is required -- confirm against the live service.
        if reply.strip()=='1':
            self.getCode()

    def getCode(self):
        """Download the captcha image, show it to the user and read the answer.

        The image is saved to a temporary .png file. On Windows it is opened
        with the associated viewer; elsewhere the file path is printed so the
        user can open it manually. The typed text is stored in self.icode.
        """
        par={
            't' :'web_login',
            'rnd' :0.47730758627661407
        }
        url='http://icode.renren.com/getcode.do?%s' %urllib.urlencode(par)
        # Fetch raw bytes through the shared cookie processor: the image is
        # binary and must not go through a text-decoding helper.
        data=urllib2.build_opener(cookproc).open(url).read()
        fd,path=tempfile.mkstemp(suffix='.png')
        try:
            os.write(fd,data)
        finally:
            # Always release the descriptor, even if the write fails.
            os.close(fd)
        if hasattr(os,'startfile'):
            os.startfile(path)
        else:
            # os.startfile exists only on Windows.
            print('%s' % path)
        # raw_input returns the typed text as-is; Python 2's input() would
        # evaluate it as an expression.
        self.icode=raw_input("输入验证码:")

    ## get necessary parameters
    def getpar(self):
        """Scrape the user id, display name and request tokens after login.

        Reads the `id` cookie from the shared cookie jar, downloads the
        profile page and cuts the values out of the first <script> tag.
        Raises ValueError if that script does not contain the `nx.user` /
        `nx.user.isvip` markers.
        """
        for x in cookiejar:
            if x.name=='id':
                self._id=x.value
                break
        url='http://www.renren.com/%s' % self._id
        # Plain urlopen: relies on the opener that post() installed globally
        # during login, so the session cookies are sent.
        page=urllib2.urlopen(url).read()
        html=BeautifulSoup(page)
        parli=html.find(name='script')
        s=str(parli)
        index1=s.index('nx.user')
        index2=s.index('nx.user.isvip')
        # NOTE(review): the fixed offsets and field positions below assume a
        # specific layout of the inline `nx.user` script -- confirm against
        # the current page before relying on them.
        s=s[index1+10:index2-1]
        parli=s.split(',')
        self.name=parli[3][8:]
        self._tok=parli[4][17:-1]
        self._rtk=parli[5][9:-3]
        print('%s %s %s' % (self.name,self._tok,self._rtk))

    def login(self):
        """Submit the login form and, on success, load the session parameters.

        Prints a success line with the user name, or a failure line when the
        JSON reply has a falsy `code`.
        """
        postdata={
            'captcha_type' :'web_login',
            'domain' :'renren.com',
            'email' :self.email,
            # Given raw: post() urlencodes every value, so a pre-encoded
            # literal would reach the server double-encoded.
            'f' :'http://www.renren.com/476284792',
            'icode' :self.icode,
            'key_id' :1,
            'origURL' :'http://www.renren.com/home',
            'password' :self.pwd,
            'rkey' :''
        }
        headers={
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0'
        }
        url="http://www.renren.com/ajaxLogin/login?1=1&uniqueTimestamp=2014611340911"
        info=json.loads(post(url,postdata,headers))
        if info["code"]:
            self.getpar()
            print('%s %s' % (self.name,'登录成功'))
        else:
            print('登录失败')

    def publish(self,content):
        """Post *content* as a text status for the logged-in user.

        content -- status text to publish.

        Prints 'ok' once the request has been sent; the server reply is not
        inspected.
        """
        postdata={
            '_rtk' :self._rtk,
            'channel' :'renren',
            'content' :content,
            # Sent as JSON text: urlencoding a Python dict would transmit its
            # repr (single quotes), which is not valid JSON.
            'privacyParams' :'{"sourceControl":"99"}',
            'requestToken' :self._tok,
            # Given raw: post() urlencodes every value, so a pre-encoded
            # literal would reach the server double-encoded.
            'withInfo' :'{"wpath":[]}'
        }
        url='http://shell.renren.com/'+self._id+'/status'
        post(url,postdata)
        print('ok')
# Script entry: prompt for credentials and log in.
# raw_input is used because Python 2's input() evaluates whatever is typed as
# an expression, which fails on a plain e-mail address and would execute
# arbitrary code.
email= raw_input('用户名')
password=raw_input('密码')
rr=RenRen(email,password)
# Example: post a status once logged in.
#rr.publish('下雨吧')