python爬知乎问题_python爬虫知乎问答

python爬虫知乎问答

import cookielib

import base64

import re

import hashlib

import json

import rsa

import binascii

import urllib2

import urllib

import sys

from lxml import etree

#coding:utf-8

def get_username(username):
    """Encode a login name for the ``su`` field of the login form.

    The name is percent-encoded first and the result is then Base64-encoded.

    Args:
        username: Plain login name, e.g. an e-mail address.

    Returns:
        The Base64 text of the percent-encoded name as a native ``str``,
        without any embedded or trailing newline.
    """
    # Imported locally so the function runs on Python 2 (urllib.quote) as
    # well as Python 3 (urllib.parse.quote).
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote

    quoted = quote(username)
    # b64encode never inserts line breaks. The previous encodestring call
    # wraps its output every 76 characters, so dropping only the last
    # character left newlines inside the value for long names.
    encoded = base64.b64encode(quoted.encode('ascii'))
    if not isinstance(encoded, str):
        # Python 3 returns bytes; callers expect text.
        encoded = encoded.decode('ascii')
    return encoded

def get_passwd(servertime,nonce,password):
    """Build the RSA-encrypted, hex-encoded password value.

    The plaintext is ``servertime``, a tab, ``nonce``, a newline and then
    ``password``. It is encrypted with a public key made from the fixed
    modulus below and the exponent 65537.

    Args:
        servertime: Value converted with ``str()`` and placed first.
        nonce: Value converted with ``str()`` and placed after the tab.
        password: Password text appended after the newline.

    Returns:
        The ciphertext as a hexadecimal string.
    """
    modulus_hex = 'EB2A38568661887FA180BDDB5CABD5F21C7BFD59C090CB2D245A87AC253062882729293E5506350508E7F9AA3BB77F4333231490F915F6D63C55FE2F08A49B353F444AD3993CACC02DB784ABBB8E42A9B1BBFFFB38BE18D78E87A0E41B9B8F73A928EE0CCEE1F6739884B9777E4FE9E88A1BBE495927AC4A799B3181D6442443'
    exponent = 65537

    plaintext = ''.join([str(servertime), '\t', str(nonce), '\n', password])
    public_key = rsa.PublicKey(int(modulus_hex, 16), exponent)
    ciphertext = rsa.encrypt(plaintext, public_key)
    return binascii.b2a_hex(ciphertext)

def get_prelogin_args(pre_login):
    """Fetch the pre-login URL and extract ``servertime`` and ``nonce``.

    The response is expected to wrap a JSON object in parentheses
    (a callback-style payload); the text between the first ``(`` and the
    next ``)`` is parsed as JSON.

    Args:
        pre_login: URL of the pre-login endpoint.

    Returns:
        A ``(servertime, nonce)`` tuple taken from the parsed JSON.

    Raises:
        ValueError: If the response contains no parenthesised payload.
        KeyError: If the payload lacks ``servertime`` or ``nonce``.
    """
    from contextlib import closing

    # closing() releases the connection even if read() fails; it is used
    # instead of a bare ``with`` because Python 2 urllib2 responses do not
    # implement the with-protocol.
    with closing(urllib2.urlopen(pre_login)) as response:
        prelogin_page = response.read()

    match = re.search(r'\((.*?)\)', prelogin_page)
    if match is None:
        # Previously this surfaced as an AttributeError on None.
        raise ValueError('pre-login response contains no (...) payload')

    data = json.loads(match.group(1))
    return data['servertime'], data['nonce']

def do_login(login_url):
    """Log in through the SSO endpoint and return a cookie-aware opener.

    Steps performed:
      1. Fetch ``servertime`` and ``nonce`` from the pre-login URL.
      2. POST the login form (encoded user name and encrypted password)
         to ``login_url``.
      3. Follow the ``location.replace('...')`` URL found in the response.
      4. Parse the JSON passed to ``feedBackUrlCallBack(...)`` on that page
         and build the user's home URL from ``userinfo['userdomain']``.
      5. Open the home URL once with the session cookies.

    Side effect: the opener is installed globally with
    ``urllib2.install_opener``.

    Args:
        login_url: URL the login form is POSTed to.

    Returns:
        ``(opener, final_login)``: the cookie-aware opener and the home URL.

    Raises:
        RuntimeError: If the login response has no redirect URL or the
            redirected page has no callback payload (e.g. the login was
            rejected).
    """
    from contextlib import closing

    # NOTE(review): credentials are hard-coded in source. They are kept so
    # behaviour is unchanged, but should be supplied from outside the code.
    username='644202562@qq.com'
    passwd='18956480156jf'
    pre_login='http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=NjQ0MjAyNTYyJTQwcXEuY29tJTVD&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.18)&_=1467687741384'

    servertime,nonce=get_prelogin_args(pre_login)

    postdata={
        'encoding':'UTF-8',
        'entry':'weibo',
        'from':'',
        'gateway':'1',
        'nonce':nonce,
        'pagerefer':'http://login.sina.com.cn/sso/logout.php?entry=miniblog&r=http%3A%2F%2Fweibo.com%2Flogout.php%3Fbackurl%3D%252F',
        'prelt':'45',
        'pwencode':'rsa2',
        'returntype':'META',
        'rsakv':'1330428213',
        'savestate':'7',
        'servertime':servertime,
        'service':'miniblog',
        'sp':get_passwd(servertime,nonce,passwd),
        'sr':'1366*768',
        'su':get_username(username),
        'url':'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
        'useticket':'1',
        'vsnf':'1',
    }

    # Every later urllib2.urlopen call shares this cookie jar.
    cookie_jar=cookielib.LWPCookieJar()
    cookie_support=urllib2.HTTPCookieProcessor(cookie_jar)
    opener2=urllib2.build_opener(cookie_support,urllib2.HTTPHandler)
    urllib2.install_opener(opener2)

    http_headers={'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0'}
    req=urllib2.Request(login_url,data=urllib.urlencode(postdata),headers=http_headers)
    with closing(urllib2.urlopen(req)) as response:
        login_response=response.read()

    redirect=re.search(r"location\.replace\('(.*?)'\)",login_response)
    if redirect is None:
        raise RuntimeError('login failed: no redirect URL in the login response')

    with closing(urllib2.urlopen(redirect.group(1))) as response:
        login_page=response.read()

    callback=re.search(r'feedBackUrlCallBack\((.*?)\)',login_page)
    if callback is None:
        raise RuntimeError('login failed: no feedBackUrlCallBack payload in the redirected page')

    userinfo=json.loads(callback.group(1))['userinfo']
    final_login='http://weibo.com/' +userinfo['userdomain']

    # The home page is still requested once, as before. Its body used to be
    # decoded and re-encoded into a variable that was never read, which
    # could raise on undecodable bytes, so it is now discarded.
    with closing(urllib2.urlopen(final_login)) as response:
        response.read()

    print('Login Success!')
    return opener2,final_login

def get_fans(opener,final_login):
    """Fetch ``final_login`` and print the feed-content nodes found in it.

    Args:
        opener: Object whose ``open(url)`` returns a readable, closable
            response (for example the opener returned by ``do_login``).
        final_login: URL of the page to fetch.

    Returns:
        The list of elements matched by the XPath query (also printed).
    """
    from contextlib import closing

    print(final_login)

    with closing(opener.open(final_login)) as response:
        raw_page=response.read()

    # The markup is parsed with its original case. It used to be lowercased
    # first, which also lowercased attribute values, so the mixed-case class
    # "WB_text W_f14" in the query below could never match.
    tree=etree.HTML(raw_page.decode('utf-8','ignore'))

    # NOTE(review): "body/div" only selects <div> elements that are direct
    # children of <body>; confirm whether nested nodes ("//div") were meant.
    items=tree.xpath('body/div[@class="WB_text W_f14"][@node-type="feed_list_content"]')
    print(items)
    return items

#

#

#

if __name__ == '__main__':
    # Log in first, then reuse the returned opener to fetch the home page.
    login_endpoint = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.18)"
    session_opener, home_url = do_login(login_endpoint)
    get_fans(session_opener, home_url)

# NOTE(review): this trailing fragment is incomplete as shown. `page` and
# `post_data` are not defined anywhere in the visible file, and neither
# pattern below contains a capture group, so `.group(1)` raises IndexError.
# The patterns look like they lost their original content - TODO restore
# them from the original source before using this code.
p1=re.compile('.*?')

img_url=p1.search(page).group(1)

p2=re.compile('')

img_id=p2.search(page).group(1)

post_data['captcha-id']=img_id

# Fixed: this line wrote to `post_datda`, a misspelling of the `post_data`
# dict used on the line above.
# NOTE(review): the captcha id is stored as the solution here; presumably
# the solved captcha text was intended - confirm.
post_data['captcha-solution']=img_id

http://www.hiry.cn

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值