Simulated Sina Weibo login in Python

I have long been frustrated by how heavily restricted the open interfaces of the Sina Weibo API are, which leaves a crawler as the only practical way to get data. I spent a long time looking for simulated-login code, and not a single example worked. So I took a reference article as a starting point, made a few small changes to its code, and it finally works.

1: For background, see http://blog.csdn.net/ta790799213/article/details/44205351

2: During the simulated login I ran into retcode=4049. The fix: log in at http://login.sina.com.cn/?r=%2Fmember%2Fsecurity%2Fprotect.php and configure trusted regions so that logins from them do not require a captcha. If that is not enough, log in to Weibo itself and make the same "no captcha for these regions" setting on the login-protection page.
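If you want to detect this case in code rather than by reading the console output, one option (just a sketch; it assumes the login flow in section 3, where the SSO endpoint answers with a META page that redirects via location.replace and embeds a retcode parameter in that URL) is to pull the retcode out of the response:

# Sketch only: run this right after the session.post(url_login, ...) call in section 3.
# retcode=0 indicates success; retcode=4049 indicates a captcha is being demanded.
m = re.search(r'retcode=(\d+)', resp.content)
if m and m.group(1) != '0':
    print 'login not clean, retcode =', m.group(1)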

3: The code:

# -*- coding: utf-8 -*-
import requests
import base64
import re
import urllib
import rsa
import json
import binascii
import string
from weibo import Client
import random
import time

code = "5f9f84b2aa3198032416963c84c2d182"
app_key = "1110261163"
app_secret = "0de95a319a66c755c008b6332d7dd063"
redirect_uri = "https://api.weibo.com/oauth2/default.html"



class SinaCrawler:
    def __init__(self, max_page):
        self.session = None
        self.MAX_PAGE = max_page
        token = {u'access_token': u'2.00pE39sBn1UT7E61e7174d95TdYVED', u'remind_in': u'157679999', u'uid': u'1720813027', u'expires_at': 1575304674}
        self.client = Client(app_key, app_secret, redirect_uri, token)
        self.f = open("data", "w")

    def __del__(self):
        self.f.close()

    def userlogin(self,username,password):
        session = requests.Session()
        url_prelogin = 'http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.18)&_=1430736851146'
        url_login = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.8)'
        #get servertime,nonce, pubkey,rsakv
        resp = session.get(url_prelogin)
        print resp.content
        p = re.compile('{.*}')
        json_data  = re.search(p, resp.content).group()
        print json_data
        data       = json.loads(json_data)  # parse the JSON instead of eval()'ing untrusted text
        servertime = data['servertime']
        print 'servertime:',servertime
        nonce      = data['nonce']
        pubkey     = data['pubkey']
        rsakv      = data['rsakv']

        # su is the base64-encoded, URL-quoted username
        su  = base64.b64encode(urllib.quote(username))

        # sp is "servertime\tnonce\npassword" encrypted with the RSA public key
        # (pubkey, e=65537) returned by prelogin, then hex-encoded
        rsaPublickey = int(pubkey, 16)
        key = rsa.PublicKey(rsaPublickey, 65537)
        message = str(servertime) + '\t' + str(nonce) + '\n' + str(password)
        sp = binascii.b2a_hex(rsa.encrypt(message, key))
        postdata = {
            'entry': 'weibo',
            'gateway': '1',
            'from': '',
            'savestate': '7',
            'userticket': '1',
            'ssosimplelogin': '1',
            'vsnf': '1',
            'vsnval': '',
            'su': su,
            'service': 'miniblog',
            'servertime': servertime,
            'nonce': nonce,
            'pwencode': 'rsa2',
            'sp': sp,
            'encoding': 'UTF-8',
            'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
            'returntype': 'META',
            'rsakv' : rsakv,
        }
        resp = session.post(url_login,data = postdata)
        # print resp.headers
        print resp.content
        # the META response redirects via location.replace(); following that URL
        # completes the SSO handshake and fills the session with login cookies
        login_url = re.findall('{location\.replace\(\'(.+?)\'\);}',resp.content)
        print 'login_url',login_url,type(login_url)
        respo = session.get(login_url[0])
        print respo.content
        self.session = session
        
    def do_search(self, query):
        """ do search 

        Args:
            query : str indicating the query 

        Return:
            None
        """
        self.f.write('screen_name\tgender\trelated_msg\tregister_time\tlocation\n')
        for page in range(1, self.MAX_PAGE + 1):
            time.sleep(random.random())
            self.do_search_page(page, query)

    def do_search_page(self, page, query):
        """ get search result of the page in the search html page 

        Args:
            page : int indicating the number of the page

        Return:
            None
        """
        search_url  = "http://s.weibo.com/wb/%s&page=%d" % (query, page)
        html_page = self.session.get(search_url)
        print html_page.content
#         print all_results
#         res_cnt = 1
#         for res in all_results:
#             print 'page %d result %d done' % (page, res_cnt)
#             res_cnt += 1
#             information = self.get_person_info(res)
#     def get_search_result(self, html_content):
#         """ get search result from the html content 
#             
#         Args:
#             html_content: str for storing html content of the search page
#         
#         Return:
#             None
#         """
#         #content = re.findall(r"\"pid\":\"pl_user_feedList\"(?P<tips>[\w\W]*?)", html_content)
#         html_content = html_content.strip()
#         content = re.findall(r"\"pid\":\"pl_wb_feedlist\"(?P<tips>[\w\W]*?)</script>", html_content)[0]        
#         clean_content = string.replace(content, "\\\\", "\\")
#         search_result = re.findall(r"<div class=\\\"WB_cardwrap S_bg2 clearfix\\\" >(?P<tips>[\w\W]*?)<\\/div>\\n<\\/div>", clean_content)
#         return search_result
    
           
if __name__ == '__main__':
    sina_crawler = SinaCrawler(2)
    sina_crawler.userlogin('18650306405', 'lkz881199')

    query = 'iphone'
    #print type(query)
    q = string.replace(str(urllib.quote(query)), "%", "%25")
    print q
    sina_crawler.do_search(q)
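One detail worth calling out: the query is effectively percent-encoded twice before it reaches s.weibo.com, because the keyword has to sit inside the URL path already encoded, and the '%' characters from that first pass then need to be escaped as '%25' themselves. A standalone illustration (the keyword here is a made-up example, not from the run above):

# -*- coding: utf-8 -*-
import urllib
import string

query = '苹果'                              # hypothetical non-ASCII keyword
once  = urllib.quote(query)                 # '%E8%8B%B9%E6%9E%9C'
twice = string.replace(once, '%', '%25')    # '%25E8%258B%25B9%25E6%259E%259C'
print "http://s.weibo.com/wb/%s&page=1" % twice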
4: Results:

After the simulated login succeeds, every page you fetch contains your own profile information, such as the Weibo screen name 这个程序员不太冷2 and your uid.
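To confirm the login programmatically instead of eyeballing the HTML, a quick sanity check (again only a sketch that reuses the session built above; substitute your own uid or screen name) is to fetch the home page and look for your uid in it:

# Sketch: pages served to a logged-in session embed your profile info,
# so finding your own uid in the homepage HTML confirms the cookies work.
home = sina_crawler.session.get('http://weibo.com')
if '1720813027' in home.content:   # replace with your own uid
    print 'simulated login confirmed'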

