新浪微博数据挖掘食谱之一: 登录篇 (API)

本文介绍了如何使用API进行新浪微博的数据挖掘,从登录步骤到授权获取数据,为后续的数据抓取打下基础。
摘要由CSDN通过智能技术生成

#!/usr/bin/python 
# -*- coding: utf-8 -*-

'''
Created on 2014-12-28
@author: beyondzhou
@name: login.py
'''

import re, json
import urllib, urllib2, urllib3, cookielib
import base64, rsa, binascii # encrypt
from weibo import APIClient

class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_301(self, cls, req, fp, code, msg, headers):
        result = urllib2.HTTPRedirectHandler.http_error_301(cls, req, fp, code, msg, headers)
        result.status = code
        return result

    def http_error_302(self, cls, req, fp, code, msg, headers):
        result = urllib2.HTTPRedirectHandler.http_error_302(cls, req, fp, code, msg, headers)
        result.status = code
        return result
    
def get_cookie():
    cookies = cookielib.CookieJar()
    return urllib2.HTTPCookieProcessor(cookies)
   
def get_opener(proxy=False):
    rv=urllib2.build_opener(get_cookie(), SmartRedirectHandler())
    rv.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)')]
    return rv

class SinaAPI():
    def __init__(self, CALLBACK_URL, APP_KEY, REDIRECT_URL, USER_ID, USER_PSWD):
        self.CALLBACK_URL = CALLBACK_URL
        self.APP_KEY = APP_KEY
        self.REDIRECT_URL = REDIRECT_URL
        self.USER_ID = USER_ID
        self.USER_PSWD = USER_PSWD
        self.http = urllib3.PoolManager()
        
    def get_username(self, USER_ID):
        # The Encryption Algorithm of username 
        # ssologin.js : ah.su=sinaSSOEncoder.base64.encode(m(aj));
        USER_ID_ = urllib.quote(USER_ID) # encode username, avoid error refer:@ &  
        su = base64.encodestring(USER_ID_)[:-1]
        return su
   
    def get_password_rsa(self, USER_PSWD, PUBKEY, servertime, nonce):
        # rsa Encrypt :  #when pwencode = "rsa2"
        rsaPubkey = int(PUBKEY, 16)#pubkey from 16 to 10
        key_1 = int('10001', 16) #10001 to 65537 
        key = rsa.PublicKey(rsaPubkey, key_1) #
        message = str(servertime) + "\t" + str(nonce) + "\n" + str(USER_PSWD)
        passwd = rsa.encrypt(message, key)
        passwd = binascii.b2a_hex(passwd) #to 16
        return passwd
      
    def get_parameter(self):
        su = self.get_username(self.USER_ID)
        url = "https://login.sina.com.cn/sso/prelogin.php?entry=openapi&callback=sinaSSOController.preloginCallBack\
&su="+su+"&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.15)"
        r = self.http.request('GET', url)
        p = re.compile('\((.*)\)')
        json_data = p.search(r.data).group(1)
        data = json.loads(json_data)
        
        PUBKEY = data['pubkey']
        pcid = data['pcid']
        servertime = str(data['servertime'])
        nonce = data['nonce']
        rsakv = str(data['rsakv'])
        sp = self.get_password_rsa(self.USER_PSWD, PUBKEY, servertime, nonce)
        
        #print pcid; print servertime; print nonce; print rsakv; print sp; print su
        return pcid, servertime, nonce, rsakv, sp, su
         
    def get_ticket(self):
        pcid, servertime, nonce, rsakv, sp, su = self.get_parameter()
        fields = urllib.urlencode({
            'entry'        : 'openapi',
            'gateway'      : '1',
            'from'         : '',
            'savestate'    : '0',
            'useticket'    : '1',
            'pagerefer'    :'',
            'pcid'         : pcid,
            'ct'           : '1800',
            's'            : 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值