如何打开微博的门禁

在实验室里,我一直在做爬虫程序,微博数据也是抓了又抓。要抓取微博数据,必须先通过登录这道“门禁”,进去之后才能游刃有余,所以还是先把这扇大门打开吧。

对方先后换过几把锁(登录时的加密方式),却越换越容易打开,说不定以后就完全开放了。

下面这段代码仅适用于2013年11月至今(2013年12月)的登录流程;以后对方肯定还会换锁,不过思路大同小异,先来看看代码吧。

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;

import Global.PublicEnter;

/**
 * Small client that logs in to Sina Weibo through the web SSO endpoints
 * ({@code prelogin.php} followed by {@code login.php}, ssologin.js v1.4.2
 * parameters) and then fetches pages with the same HTTP client.
 *
 * <p>All requests go through one {@link DefaultHttpClient} instance, so
 * whatever session state that client keeps after {@link #openDoor} is reused
 * by {@link #visit} and {@link #logout}.
 */
public class Sina {

    /** Hex RSA modulus handed to {@code BigIntegerRSA.rsaCrypt} to encrypt the password. */
    // NOTE(review): prelogin also returns a "pubkey" field (stored in PreLoginInfo.pubkey)
    // that is never used; confirm whether it should replace this hard-coded value.
    private static final String SINA_PK = "EB2A38568661887FA180BDDB5CABD5F21C7BFD59C090CB2D24"
            + "5A87AC253062882729293E5506350508E7F9AA3BB77F4333231490F915F6D63C55FE2F08A49B353F444AD39"
            + "93CACC02DB784ABBB8E42A9B1BBFFFB38BE18D78E87A0E41B9B8F73A928EE0CCEE"
            + "1F6739884B9777E4FE9E88A1BBE495927AC4A799B3181D6442443";

    private static final String LOGOUTURL = "http://t.sina.com.cn/logout.php?";

    private static final String LOGIN_URL =
            "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)";

    /** Start of the callback URL embedded in the login response page. */
    private static final String AJAX_LOGIN_PREFIX = "http://weibo.com/ajaxlogin.php?";

    /** Text that terminates the callback URL in the login response page. */
    private static final String LOGIN_OK_MARKER = "code=0";

    /** Explicit charset so byte conversion does not depend on the platform default. */
    private static final java.nio.charset.Charset UTF8 =
            java.nio.charset.Charset.forName("UTF-8");

    private static final Log logger = LogFactory.getLog(Sina.class);

    private final DefaultHttpClient client = new DefaultHttpClient();

    /**
     * Fetches a page with the shared HTTP client.
     *
     * @param in_page absolute URL to request with GET
     * @return the response body, or an empty string if the request failed with
     *         an {@link IOException} or the response carried no body
     */
    public String visit(String in_page) {
        HttpGet getMethod = new HttpGet(in_page);
        try {
            return readBody(client.execute(getMethod));
        } catch (IOException e) {
            // Best effort: callers get "" on I/O failure. Unchecked exceptions are
            // deliberately NOT swallowed any more (the old return-in-finally hid them).
            getMethod.abort();
            logger.error("Failed to fetch " + in_page, e);
            return "";
        }
    }

    /**
     * Performs the login handshake: fetches the prelogin values, posts the
     * RSA-encrypted credentials, then requests the callback URL found in the
     * login response.
     *
     * @param username account name; sent as the Base64-encoded {@code su} field
     * @param pwd      plain-text password; encrypted before being sent
     * @throws IllegalStateException if the login response does not contain the
     *                               expected callback URL ending in {@code code=0}
     * @throws Exception             on network, JSON or encryption failures
     */
    public void openDoor(String username, String pwd) throws Exception {
        client.getParams().setParameter("http.protocol.cookie-policy",
                CookiePolicy.BROWSER_COMPATIBILITY);
        client.getParams().setParameter(
                HttpConnectionParams.CONNECTION_TIMEOUT, 5000);

        PreLoginInfo info = getPreLoginBean(client);

        // Plain text layout passed to the RSA helper: servertime TAB nonce NEWLINE password.
        String pwdString = info.servertime + "\t" + info.nonce + "\n" + pwd;
        String sp = new BigIntegerRSA().rsaCrypt(SINA_PK, "10001", pwdString);

        HttpPost post = new HttpPost(LOGIN_URL);
        post.setEntity(new UrlEncodedFormEntity(
                buildLoginForm(username, sp, info), HTTP.UTF_8));
        String loginPage = readBody(client.execute(post));

        String url = extractCallbackUrl(loginPage);
        logger.debug("url:" + url);

        // Follow the callback URL extracted from the login page.
        String callbackBody = readBody(client.execute(new HttpGet(url)));

        // The extracted value is only logged, so a missing field is reported
        // rather than raised as an index error.
        // Offset 13 = length of "userdomain" (10) plus 3 more characters,
        // presumably the `":"` separator before the value -- confirm against a real response.
        int domainAt = callbackBody.indexOf("userdomain");
        int lastQuote = callbackBody.lastIndexOf("\"");
        if (domainAt >= 0 && domainAt + 13 <= lastQuote) {
            logger.debug(callbackBody.substring(domainAt + 13, lastQuote));
        } else {
            logger.warn("Login callback response did not contain a userdomain value");
        }
    }

    /** Builds the form fields posted to the login endpoint. */
    private List<NameValuePair> buildLoginForm(String username, String sp,
            PreLoginInfo info) {
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("entry", "weibo"));
        nvps.add(new BasicNameValuePair("gateway", "1"));
        nvps.add(new BasicNameValuePair("from", ""));
        nvps.add(new BasicNameValuePair("savestate", "7"));
        nvps.add(new BasicNameValuePair("useticket", "1"));
        nvps.add(new BasicNameValuePair("ssosimplelogin", "1"));
        nvps.add(new BasicNameValuePair("vsnf", "1"));
        nvps.add(new BasicNameValuePair("su", encodeUserName(username)));
        nvps.add(new BasicNameValuePair("service", "miniblog"));
        nvps.add(new BasicNameValuePair("servertime", String.valueOf(info.servertime)));
        nvps.add(new BasicNameValuePair("nonce", info.nonce));
        nvps.add(new BasicNameValuePair("pwencode", "rsa2"));
        nvps.add(new BasicNameValuePair("rsakv", info.rsakv));
        nvps.add(new BasicNameValuePair("sp", sp));
        nvps.add(new BasicNameValuePair("encoding", "UTF-8"));
        nvps.add(new BasicNameValuePair("prelt", "115"));
        nvps.add(new BasicNameValuePair("returntype", "META"));
        nvps.add(new BasicNameValuePair(
                "url",
                "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack"));
        return nvps;
    }

    /**
     * Cuts the callback URL out of the login response page.
     *
     * @throws IllegalStateException if the URL prefix or the terminating
     *                               {@code code=0} marker is missing
     */
    private static String extractCallbackUrl(String loginPage) {
        int start = loginPage.indexOf(AJAX_LOGIN_PREFIX);
        // Search for the marker from the URL start so an earlier, unrelated
        // occurrence in the page cannot produce an inverted range.
        int marker = start < 0 ? -1 : loginPage.indexOf(LOGIN_OK_MARKER, start);
        if (marker < 0) {
            throw new IllegalStateException(
                    "Login failed: response did not contain a callback URL ending in "
                            + LOGIN_OK_MARKER);
        }
        return loginPage.substring(start, marker + LOGIN_OK_MARKER.length());
    }

    /** Reads the response body as a string; returns "" when there is no entity. */
    private static String readBody(HttpResponse response) throws IOException {
        if (response.getEntity() == null) {
            return "";
        }
        return EntityUtils.toString(response.getEntity());
    }

    /**
     * Requests the prelogin data and copies its JSON fields into a
     * {@code PreLoginInfo}.
     */
    private PreLoginInfo getPreLoginBean(HttpClient client)
            throws HttpException, IOException, JSONException {
        JSONObject jsonInfo = JSONObject.fromObject(getPreLoginInfo(client));
        PreLoginInfo info = new PreLoginInfo();
        info.nonce = jsonInfo.getString("nonce");
        info.pcid = jsonInfo.getString("pcid");
        info.pubkey = jsonInfo.getString("pubkey");
        info.retcode = jsonInfo.getInt("retcode");
        info.rsakv = jsonInfo.getString("rsakv");
        info.servertime = jsonInfo.getLong("servertime");
        return info;
    }

    /**
     * Calls the prelogin endpoint and returns the text between the first
     * {@code '('} and the last {@code ')'} of the response, i.e. the argument
     * of the JSONP-style callback.
     *
     * @param client HTTP client used for the request
     * @return the text inside the callback parentheses
     * @throws ParseException if the response is not wrapped in parentheses
     * @throws IOException    if the request fails
     */
    public String getPreLoginInfo(HttpClient client)
            throws ParseException, IOException {
        // The "su" value is a fixed Base64 string, not the caller's user name.
        String preloginurl = "http://login.sina.com.cn/sso/prelogin.php?entry=sso&"
                + "callback=sinaSSOController.preloginCallBack&su="
                + "dW5kZWZpbmVk"
                + "&rsakt=mod&client=ssologin.js(v1.4.2)"
                + "&_=" + getCurrentTime();

        String getResp = readBody(client.execute(new HttpGet(preloginurl)));

        int firstLeftBracket = getResp.indexOf('(');
        int lastRightBracket = getResp.lastIndexOf(')');
        if (firstLeftBracket < 0 || lastRightBracket <= firstLeftBracket) {
            throw new ParseException("Unexpected prelogin response: " + getResp);
        }

        String jsonBody = getResp.substring(firstLeftBracket + 1, lastRightBracket);
        logger.debug(jsonBody);
        return jsonBody;
    }

    /** Current time in whole seconds since the epoch, as a decimal string. */
    private String getCurrentTime() {
        return getServerTime();
    }

    /**
     * Encodes the user name for the {@code su} form field: the first
     * {@code '@'} is replaced by {@code "%40"} and the result is Base64-encoded
     * from its UTF-8 bytes.
     */
    private String encodeUserName(String email) {
        String escaped = email.replaceFirst("@", "%40");
        return Base64.encodeBase64String(escaped.getBytes(UTF8));
    }

    /** URL-encodes the account as UTF-8 and Base64-encodes the result. */
    private String encodeAccount(String account) {
        try {
            return Base64.encodeBase64String(
                    URLEncoder.encode(account, "UTF-8").getBytes(UTF8));
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /** Builds a random string of {@code len} characters drawn from A-Z and 0-9. */
    private String makeNonce(int len) {
        String x = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        StringBuilder str = new StringBuilder(Math.max(len, 0));
        for (int i = 0; i < len; i++) {
            str.append(x.charAt((int) (Math.ceil(Math.random() * 1000000) % x
                    .length())));
        }
        return str.toString();
    }

    /** Current time in whole seconds since the epoch, as a decimal string. */
    private static String getServerTime() {
        long servertime = new Date().getTime() / 1000;
        return String.valueOf(servertime);
    }

    /**
     * Requests the logout URL. Failures are logged and otherwise ignored.
     * The response body is drained so the client's connection is released for
     * subsequent requests.
     */
    public void logout() {
        HttpGet method = new HttpGet(LOGOUTURL);
        try {
            HttpResponse response = client.execute(method);
            EntityUtils.consume(response.getEntity());
        } catch (IOException e) {
            method.abort();
            logger.error("Logout request failed", e);
        }
    }
}

另外,授人以鱼不如授人以渔:上面表单里各个参数的封装方式,实际上是通过抓包分析登录请求的报文得到的。有不少好用的抓包工具,读者可以自行 Google 一下。

如此便走进了微博,在里面如何折腾就看你自己了,当然有很多技巧,且听下回分解!

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值