在实验室里,我一直在做爬虫相关的工作,微博数据也是抓了又抓。要想进入微博之后游刃有余地抓取数据,首先得解决登录这道"门禁",所以还是先把这扇大门打开吧。
对方先后换过几把"锁",却越换越弱,说不定以后还真会完全开放。
下面这段代码仅适用于2013年11月至今(2013年12月)的登录方式,以后对方肯定还会换锁,不过大同小异,先来看看代码吧。
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.ParseException;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.params.CookiePolicy;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.HttpConnectionParams;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;
import Global.PublicEnter;
public class Sina {
/**
 * RSA key material passed as the first argument to {@code BigIntegerRSA.rsaCrypt} when the
 * password is encrypted: 256 hexadecimal digits (1024 bits).
 * NOTE(review): presumably the public modulus that pairs with the "10001" exponent - confirm
 * against BigIntegerRSA.
 */
private static final String SINA_PK = "EB2A38568661887FA180BDDB5CABD5F21C7BFD59C090CB2D24"
        + "5A87AC253062882729293E5506350508E7F9AA3BB77F4333231490F915F6D63C55FE2F08A49B353F444AD39"
        + "93CACC02DB784ABBB8E42A9B1BBFFFB38BE18D78E87A0E41B9B8F73A928EE0CCEE"
        + "1F6739884B9777E4FE9E88A1BBE495927AC4A799B3181D6442443";

/** URL requested by {@code logout()}. */
private static final String LOGOUTURL = "http://t.sina.com.cn/logout.php?";

/** HTTP client through which this instance sends all of its requests. */
private final DefaultHttpClient client = new DefaultHttpClient();

/** Commons-logging logger for this class. */
private static final Log logger = LogFactory.getLog(Sina.class);
/**
 * Fetches a page with an HTTP GET through this instance's client and returns the body as text.
 *
 * <p>The call is best-effort: I/O and runtime failures raised while executing the request or
 * reading the body are logged and reported as an empty string. {@link Error}s are deliberately
 * not caught.
 *
 * @param in_page absolute URL of the page to request
 * @return the response body, or an empty string if the request failed or carried no body
 */
public String visit(String in_page) {
    HttpGet getMethod = new HttpGet(in_page);
    try {
        HttpResponse response = client.execute(getMethod);
        if (response.getEntity() == null) {
            // Nothing to read (EntityUtils.toString rejects a null entity).
            return "";
        }
        return EntityUtils.toString(response.getEntity());
    } catch (IOException e) {
        // Also covers ClientProtocolException, which is an IOException.
        return logVisitFailure(getMethod, in_page, e);
    } catch (RuntimeException e) {
        // Kept best-effort so callers continue to receive "" instead of an exception.
        return logVisitFailure(getMethod, in_page, e);
    }
}

/** Aborts the failed request, logs the cause and yields the empty-string failure result. */
private String logVisitFailure(HttpGet request, String url, Exception cause) {
    request.abort();
    logger.error("GET " + url + " failed", cause);
    return "";
}
/**
 * Logs in to Sina Weibo through the SSO login endpoint using this instance's HTTP client.
 *
 * <p>Steps: configure the client (browser-compatibility cookie policy, 5000 ms connection
 * timeout), fetch the prelogin data, encrypt {@code servertime + "\t" + nonce + "\n" + pwd}
 * with {@code BigIntegerRSA}, POST the login form, then request the ajaxlogin URL found in
 * the login response.
 *
 * @param username account name; its first "@" is escaped and the result Base64-encoded
 *                 before it is sent
 * @param pwd      plain-text password
 * @throws IOException if a request fails, or if the login response does not contain an
 *                     ajaxlogin URL followed by {@code code=0}
 * @throws Exception   anything else raised by the prelogin step or by the RSA helper
 */
public void openDoor(String username, String pwd) throws Exception {
    client.getParams().setParameter("http.protocol.cookie-policy",
            CookiePolicy.BROWSER_COMPATIBILITY);
    client.getParams().setParameter(
            HttpConnectionParams.CONNECTION_TIMEOUT, 5000);

    PreLoginInfo info = getPreLoginBean(client);
    String pwdString = info.servertime + "\t" + info.nonce + "\n" + pwd;
    // NOTE(review): the prelogin response also carries a "pubkey" (stored in info.pubkey)
    // that is not used here; the hard-coded SINA_PK is. Confirm the two still match.
    String sp = new BigIntegerRSA().rsaCrypt(SINA_PK, "10001", pwdString);

    HttpPost post = new HttpPost(
            "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.2)");
    post.setEntity(new UrlEncodedFormEntity(buildLoginForm(username, info, sp), HTTP.UTF_8));
    HttpResponse response = client.execute(post);
    String entity = EntityUtils.toString(response.getEntity());

    // The login response embeds the URL to follow; it is cut out up to and including "code=0".
    int urlStart = entity.indexOf("http://weibo.com/ajaxlogin.php?");
    int codeAt = entity.indexOf("code=0");
    if (urlStart < 0 || codeAt < urlStart) {
        logger.debug("login response: " + entity);
        throw new IOException(
                "Sina SSO login failed: response contains no ajaxlogin URL with code=0");
    }
    String url = entity.substring(urlStart, codeAt + 6);
    logger.debug("url:" + url);

    // Connect to the actual URL obtained from the login response.
    HttpGet getMethod = new HttpGet(url);
    response = client.execute(getMethod);
    entity = EntityUtils.toString(response.getEntity());

    // The extracted value is only logged, so a missing marker must not fail the login.
    int domainAt = entity.indexOf("userdomain");
    int lastQuote = entity.lastIndexOf("\"");
    if (domainAt >= 0 && domainAt + 13 <= lastQuote) {
        logger.debug(entity.substring(domainAt + 13, lastQuote));
    } else {
        logger.debug("userdomain not found in ajaxlogin response");
    }
}

/** Assembles the form fields posted to login.php, in the order the request has always used. */
private List<NameValuePair> buildLoginForm(String username, PreLoginInfo info, String sp) {
    List<NameValuePair> nvps = new ArrayList<NameValuePair>();
    nvps.add(new BasicNameValuePair("entry", "weibo"));
    nvps.add(new BasicNameValuePair("gateway", "1"));
    nvps.add(new BasicNameValuePair("from", ""));
    nvps.add(new BasicNameValuePair("savestate", "7"));
    nvps.add(new BasicNameValuePair("useticket", "1"));
    nvps.add(new BasicNameValuePair("ssosimplelogin", "1"));
    nvps.add(new BasicNameValuePair("vsnf", "1"));
    nvps.add(new BasicNameValuePair("su", encodeUserName(username)));
    nvps.add(new BasicNameValuePair("service", "miniblog"));
    nvps.add(new BasicNameValuePair("servertime", String.valueOf(info.servertime)));
    nvps.add(new BasicNameValuePair("nonce", info.nonce));
    nvps.add(new BasicNameValuePair("pwencode", "rsa2"));
    nvps.add(new BasicNameValuePair("rsakv", info.rsakv));
    nvps.add(new BasicNameValuePair("sp", sp));
    nvps.add(new BasicNameValuePair("encoding", "UTF-8"));
    nvps.add(new BasicNameValuePair("prelt", "115"));
    nvps.add(new BasicNameValuePair("returntype", "META"));
    nvps.add(new BasicNameValuePair(
            "url",
            "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack"));
    return nvps;
}
/**
 * Requests the prelogin data and copies the fields of the returned JSON object into a
 * {@link PreLoginInfo}.
 *
 * @param client HTTP client used for the prelogin request
 * @return bean holding the nonce, pcid, pubkey, retcode, rsakv and servertime values
 * @throws HttpException declared in the signature; not raised by the code in this method
 * @throws IOException   if the prelogin request fails
 * @throws JSONException as raised by the JSON library while parsing or reading the fields
 */
private PreLoginInfo getPreLoginBean(HttpClient client)
        throws HttpException, IOException, JSONException {
    String preLoginJson = getPreLoginInfo(client);
    JSONObject jsonInfo = JSONObject.fromObject(preLoginJson);

    PreLoginInfo info = new PreLoginInfo();
    info.nonce = jsonInfo.getString("nonce");
    info.pcid = jsonInfo.getString("pcid");
    info.pubkey = jsonInfo.getString("pubkey");
    info.retcode = jsonInfo.getInt("retcode");
    info.rsakv = jsonInfo.getString("rsakv");
    info.servertime = jsonInfo.getLong("servertime");
    return info;
}
/**
 * Calls the SSO prelogin endpoint and returns the text found between the first "(" and the
 * last ")" of the response, i.e. the argument of the callback wrapper.
 *
 * @param client HTTP client used to send the request
 * @return the text enclosed by the callback's parentheses
 * @throws ParseException as raised by {@code EntityUtils.toString}
 * @throws IOException    if the request fails or the response has no "(...)" wrapper
 */
public String getPreLoginInfo(HttpClient client)
        throws ParseException, IOException {
    String preloginurl = "http://login.sina.com.cn/sso/prelogin.php?entry=sso&"
            + "callback=sinaSSOController.preloginCallBack&su="
            + "dW5kZWZpbmVk" // Base64 of the literal string "undefined"
            + "&rsakt=mod&client=ssologin.js(v1.4.2)"
            + "&_=" + getCurrentTime();
    HttpGet get = new HttpGet(preloginurl);
    HttpResponse response = client.execute(get);
    String getResp = EntityUtils.toString(response.getEntity());

    int firstLeftBracket = getResp.indexOf("(");
    int lastRightBracket = getResp.lastIndexOf(")");
    if (firstLeftBracket < 0 || lastRightBracket < firstLeftBracket) {
        // Without this guard substring() would fail with an uninformative index error.
        throw new IOException(
                "Unexpected prelogin response (no callback wrapper): " + getResp);
    }
    String jsonBody = getResp.substring(firstLeftBracket + 1, lastRightBracket);
    logger.debug(jsonBody);
    return jsonBody;
}
/** Returns the current time as whole seconds since the epoch, rendered as a decimal string. */
private String getCurrentTime() {
    final long epochSeconds = System.currentTimeMillis() / 1000L;
    return Long.toString(epochSeconds);
}
/**
 * Encodes a user name for the "su" login field: the first "@" is replaced by "%40" and the
 * result is Base64-encoded, e.g. "337240552@qq.com" becomes "MzM3MjQwNTUyJTQwcXEuY29t".
 *
 * <p>The bytes are taken as UTF-8, matching the "encoding" field of the login form, rather
 * than in the platform default charset.
 *
 * @param email user name, typically an e-mail address
 * @return Base64 text of the escaped user name
 */
private String encodeUserName(String email) {
    String escaped = email.replaceFirst("@", "%40");
    try {
        return Base64.encodeBase64String(escaped.getBytes("UTF-8"));
    } catch (UnsupportedEncodingException e) {
        // Every Java platform is required to support UTF-8.
        throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
    }
}
/**
 * URL-encodes an account name as UTF-8 and Base64-encodes the result.
 *
 * @param account account name to encode
 * @return the Base64 text, or an empty string if the UTF-8 encoding is reported as unsupported
 */
private String encodeAccount(String account) {
    try {
        String escaped = URLEncoder.encode(account, "UTF-8");
        byte[] raw = escaped.getBytes();
        return Base64.encodeBase64String(raw);
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
        return "";
    }
}
/**
 * Builds a random string of upper-case letters and digits.
 *
 * @param len number of characters to generate; zero or less yields an empty string
 * @return the generated string
 */
private String makeNonce(int len) {
    final String alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    StringBuilder nonce = new StringBuilder();
    int remaining = len;
    while (remaining-- > 0) {
        double roll = Math.ceil(Math.random() * 1000000);
        int pick = (int) (roll % alphabet.length());
        nonce.append(alphabet.charAt(pick));
    }
    return nonce.toString();
}
/** Returns the local clock's current time in whole epoch seconds, as a decimal string. */
private static String getServerTime() {
    final long epochMillis = new Date().getTime();
    return Long.toString(epochMillis / 1000);
}
/**
 * Requests the logout URL through this instance's client.
 *
 * <p>Best-effort: an I/O failure is logged and not propagated. The response body is
 * discarded so that the underlying connection is released for later requests.
 */
public void logout() {
    HttpGet method = new HttpGet(LOGOUTURL);
    try {
        HttpResponse response = client.execute(method);
        // The body is of no interest, but it must be consumed to free the connection.
        EntityUtils.consume(response.getEntity());
    } catch (IOException e) {
        method.abort();
        logger.error("Logout request to " + LOGOUTURL + " failed", e);
    }
}
}
另外,授人以鱼不如授人以渔:上面登录请求中各个参数的封装方式,实际上是通过截取报文(抓包)分析得到的。有一些很好用的抓包工具,读者可以自行Google一下。
如此便走进了微博,在里面如何折腾就看你自己了,当然有很多技巧,且听下回分解!