用HttpClient访问sina微博

今天接到任务,要去爬取sina微博的内容。爬虫请求时必须带上登录后的cookie,所以要用HttpClient写一个登录接口。
用到的分析工具:Live HTTP Headers(火狐的一个插件)
参考资源:[url]http://blog.csdn.net/yodlove/article/details/5938022[/url];
[url]http://blog.csdn.net/jk_yu520/article/details/6622661[/url];
sina用的是单点登录,真是蛋疼,而且登录过程还跳来跳去,不得不跟着一步一步来。
修改:
昨天没有仔细看细节,后来发现cookie有问题:登录本身是成功的,但是之后使用cookie访问时会出错。只好继续埋头百度,找到了一份虽然目前已经无法登录、但仍然可以借鉴的源代码:
[url]http://www.lupaworld.com/code.php?mod=list&itemid=26&path=weibobackup_1.1/src/cn/jayslong/weibo&file=Login.java[/url]
主要代码修改:增加了
 DefaultHttpParams.getDefaultParams().setBooleanParameter(
HttpMethodParams.SINGLE_COOKIE_HEADER, true);

废除了mergeCookie( )方法。
代码

package com.cp.http;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpState;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.DefaultHttpParams;
import org.apache.commons.httpclient.params.HttpMethodParams;

import com.cp.http.Sh1;

/**
 * Command-line demo that logs in to Sina Weibo through the Sina SSO endpoints
 * with Commons HttpClient and then prints the user's home page.
 *
 * <p>The steps run by {@code main} are: {@code preLogin} (fetch
 * {@code servertime} and {@code nonce}), {@code Login} (post the hashed
 * password and obtain the ajax-login URL), {@code ajaxLogin} (obtain the
 * unique id) and {@code getHomePage}.
 *
 * @author chengping
 */
public class TestClient {

    static final String LOGON_SITE = "login.sina.com.cn";
    static final int LOGON_PORT = 80;
    static final String preLoginUrl = "http://login.sina.com.cn/sso/prelogin.php?entry=miniblog&callback=sinaSSOController.preloginCallBack&client=ssologin.js(v1.3.14)&_=1313560817097";
    static final String loginurl = "/sso/login.php?client=ssologin.js(v1.3.14)";

    static final String defaultUser = "xxxxx";
    static final String defaultPasswordOld = "xxxxxx";

    static final String homePageUrl = "http://weibo.com/";

    /**
     * Runs the whole login flow with the default account and prints the home page.
     *
     * @param args ignored
     * @throws Exception if any request fails or a response cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        HttpClient client = new HttpClient();
        client.getParams().setParameter(
                HttpMethodParams.HTTP_CONTENT_CHARSET, "UTF8");
        // Let HttpClient put all cookies into a single Cookie header.
        // Merging the cookies by hand was tried first and did not work.
        DefaultHttpParams.getDefaultParams().setBooleanParameter(
                HttpMethodParams.SINGLE_COOKIE_HEADER, true);
        client.getParams().setParameter(HttpMethodParams.USER_AGENT, "Mozilla/5.0 (X11; U; Linux i686; zh-CN; rv:1.9.1.2) Gecko/20090803 Fedora/3.5.2-2.fc11 Firefox/3.5.2");
        client.getHostConfiguration().setHost(LOGON_SITE, LOGON_PORT);

        String ajaxLoginUrl = Login(client, preLogin(client));
        String uniqueid = ajaxLogin(client, ajaxLoginUrl);
        getHomePage(client, homePageUrl + uniqueid);
    }

    /**
     * Fetches the given home page URL and prints the response body to standard output.
     *
     * @param client      HTTP client that already holds the login cookies
     * @param homePageUrl absolute URL of the page to fetch
     * @throws IOException if the request fails or the body cannot be read
     */
    public static void getHomePage(HttpClient client, String homePageUrl) throws IOException {
        GetMethod get = new GetMethod(homePageUrl);
        try {
            client.executeMethod(get);
            System.out.println(get.getResponseBodyAsString());
        } finally {
            // Always hand the connection back, also when the request failed.
            get.releaseConnection();
        }
    }

    /**
     * Runs the pre-login request for the default account.
     *
     * @param client HTTP client to use
     * @return map holding the {@code servertime} and {@code nonce} values
     * @throws Exception if the request fails or the response cannot be parsed
     */
    public static Map<String, String> preLogin(HttpClient client)
            throws Exception {
        return preLogin(client, defaultUser);
    }

    /**
     * Runs the pre-login request for the given account and extracts the
     * {@code servertime} and {@code nonce} values from the response.
     *
     * @param client    HTTP client to use
     * @param userEmail account name, appended to the request as the {@code usr} parameter
     * @return map holding the {@code servertime} and {@code nonce} values
     * @throws Exception if the request fails or the response cannot be parsed
     */
    public static Map<String, String> preLogin(HttpClient client,
            String userEmail) throws Exception {
        GetMethod get = new GetMethod(preLoginUrl + "&usr=" + userEmail);
        String response;
        try {
            client.executeMethod(get);
            response = get.getResponseBodyAsString();
        } finally {
            get.releaseConnection();
        }
        // The cookies stay in the client state exactly as received; collapsing
        // them into one hand-built cookie broke the requests that followed.
        printCookie(client);
        System.out.println("-------------preLogin结束--------------");
        return responseBodyToMap(response);
    }

    /**
     * Posts the login form for the default account and password.
     *
     * @param client             HTTP client to use
     * @param serverTimeAndNonce map holding {@code servertime} and {@code nonce} from pre-login
     * @return URL of the ajax-login step, taken from the login response
     * @throws IOException   if the request fails or the body cannot be read
     * @throws HttpException if a protocol error occurs
     */
    public static String Login(HttpClient client,
            Map<String, String> serverTimeAndNonce) throws IOException,
            HttpException {
        return Login(client, serverTimeAndNonce, defaultUser,
                defaultPasswordOld);
    }

    /**
     * Posts the login form for the given account, sending the password in the
     * hashed form produced by {@code encryption}.
     *
     * @param client             HTTP client to use
     * @param serverTimeAndNonce map holding {@code servertime} and {@code nonce} from pre-login
     * @param userEmail          account name
     * @param passwordOld        plain-text password
     * @return URL of the ajax-login step, taken from the login response
     * @throws IOException              if the request fails or the body cannot be read
     * @throws HttpException            if a protocol error occurs
     * @throws IllegalArgumentException if {@code servertime} or {@code nonce} is missing
     * @throws IllegalStateException    if the response does not contain the expected redirect
     */
    public static String Login(HttpClient client,
            Map<String, String> serverTimeAndNonce, String userEmail,
            String passwordOld) throws IOException, HttpException {
        String servertimeValue = serverTimeAndNonce.get("servertime");
        String nonceValue = serverTimeAndNonce.get("nonce");
        if (servertimeValue == null || nonceValue == null) {
            // Without both values the password hash would be built from the text "null".
            throw new IllegalArgumentException(
                    "serverTimeAndNonce must contain both 'servertime' and 'nonce'");
        }

        PostMethod post = new PostMethod(loginurl);
        post.addParameters(new NameValuePair[] {
            new NameValuePair("entry", "miniblog"),
            new NameValuePair("gateway", "1"),
            new NameValuePair("from", ""),
            new NameValuePair("savestate", "7"),
            new NameValuePair("useticket", "1"),
            new NameValuePair("ssosimplelogin", "1"),
            new NameValuePair("username", userEmail),
            new NameValuePair("service", "miniblog"),
            new NameValuePair("servertime", servertimeValue),
            new NameValuePair("nonce", nonceValue),
            new NameValuePair("pwencode", "wsse"),
            new NameValuePair("password", encryption(passwordOld, serverTimeAndNonce)),
            new NameValuePair("encoding", "utf-8"),
            new NameValuePair(
                    "url",
                    "http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack"),
            new NameValuePair("returntype", "META")
        });

        String responseBody;
        try {
            client.executeMethod(post);
            responseBody = post.getResponseBodyAsString();
        } finally {
            post.releaseConnection();
        }
        System.out.println("-------------Login结束--------------");

        return getAjaxUrl(responseBody);
    }

    /**
     * Builds the password value sent to the server:
     * {@code SHA1(SHA1(SHA1(password)) + servertime + nonce)}, using {@code Sh1.testDigest}.
     *
     * @param passwordOld        plain-text password
     * @param serverTimeAndNonce map holding {@code servertime} and {@code nonce}
     * @return the hashed password as returned by {@code Sh1.testDigest}
     */
    private static String encryption(String passwordOld,
            Map<String, String> serverTimeAndNonce) {
        String salted = Sh1.testDigest(Sh1.testDigest(passwordOld))
                + serverTimeAndNonce.get("servertime")
                + serverTimeAndNonce.get("nonce");
        return Sh1.testDigest(salted);
    }

    /**
     * Requests the ajax-login URL obtained from {@code Login} and extracts the
     * unique id from the response.
     *
     * @param client  HTTP client to use
     * @param ajaxUrl URL returned by {@code Login}
     * @return the unique id found in the response
     * @throws IOException           if the request fails or the body cannot be read
     * @throws HttpException         if a protocol error occurs
     * @throws IllegalStateException if the response does not contain the expected fields
     */
    public static String ajaxLogin(HttpClient client, String ajaxUrl)
            throws IOException, HttpException {
        GetMethod getByAjax = new GetMethod(ajaxUrl);
        String responseBody;
        try {
            client.executeMethod(getByAjax);
            System.out.println(getByAjax.getURI());
            System.out.println("-------------ajaxLogin结束--------------");
            responseBody = getByAjax.getResponseBodyAsString();
        } finally {
            getByAjax.releaseConnection();
        }

        return getUniqueid(responseBody);
    }

    /**
     * Extracts the unique id from the ajax-login response: the text that starts
     * 11 characters after the beginning of {@code uniqueid} and ends 3
     * characters before {@code userid}.
     *
     * @param responseBodyAsString body of the ajax-login response
     * @return the unique id
     * @throws IllegalStateException if either marker is missing or they are out of order
     */
    private static String getUniqueid(String responseBodyAsString) {
        return sliceBetween(responseBodyAsString, "uniqueid", 11, "userid", -3,
                "ajax-login response");
    }

    /**
     * Extracts {@code servertime} and {@code nonce} from the pre-login response.
     * The text between the first <code>{</code> and the following <code>}</code>
     * is read as comma-separated {@code "key":value} pairs; the two fields are
     * looked up by name, so their position in the response does not matter.
     *
     * @param response body of the pre-login response
     * @return map with exactly the keys {@code servertime} and {@code nonce}
     * @throws IllegalStateException if the body is missing, has no braces, or lacks a field
     */
    private static Map<String, String> responseBodyToMap(String response) {
        if (response == null) {
            throw new IllegalStateException("pre-login response: no response body");
        }
        int open = response.indexOf('{');
        int close = response.indexOf('}', open + 1);
        if (open < 0 || close < 0) {
            throw new IllegalStateException(
                    "pre-login response: expected a {...} payload but found none");
        }

        Map<String, String> result = new HashMap<String, String>();
        for (String pair : response.substring(open + 1, close).split(",")) {
            int colon = pair.indexOf(':');
            if (colon < 0) {
                continue;
            }
            String key = pair.substring(0, colon).replace("\"", "").trim();
            if ("servertime".equals(key) || "nonce".equals(key)) {
                result.put(key, pair.substring(colon + 1).replace("\"", "").trim());
            }
        }
        if (!result.containsKey("servertime") || !result.containsKey("nonce")) {
            throw new IllegalStateException(
                    "pre-login response: 'servertime' and/or 'nonce' not found");
        }
        return result;
    }

    /**
     * Prints every cookie currently held by the client; used while tracing the login flow.
     *
     * @param client HTTP client whose cookies are printed
     */
    private static void printCookie(HttpClient client) {
        Cookie[] cookies = client.getState().getCookies();
        System.out.println("目前有" + cookies.length + "条cookie");
        for (int index = 0; index < cookies.length; index++) {
            Cookie cookie = cookies[index];
            System.out.println("cookie[" + index + "]:{" + cookie.getName() + "," + cookie.getValue() + "}");
        }
    }

    /**
     * Extracts the ajax-login URL from the login response: the text that starts
     * 9 characters after the beginning of {@code replace} and ends 6 characters
     * before {@code </script>}.
     *
     * @param responseBodyAsString body of the login response
     * @return the ajax-login URL
     * @throws IllegalStateException if either marker is missing or they are out of order
     */
    private static String getAjaxUrl(String responseBodyAsString) {
        return sliceBetween(responseBodyAsString, "replace", 9, "</script>", -6,
                "login response");
    }

    /**
     * Returns the part of {@code body} between two markers, each shifted by a
     * fixed offset, and fails with a descriptive message instead of an index
     * error when a marker is absent.
     *
     * @param body        text to cut from; may be {@code null}
     * @param startMarker text whose first occurrence anchors the start
     * @param startOffset characters to add to the start marker position
     * @param endMarker   text whose first occurrence anchors the end
     * @param endOffset   characters to add to the end marker position (negative to cut earlier)
     * @param what        short description of the response, used in error messages
     * @return the selected substring
     * @throws IllegalStateException if the body is missing, a marker is not found,
     *                               or the resulting range is empty-negative
     */
    private static String sliceBetween(String body, String startMarker, int startOffset,
            String endMarker, int endOffset, String what) {
        if (body == null) {
            throw new IllegalStateException(what + ": no response body");
        }
        int startAt = body.indexOf(startMarker);
        int endAt = body.indexOf(endMarker);
        if (startAt < 0 || endAt < 0) {
            throw new IllegalStateException(what + ": expected markers '" + startMarker
                    + "' and '" + endMarker + "' were not both present");
        }
        int start = startAt + startOffset;
        int end = endAt + endOffset;
        if (start > end) {
            throw new IllegalStateException(what + ": markers '" + startMarker
                    + "' and '" + endMarker + "' are not in the expected order");
        }
        return body.substring(start, end);
    }
}

下面是密码加密的辅助类。sina采用的加密方式是:先对密码做两次SHA1摘要,再把得到的字符串依次拼接上preLogin返回的两个参数servertime(服务器时间)和nonce(随机生成的字符串),最后对拼接结果再做一次SHA1摘要。(我靠,sina他不累啊。)
package com.cp.http;

public class Sh1 {

    /** Lower-case hex digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Returns the SHA-1 digest of the given text as a lower-case hex string.
     *
     * <p>The text is always encoded as UTF-8 before hashing, so the result does
     * not depend on the platform default charset.
     *
     * @param info text to hash; must not be {@code null}
     * @return the 40-character lower-case hex form of the SHA-1 digest
     * @throws IllegalStateException if the runtime provides no SHA-1 implementation
     */
    public static String testDigest(String info) {
        try {
            java.security.MessageDigest sha1 = java.security.MessageDigest.getInstance("SHA-1");
            byte[] digest = sha1.digest(info.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            return byte2hex(digest);
        } catch (java.security.NoSuchAlgorithmException ex) {
            // Fail loudly: returning a placeholder string here would be hashed
            // again by the caller and sent as if it were a real password digest.
            throw new IllegalStateException("SHA-1 message digest is not available", ex);
        }
    }

    /**
     * Converts a byte array to a lower-case hex string, two digits per byte and
     * without separators.
     *
     * @param b bytes to convert; must not be {@code null}
     * @return the hex string; empty when {@code b} is empty
     */
    public static String byte2hex(byte[] b) {
        StringBuilder hex = new StringBuilder(b.length * 2);
        for (byte value : b) {
            // Mask after shifting so the sign extension of negative bytes is dropped.
            hex.append(HEX_DIGITS[(value >> 4) & 0x0F]);
            hex.append(HEX_DIGITS[value & 0x0F]);
        }
        return hex.toString();
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值