使用jsoup登陆网站,重要的就是看这个网站登录的时候需要什么信息?
步骤:
(1) Jsoup.connect(url)创建一个connect对象,注意:这个方法只支持Web URLs (http和https 协议)。
(2) 设置User-Agent:conn.userAgent(user_agent);
(3) 设置请求参数:conn.data(params);
(4) 进行提交,并且获得返回值Connection.Response
(5) 从中解析cookies
package com.xing.jsoup;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Jsoup;
/**
 * Minimal example of logging in to a web site with jsoup: build a
 * {@link Connection}, POST the login form, and read the session cookie from
 * the response.
 */
public class FangdoCrawl {

    /** Default timeout passed to {@link Connection#timeout(int)}, in milliseconds (60 seconds). */
    protected static int timeout = 1000 * 60;

    /** User-Agent header value sent with every request. */
    protected final static String user_agent = "Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20100101 Firefox/12.0 AlexaToolbar/alxf-2.15";

    /** Name of the cookie that is read from the login response. */
    protected static String LOGIN_FLAG = "JSESSIONID";

    /**
     * Runs the login example once.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        FangdoCrawl crawl = new FangdoCrawl();
        crawl.login();
    }

    /**
     * Posts the login form and prints the value of the {@link #LOGIN_FLAG}
     * cookie to standard output as {@code jsessionid=<value>}.
     *
     * <p>If the response does not carry that cookie, or the request fails with
     * an {@link IOException}, a diagnostic is written to standard error
     * instead; no exception is propagated to the caller.
     */
    public void login() {
        // Address the login form is submitted to.
        // NOTE(review): this is plain HTTP, so the credentials travel unencrypted;
        // switch to https if the server supports it.
        String fangdo = "http://www.16msg.com/web/doLogin.action";

        // Form fields for the login request (placeholders, fill in real values).
        Map<String, String> params = new HashMap<String, String>();
        params.put("phone_id", "*******");
        params.put("password", "*******");

        Connection httpCon = getConnection(fangdo, params);
        try {
            // Submit the form with POST.
            Connection.Response res = httpCon.method(Method.POST).execute();

            // The cookie is absent when the server did not establish a session;
            // report that explicitly instead of printing "jsessionid=null".
            String jsessionid = res.cookie(LOGIN_FLAG);
            if (jsessionid == null) {
                System.err.println("Login failed: response from " + fangdo
                        + " (HTTP " + res.statusCode() + ") did not set the "
                        + LOGIN_FLAG + " cookie");
                return;
            }
            System.out.println("jsessionid=" + jsessionid);
        } catch (IOException e) {
            // Say which request failed before dumping the stack trace.
            System.err.println("Login request to " + fangdo + " failed: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Creates a connection to {@code url} using the default {@link #timeout}.
     *
     * @param url    target address (jsoup accepts http and https URLs only)
     * @param params request parameters to attach; may be {@code null} or empty
     * @return the configured, not yet executed connection
     */
    public Connection getConnection(String url, Map<String, String> params) {
        return getConnection(url, timeout, params);
    }

    /**
     * Creates a connection to {@code url} with the User-Agent, timeout and
     * Accept-Encoding header set, and with {@code params} attached as request
     * data when any are given.
     *
     * @param url     target address (jsoup accepts http and https URLs only)
     * @param timeout timeout in milliseconds, passed to {@link Connection#timeout(int)}
     * @param params  request parameters to attach; may be {@code null} or empty
     * @return the configured, not yet executed connection
     */
    public Connection getConnection(String url, int timeout, Map<String, String> params) {
        Connection conn = Jsoup.connect(url)
                .userAgent(user_agent)
                .timeout(timeout)
                // Tell the server that compressed responses are accepted.
                .header("accept-encoding", "gzip, deflate");

        // Only attach request data when there is something to send.
        if (params != null && !params.isEmpty()) {
            conn = conn.data(params);
        }
        return conn;
    }
}