import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
 * Logs in to a web application with a form POST, then requests a page that
 * needs the login session and prints the elements matching {@code #rpttable}.
 *
 * @author WYP
 */
public class DataProcessor {
    /**
     * Runs the login request followed by the data request and prints the result
     * to standard output. Any exception is caught and its stack trace printed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Login URL.
        String loginUrl = "http:checkUser.jsp";
        // URL that is requested after logging in.
        String dataUrl = "http:report.jsp?reportType=1&menuId=M";
        HttpClient httpClient = new HttpClient();
        // Simulated login; choose POST or GET according to what the server expects.
        PostMethod postMethod = new PostMethod(loginUrl);
        // Form fields required by the login endpoint: user name and password.
        NameValuePair[] data = {new NameValuePair("username", "12345"), new NameValuePair("password", "12345")};
        postMethod.setRequestBody(data);
        try {
            // Accept cookies using the browser-compatible policy.
            httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);

            StringBuilder tmpCookies = new StringBuilder();
            try {
                int loginStatus = httpClient.executeMethod(postMethod);
                // Stop early on an HTTP error instead of requesting the data page
                // without a session.
                if (loginStatus >= 400) {
                    System.err.println("Login request failed with HTTP status " + loginStatus);
                    return;
                }
                // Collect the cookies stored in the client state after the login.
                Cookie[] cookies = httpClient.getState().getCookies();
                for (Cookie c : cookies) {
                    tmpCookies.append(c.toString()).append(";");
                    System.out.println(c.toString());
                }
            } finally {
                // Give the connection back before the next request is executed.
                postMethod.releaseConnection();
            }

            // Request issued after the login.
            GetMethod getMethod = new GetMethod(dataUrl);
            try {
                // Send the login cookies along with the request.
                // NOTE(review): the HttpClient state already holds these cookies, so
                // this explicit header may be redundant - confirm against the server.
                getMethod.setRequestHeader("cookie", tmpCookies.toString());
                // Extra request headers go on the request that is about to be sent.
                // Referer tells the server which page the request came from
                // (the expected values can be inspected with Fiddler).
                getMethod.setRequestHeader("Referer", "http:login.jsp");
                getMethod.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0");
                httpClient.executeMethod(getMethod);
                // Read the page body once and reuse it for parsing.
                String html = getMethod.getResponseBodyAsString();
                // Parse the page and select the element(s) with id "rpttable".
                Document doc = Jsoup.parseBodyFragment(html);
                Elements elementsTr = doc.select("#rpttable");
                System.out.println(elementsTr.toString());
            } finally {
                getMethod.releaseConnection();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
// Summary: loginUrl and dataUrl must be obtained exactly; Fiddler can be used to inspect the path and format of every request.