package test.app;
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Minimal crawler that fetches a page with Commons HttpClient and prints
 * every anchor ({@code <a>}) element found in the response body using jsoup.
 *
 * <p>NOTE(review): the account name, password and session cookie are hard-coded
 * in this class; consider moving them to configuration.
 */
public class WebCrawler {

    /**
     * Cookie header sent by {@link #crawler(String)}. It includes a hard-coded
     * {@code zentaosid} value, so it only works while that session is valid.
     */
    private static final String SESSION_COOKIE =
            "lang=zh-cn; theme=default; lastProject=13; preProjectID=13; moduleBrowseParam=0; productBrowseParam=0; projectTaskOrder=status%2Cid_desc; preProductID=8; preBranch=0; bugModule=0; lastProduct=8; qaBugOrder=idow; windowHeight=925; windowWidth=1026; zentaosid=iss55g3k0do2bdgd9piin2g0p0";

    /**
     * Entry point: runs {@link #crawlerPost(String)} against a placeholder URL.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        new WebCrawler().crawlerPost("**********");
    }

    /**
     * Crawls the page at {@code sUrl} with a GET request that carries the
     * predefined session cookie, prints the status line, and prints the
     * anchors found in the response.
     *
     * <p>Failures are reported on {@code System.err} and not rethrown.
     *
     * @param sUrl address of the page to fetch
     */
    public void crawler(String sUrl) {
        HttpClientParams params = new HttpClientParams();
        // NOTE(review): these are set as client parameters; presumably they are
        // not transmitted as request parameters - confirm whether they are needed.
        params.setParameter("account", "leo.li");
        params.setParameter("password", "****");
        HttpClient httpClient = new HttpClient(params);

        HttpMethod method = new GetMethod(sUrl);
        method.setRequestHeader("cookie", SESSION_COOKIE);
        try {
            httpClient.executeMethod(method);
            System.out.println(method.getStatusLine());
            String html = method.getResponseBodyAsString();
            this.parseContext(html);
        } catch (Exception e) {
            // Best-effort crawl: report the failure instead of silently dropping it.
            System.err.println("Failed to crawl " + sUrl + ": " + e);
        } finally {
            // Always hand the connection back, even when the request failed.
            method.releaseConnection();
        }
    }

    /**
     * Crawls the page at {@code sUrl} with a POST request that submits the
     * login form fields, prints the status line and the cookies held by the
     * client afterwards, and prints the anchors found in the response.
     *
     * <p>Failures are reported on {@code System.err} and not rethrown.
     *
     * @param sUrl address of the page to post to
     */
    public void crawlerPost(String sUrl) {
        // Information required for login: user name and password.
        NameValuePair[] data = {
            new NameValuePair("account", "leo.li"),
            new NameValuePair("password", "*****")
        };
        HttpClient httpClient = new HttpClient();

        PostMethod method = new PostMethod(sUrl);
        method.setRequestBody(data);
        try {
            httpClient.executeMethod(method);
            System.out.println(method.getStatusLine());

            // Collect the cookies the client holds after the login request.
            Cookie[] cookies = httpClient.getState().getCookies();
            StringBuilder tmpcookies = new StringBuilder();
            for (Cookie c : cookies) {
                String cookieText = c.toString();
                tmpcookies.append(cookieText).append(';');
                System.out.println("cookies = " + cookieText);
            }
            System.out.println(tmpcookies.toString());

            String html = method.getResponseBodyAsString();
            this.parseContext(html);
        } catch (Exception e) {
            // Best-effort crawl: report the failure instead of silently dropping it.
            System.err.println("Failed to crawl " + sUrl + ": " + e);
        } finally {
            // Always hand the connection back, even when the request failed.
            method.releaseConnection();
        }
    }

    /**
     * Parses {@code context} as HTML and prints all anchor elements it contains.
     *
     * @param context HTML text to parse; when {@code null} nothing is parsed
     *                and a message is written to {@code System.err}
     */
    public void parseContext(String context) {
        if (context == null) {
            System.err.println("No content to parse");
            return;
        }
        Document document = Jsoup.parse(context);
        System.out.println(document.select("a"));
    }
}
import org.apache.commons.httpclient.Cookie;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
/**
 * Minimal crawler that fetches a page with Commons HttpClient and prints
 * every anchor ({@code <a>}) element found in the response body using jsoup.
 *
 * <p>NOTE(review): the account name, password and session cookie are hard-coded
 * in this class; consider moving them to configuration.
 */
public class WebCrawler {

    /**
     * Cookie header sent by {@link #crawler(String)}. It includes a hard-coded
     * {@code zentaosid} value, so it only works while that session is valid.
     */
    private static final String SESSION_COOKIE =
            "lang=zh-cn; theme=default; lastProject=13; preProjectID=13; moduleBrowseParam=0; productBrowseParam=0; projectTaskOrder=status%2Cid_desc; preProductID=8; preBranch=0; bugModule=0; lastProduct=8; qaBugOrder=idow; windowHeight=925; windowWidth=1026; zentaosid=iss55g3k0do2bdgd9piin2g0p0";

    /**
     * Entry point: runs {@link #crawlerPost(String)} against a placeholder URL.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        new WebCrawler().crawlerPost("**********");
    }

    /**
     * Crawls the page at {@code sUrl} with a GET request that carries the
     * predefined session cookie, prints the status line, and prints the
     * anchors found in the response.
     *
     * <p>Failures are reported on {@code System.err} and not rethrown.
     *
     * @param sUrl address of the page to fetch
     */
    public void crawler(String sUrl) {
        HttpClientParams params = new HttpClientParams();
        // NOTE(review): these are set as client parameters; presumably they are
        // not transmitted as request parameters - confirm whether they are needed.
        params.setParameter("account", "leo.li");
        params.setParameter("password", "****");
        HttpClient httpClient = new HttpClient(params);

        HttpMethod method = new GetMethod(sUrl);
        method.setRequestHeader("cookie", SESSION_COOKIE);
        try {
            httpClient.executeMethod(method);
            System.out.println(method.getStatusLine());
            String html = method.getResponseBodyAsString();
            this.parseContext(html);
        } catch (Exception e) {
            // Best-effort crawl: report the failure instead of silently dropping it.
            System.err.println("Failed to crawl " + sUrl + ": " + e);
        } finally {
            // Always hand the connection back, even when the request failed.
            method.releaseConnection();
        }
    }

    /**
     * Crawls the page at {@code sUrl} with a POST request that submits the
     * login form fields, prints the status line and the cookies held by the
     * client afterwards, and prints the anchors found in the response.
     *
     * <p>Failures are reported on {@code System.err} and not rethrown.
     *
     * @param sUrl address of the page to post to
     */
    public void crawlerPost(String sUrl) {
        // Information required for login: user name and password.
        NameValuePair[] data = {
            new NameValuePair("account", "leo.li"),
            new NameValuePair("password", "*****")
        };
        HttpClient httpClient = new HttpClient();

        PostMethod method = new PostMethod(sUrl);
        method.setRequestBody(data);
        try {
            httpClient.executeMethod(method);
            System.out.println(method.getStatusLine());

            // Collect the cookies the client holds after the login request.
            Cookie[] cookies = httpClient.getState().getCookies();
            StringBuilder tmpcookies = new StringBuilder();
            for (Cookie c : cookies) {
                String cookieText = c.toString();
                tmpcookies.append(cookieText).append(';');
                System.out.println("cookies = " + cookieText);
            }
            System.out.println(tmpcookies.toString());

            String html = method.getResponseBodyAsString();
            this.parseContext(html);
        } catch (Exception e) {
            // Best-effort crawl: report the failure instead of silently dropping it.
            System.err.println("Failed to crawl " + sUrl + ": " + e);
        } finally {
            // Always hand the connection back, even when the request failed.
            method.releaseConnection();
        }
    }

    /**
     * Parses {@code context} as HTML and prints all anchor elements it contains.
     *
     * @param context HTML text to parse; when {@code null} nothing is parsed
     *                and a message is written to {@code System.err}
     */
    public void parseContext(String context) {
        if (context == null) {
            System.err.println("No content to parse");
            return;
        }
        Document document = Jsoup.parse(context);
        System.out.println(document.select("a"));
    }
}