/**
 * Logs in to the GDUT news site, fetches one article page and prints the
 * element whose id is "content".
 *
 * A CookieStore is installed on the HttpClient so that the client manages
 * cookies automatically across the login request and the article request.
 */
CookieStore store = new BasicCookieStore();
RequestConfig config = RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build();
HttpClient client = HttpClients.custom().setDefaultRequestConfig(config).setDefaultCookieStore(store).build();

/*
 * Log in to the news site. All form fields are carried in the query string of the POST URL.
 * NOTE(review): the account name and password are hard-coded in this URL; consider moving
 * them to configuration before this leaves a demo context.
 */
HttpPost post=new HttpPost("http://news.gdut.edu.cn/UserLogin.aspx?preURL=http%3a%2f%2fnews.gdut.edu.cn%2fdefault.aspx&__VIEWSTATE=%2FwEPDwUKLTQwOTA4NzE2NmQYAQUeX19Db250cm9sc1JlcXVpcmVQb3N0QmFja0tleV9fFgEFI2N0bDAwJENvbnRlbnRQbGFjZUhvbGRlcjEkQ2hlY2tCb3gxBufpEJuDDaf6eTj0A4Cn2Erf8u98KcGrQqATTB3mEaQ%3D&__EVENTVALIDATION=%2FwEWBQKb37HjDwLgvLy9BQKi4MPwCQL%2BzqO2BAKA4sljg4IvzC7ksG01o7aN0RZUOKEC4lV0bTeXI4zrbaQsj0c%3D&ctl00%24ContentPlaceHolder1%24userEmail=gdutnews&ctl00%24ContentPlaceHolder1%24userPassWord=newsgdut&ctl00%24ContentPlaceHolder1%24CheckBox1=on&ctl00%24ContentPlaceHolder1%24Button1=%E7%99%BB%E5%BD%95");
HttpResponse response = client.execute(post);
System.out.println(response.getStatusLine());
// The login body is not needed, but its stream must be closed so the underlying
// connection is released before the next request is issued on the same client.
if (response.getEntity() != null) {
    response.getEntity().getContent().close();
}

/*
 * Open the specific notice page.
 */
HttpGet get = new HttpGet("http://news.gdut.edu.cn/ViewArticle.aspx?articleid=144748");
response = client.execute(get);
StringBuilder html = new StringBuilder();
// Decode with an explicit charset instead of the platform default.
// NOTE(review): UTF-8 is assumed because the login form value above is UTF-8
// percent-encoded; confirm against the page's Content-Type header.
try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(
                response.getEntity().getContent(),
                java.nio.charset.StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
        // readLine() strips the terminator; put a newline back so text on
        // adjacent lines is not fused together before parsing.
        html.append(line).append('\n');
    }
}

/*
 * Parse the page and print the element with id "content".
 */
Document doc = Jsoup.parse(html.toString());
System.out.println(doc.getElementById("content"));
// 爬进gdut新闻网的实现
// 最新推荐文章于 2024-04-21 09:58:50 发布