分析登录步骤(http://login.weibo.cn)
点击登录
注意:登录链接不是http://login.weibo.cn,而是http://login.weibo.cn/login/?ns=1&revalid=2&backURL=http%3A%2F%2Fweibo.cn%2F&backTitle=%CE%A2%B2%A9&vt=,观察发现该链接为固定链接,可以直接访问
观察登录所需参数(采用Firefox的Firebug插件)
发现字段backURL,password_****,vk为变化字段,需要获取
发现Cookie字段,需要获取
字段获取
从登录界面的源码中可以获取相关字段
从登录界面的响应头信息中获取cookie
登录之后会再次跳转
获取cookie和location(跳转链接)
完整代码
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/**
 * Minimal demonstration of logging in to the mobile Weibo site (login.weibo.cn)
 * with Apache HttpClient and jsoup.
 *
 * <p>The flow implemented by {@link #main(String[])} is:
 * <ol>
 *   <li>GET the login page and remember the cookies it sets;</li>
 *   <li>parse the login form for its per-request values: the form action,
 *       {@code backURL}, the name of the password input and {@code vk};</li>
 *   <li>POST the credentials, then collect the cookies and the {@code Location}
 *       header of the response;</li>
 *   <li>GET the {@code Location} target with those cookies and print the page.</li>
 * </ol>
 *
 * <p>The account name and password are the {@code "*****"} placeholders and must be
 * replaced with real credentials before running.
 */
public class SinaCrawler {

    /** Base of the login URLs; the form's relative action is appended to it. */
    private static final String LOGIN_BASE_URL = "http://login.weibo.cn/login/";

    /**
     * Page reached after pressing "login" on http://login.weibo.cn. It is requested
     * directly because http://login.weibo.cn itself only redirects.
     */
    private static final String LOGIN_PAGE_URL = LOGIN_BASE_URL
            + "?ns=1&revalid=2&backURL=http%3A%2F%2Fweibo.cn%2F&backTitle=%CE%A2%B2%A9&vt=";

    /** Browser identification sent with the login POST. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; rv:39.0) Gecko/20100101 Firefox/39.0";

    /**
     * Runs the login flow once and prints the intermediate values and the final page
     * to standard output. Any failure is reported as a stack trace on standard error.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        CloseableHttpClient client = HttpClients.createDefault();
        try {
            // Step 1: load the login page; keep its cookies and its HTML.
            String cookie;
            String html;
            CloseableHttpResponse loginPage = client.execute(new HttpGet(LOGIN_PAGE_URL));
            try {
                cookie = joinCookies(loginPage.getHeaders("Set-Cookie"));
                System.out.println(cookie);
                HttpEntity pageEntity = loginPage.getEntity();
                if (pageEntity == null) {
                    throw new IllegalStateException(
                            "Login page response has no body: " + loginPage.getStatusLine());
                }
                html = EntityUtils.toString(pageEntity);
            } finally {
                // Each response must be closed before the next request, otherwise
                // its connection is never released.
                closeQuietly(loginPage);
            }

            // Step 2: read the values of the login form that change per request.
            Document doc = Jsoup.parse(html);
            Element form = doc.select("form").first();
            if (form == null) {
                throw new IllegalStateException("Login page contains no <form> element");
            }
            String action = form.attr("action");
            System.out.println(action);
            String backURL = requireElement(form, "input[name=backURL]").attr("value");
            // The password input carries a generated name, so it is located by type.
            String password = requireElement(form, "input[type=password]").attr("name");
            String vk = requireElement(form, "input[name=vk]").attr("value");

            // Step 3: submit the credentials.
            String postUrl = LOGIN_BASE_URL + action;
            System.out.println(postUrl);
            HttpPost post = new HttpPost(postUrl);
            post.setHeader("User-Agent", USER_AGENT);
            post.setHeader("Referer", LOGIN_PAGE_URL);
            post.setHeader("Host", "login.weibo.cn");
            if (!cookie.isEmpty()) {
                post.setHeader("Cookie", cookie);
            }
            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("backTitle", "微博"));
            nvps.add(new BasicNameValuePair("backURL", backURL));
            nvps.add(new BasicNameValuePair("mobile", "*****"));
            nvps.add(new BasicNameValuePair(password, "*****"));
            nvps.add(new BasicNameValuePair("remember", "on"));
            nvps.add(new BasicNameValuePair("submit", "登录"));
            nvps.add(new BasicNameValuePair("tryCount", ""));
            nvps.add(new BasicNameValuePair("vk", vk));
            post.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8"));

            String cookieLogin;
            String location;
            String loginStatus;
            CloseableHttpResponse loginResult = client.execute(post);
            try {
                loginStatus = String.valueOf(loginResult.getStatusLine());
                cookieLogin = joinCookies(loginResult.getHeaders("Set-Cookie"));
                Header locationHeader = loginResult.getLastHeader("Location");
                location = locationHeader == null ? null : locationHeader.getValue();
            } finally {
                closeQuietly(loginResult);
            }
            System.out.println(cookieLogin);
            System.out.println(location);
            if (location == null) {
                // NOTE(review): presumably this means the login was rejected; confirm
                // against the response body if it happens.
                throw new IllegalStateException(
                        "Login response carries no Location header: " + loginStatus);
            }

            // Step 4: follow the redirect with the cookies returned by the login POST.
            HttpGet get3 = new HttpGet(location);
            if (!cookieLogin.isEmpty()) {
                get3.setHeader("Cookie", cookieLogin);
            }
            CloseableHttpResponse landing = client.execute(get3);
            try {
                HttpEntity entity2 = landing.getEntity();
                if (entity2 == null) {
                    throw new IllegalStateException(
                            "Redirect target returned no body: " + landing.getStatusLine());
                }
                System.out.println("contentLength:" + entity2.getContentLength());
                String content = EntityUtils.toString(entity2);
                System.out.println("content:" + content);
            } finally {
                closeQuietly(landing);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(client);
        }
    }

    /**
     * Builds a {@code Cookie} request-header value from {@code Set-Cookie} response headers.
     *
     * <p>Only the leading {@code name=value} pair of each header is kept; attributes
     * such as {@code expires} or {@code path} are dropped. Pairs are joined with
     * {@code "; "}.
     *
     * @param setCookieHeaders the {@code Set-Cookie} headers of a response, possibly empty
     * @return the joined cookie pairs, or an empty string when there are none
     */
    private static String joinCookies(Header[] setCookieHeaders) {
        StringBuilder joined = new StringBuilder();
        for (Header header : setCookieHeaders) {
            String value = header.getValue();
            if (value == null) {
                continue;
            }
            int attributesStart = value.indexOf(';');
            String pair = (attributesStart < 0 ? value : value.substring(0, attributesStart)).trim();
            if (pair.isEmpty()) {
                continue;
            }
            if (joined.length() > 0) {
                joined.append("; ");
            }
            joined.append(pair);
        }
        return joined.toString();
    }

    /**
     * Returns the first element below {@code form} matching {@code cssQuery}.
     *
     * @param form     the parsed login form
     * @param cssQuery jsoup selector of the wanted element
     * @return the first matching element, never {@code null}
     * @throws IllegalStateException if nothing matches
     */
    private static Element requireElement(Element form, String cssQuery) {
        Element match = form.select(cssQuery).first();
        if (match == null) {
            throw new IllegalStateException("Login form has no element matching " + cssQuery);
        }
        return match;
    }

    /**
     * Closes {@code resource}, printing (not propagating) a failure so that it cannot
     * mask an exception already in flight.
     *
     * @param resource the response or client to close; may be {@code null}
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}