通过前面的学习,我们已经可以正常访问不需要登录的网页,但现在的大部分网页都需要用户登录之后才能访问,因此这里以人人网为例,学习一下网站的模拟登录。
首先对http://www.renren.com/进行爬取,对得到的内容进行分析,可以看到画红线的部分,当点击“登录”时,触发的动作是跳转到这个页面:http://www.renren.com/PLogin.do。
package RenRen;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Fetches the renren.com home page and logs its parsed HTML so that the login
 * form can be inspected. The form's submit target is the login endpoint:
 * http://www.renren.com/PLogin.do
 */
public class RR_Preprocess {
    static final Logger log = LoggerFactory.getLogger(RR_Preprocess.class);

    /**
     * Issues a GET request for the home page and logs the parsed document.
     *
     * @param args unused
     * @throws IOException if the request fails or the response body cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Target URL
        String url = "http://www.renren.com";
        // Request method
        HttpGet httpGet = new HttpGet(url);
        // try-with-resources releases the connection and the client even when
        // reading or parsing the response throws.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse httpResponse = httpClient.execute(httpGet)) {
            // Check the response status code
            int statusCode = httpResponse.getStatusLine().getStatusCode();
            if (statusCode == 200) {
                // Read the body; UTF-8 is only the fallback used when the
                // response's Content-Type does not declare a charset.
                String entity = EntityUtils.toString(httpResponse.getEntity(), "UTF-8");
                // Parse the page with Jsoup
                Document document = Jsoup.parse(entity);
                log.info(document.toString());
            } else {
                // Report the failure instead of exiting silently.
                log.warn("Unexpected status code {} when requesting {}", statusCode, url);
            }
        }
    }
}
因此可以将登录的用户名和密码预先封装好,然后直接以Post方法提交用户名和密码,访问http://www.renren.com/PLogin.do页面;登录成功后,该请求的响应会返回一个新的跳转网址:
然后直接以Get方法请求该网址即可:
package RenRen;
import org.apache.http.Header;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.message.BasicNameValuePair;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
/**
 * Simulates a login using the login endpoint discovered by RR_Preprocess.java.
 * First POSTs the credentials to the login page, then GETs the URL returned in
 * the response's Location header and prints that page.
 */
public class RR_Login {
    /**
     * Submits the login form and prints the page the server redirects to.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Login endpoint
        String rr_url = "http://www.renren.com/PLogin.do";
        HttpPost httpPost = new HttpPost(rr_url);
        // Form fields submitted with the POST request
        List<NameValuePair> nameValuePairs = new ArrayList<>();
        nameValuePairs.add(new BasicNameValuePair("email", "******")); // your own user name
        nameValuePairs.add(new BasicNameValuePair("password", "******")); // your own password
        // try-with-resources closes the client (and its connections) on every path.
        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
            // Encode the form explicitly as UTF-8; without a charset the entity
            // falls back to ISO-8859-1, which corrupts non-ASCII credentials.
            httpPost.setEntity(new UrlEncodedFormEntity(nameValuePairs, "UTF-8"));
            Header header;
            // Close the POST response before issuing the follow-up GET so its
            // connection is released instead of being held open.
            try (CloseableHttpResponse httpResponse = httpClient.execute(httpPost)) {
                System.out.println(httpResponse);
                // Read the redirect target from the response header
                header = httpResponse.getFirstHeader("Location");
            }
            if (header != null) {
                // GET the redirect URL using the same client
                HttpGet httpGet = new HttpGet(header.getValue());
                ResponseHandler<String> responseHandler = new BasicResponseHandler();
                String res = httpClient.execute(httpGet, responseHandler);
                System.out.println(res);
            } else {
                // Make the "no redirect" outcome visible instead of exiting silently.
                System.err.println("No Location header in the login response; nothing to follow.");
            }
        } catch (IOException e) {
            // UnsupportedEncodingException and ClientProtocolException are both
            // IOException subclasses, so a single handler covers all three cases.
            e.printStackTrace();
        }
    }
}
参考资料:
1、http://blog.csdn.net/qy20115549/article/details/52249232