之前想要爬取人人网上的一些信息,因为需要登录,所以就从网上直接拷贝了一份代码来实现人人网登录,但发现搜索到的大部分代码都没有实现验证码验证,故自己添加了验证码验证。当然自己偷懒,没有解析图片,而是直接把图片保存到本地,还需要手动输入验证码。登录成功之后,就可以爬取大量其他人的信息,这里只返回了自己页面的源码。
public class RenrenLogin {
// Account credentials submitted as the "email" and "password" form fields.
// The values here are placeholders and must be replaced before running.
public static String userName = "xxxx@126.com";
public static String password = "xxxxx";
// Submitted as the "origURL" form field of the login request.
public static String redirectURL = "http://www.renren.com/home";
// Endpoint the login form is POSTed to.
public static String renRenLoginURL = "http://www.renren.com/PLogin.do";
// Response of the most recent login POST; its "Location" header is read afterwards.
private HttpResponse response;
// Response of the most recent captcha image request.
private HttpResponse resPic;
// Captcha image URL. The "rnd" value is a real random number now; previously the
// JavaScript expression "Math.random()" was embedded as literal text, so the
// parameter never varied. It is computed once, when the class is initialised.
public static String picUrl = "http://icode.renren.com/getcode.do?t=web_login&rnd=" + Math.random();
// One client instance used for the captcha request, the login POST and the page fetch
// (presumably so that all three share the same session state -- confirm).
private HttpClient httpClient = new DefaultHttpClient();
/**
 * Program entry point: creates a {@code RenrenLogin} and runs the
 * login-then-print flow.
 *
 * @param args command-line arguments (not used)
 * @throws IOException if reading the captcha from the console fails
 */
public static void main(String []args) throws IOException{
    new RenrenLogin().printText();
}
/**
 * Downloads the captcha image from {@code picUrl} and writes it to
 * {@code F:\new.png} so that the user can open the file and read the code.
 *
 * Failures are reported on the error stream and otherwise ignored; the caller
 * is not told whether the image was actually written.
 */
private void getPic(){
    InputStream is = null;
    FileOutputStream out = null;
    try {
        HttpGet httpget = new HttpGet(picUrl);
        resPic = httpClient.execute(httpget);
        HttpEntity entity = resPic.getEntity();
        if (entity == null) {
            // A response without a body has no image to save.
            System.err.println("Captcha response had no body; nothing was saved.");
            return;
        }
        is = entity.getContent();
        out = new FileOutputStream(new File("F:\\new.png"));
        byte[] buffer = new byte[1024];
        int read;
        while ((read = is.read(buffer)) != -1) {
            out.write(buffer, 0, read);
        }
    } catch (IllegalStateException e1) {
        e1.printStackTrace();
    } catch (IOException e1) {
        e1.printStackTrace();
    } finally {
        // Always close both streams, even when the copy failed half-way.
        // Closing the entity's content stream is also what hands the connection
        // back to the client, which the subsequent login POST relies on.
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (is != null) {
            try {
                is.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
/**
 * Performs the login request.
 *
 * Downloads the captcha image, asks the user to type the code on the console,
 * then POSTs the login form to {@code renRenLoginURL}. The HTTP response is kept
 * in the {@code response} field so that the redirect target can be read later.
 *
 * @return {@code true} when the request was sent and the response carries a
 *         "Location" header; {@code false} when no captcha could be read, the
 *         request failed, or the response has no "Location" header
 * @throws IOException if reading the captcha from the console fails
 */
public boolean login() throws IOException{
    BufferedReader console = new BufferedReader(new InputStreamReader(System.in));
    getPic(); // download the captcha image to local disk; the user has to open it manually
    System.out.println("请到F盘查看图片验证码并在控制台中输入:");
    String captcha = console.readLine();
    if (captcha == null) {
        // readLine() returns null at end of input; previously this surfaced as a
        // NullPointerException thrown out of this method.
        System.err.println("No captcha was entered (end of input); login aborted.");
        return false;
    }

    HttpPost httpPost = new HttpPost(renRenLoginURL);
    List<NameValuePair> nvps = new ArrayList<NameValuePair>();
    nvps.add(new BasicNameValuePair("origURL", redirectURL));
    nvps.add(new BasicNameValuePair("domain", "renren.com"));
    nvps.add(new BasicNameValuePair("isplogin", "false"));
    nvps.add(new BasicNameValuePair("formName", ""));
    nvps.add(new BasicNameValuePair("method", "post"));
    nvps.add(new BasicNameValuePair("submit", "登录人人网"));
    nvps.add(new BasicNameValuePair("email", userName));
    nvps.add(new BasicNameValuePair("password", password));
    nvps.add(new BasicNameValuePair("autoLogin", "true"));
    // Stray whitespace typed around the code is dropped.
    nvps.add(new BasicNameValuePair("icode", captcha.trim()));
    nvps.add(new BasicNameValuePair("key_id", "1"));
    nvps.add(new BasicNameValuePair("captcha_type", "web_login"));
    try {
        httpPost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));
        response = httpClient.execute(httpPost);
        System.out.println(response.getStatusLine());
        System.out.println(response.getParams());
        // Check for the redirect explicitly instead of relying on a caught
        // NullPointerException when the header is absent.
        Header location = response.getFirstHeader("Location");
        if (location == null) {
            System.err.println("Login response has no Location header.");
            return false;
        }
        System.out.println(location.getValue());
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    } finally {
        // The response body is never read, so abort the request to free the connection.
        httpPost.abort();
    }
    return true;
}
/**
 * Fetches the page at the given URL and returns its body as text.
 *
 * Note that the client's connection manager is shut down before this method
 * returns, whether or not the request succeeded.
 *
 * @param redirectLocation URL of the page to fetch
 * @return the response body, or {@code null} if the request failed
 */
public String getText(String redirectLocation){
    HttpGet pageRequest = new HttpGet(redirectLocation);
    ResponseHandler<String> bodyAsString = new BasicResponseHandler();
    try {
        return httpClient.execute(pageRequest, bodyAsString);
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    } finally {
        pageRequest.abort();
        httpClient.getConnectionManager().shutdown();
    }
}
/**
 * Reads the redirect target from the stored login response.
 *
 * @return the value of the first "Location" header, or {@code null} when the
 *         response has no such header
 */
private String getRedirectLocation() {
    Header redirect = response.getFirstHeader("Location");
    return redirect != null ? redirect.getValue() : null;
}
/**
 * Logs in and, if the login response names a redirect target, fetches that
 * page and prints its source to standard output. Prints nothing otherwise.
 *
 * @throws IOException if reading the captcha from the console fails
 */
private void printText() throws IOException{
    if (!login()) {
        return;
    }
    String target = getRedirectLocation();
    if (target == null) {
        return;
    }
    System.out.println(getText(target));
}