java网络爬虫模拟登录案例教学2

本文为原创博客,仅供技术学习使用。未经允许,禁止将其复制下来上传到百度文库等平台。

模拟登录的网站

我们需要登录的网站为:autonews,模拟登陆的地址为:
https://home.autonews.com/clickshare/cspLogin.do


这里写图片描述

网络抓包分析

在请求表单中,输入用户名及密码。


这里写图片描述

可以看到,我们要向后台提交的数据有哪些。

CSParamsToPassNames:CSParamsToPassNames
userLogin.userName:你的用户名
userLogin.password:你的密码
CSDropAuthCookieSpecified:1
CSDropAuthCookie:1
submit:Sign In

实战代码

package crawlerTest;
/*   
 *  合肥工业大学 管理学院 qianyang 1563178220@qq.com
 */
import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.protocol.HTTP;
import org.apache.http.util.EntityUtils;

@SuppressWarnings("deprecation")
public class AutonewsLogin {
    // The configuration items  
    //输入用户名及密码
    private static String userName = "";  
    private static String password = "";  
    private static String redirectURL = "https://home.autonews.com/clickshare/myhome.do";  

    // Don't change the following URL  
    private static String renRenLoginURL = "https://home.autonews.com/clickshare/cspLogin.do";  

    // The HttpClient is used in one session  
    private HttpResponse response;  
    private DefaultHttpClient httpclient = new DefaultHttpClient();  

    private boolean login() {  
        //open the LoginURL
        HttpPost httpost = new HttpPost(renRenLoginURL);  
        // All the parameters post to the web site
        //建立一个NameValuePair数组,用于存储欲传送的参数
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();  
        //        nvps.add(new BasicNameValuePair("CSAuthReq", "1"));  
        //        nvps.add(new BasicNameValuePair("CSTargetURL", "http%3A%2F%2Fwww.autonews.com%2F"));  
        //        nvps.add(new BasicNameValuePair("CSResumeURL", "/clickshare/forceLogin.do"));  
        //        nvps.add(new BasicNameValuePair("CSParamsToPassNames", "CSAuthReq|CSTargetURL|CSResumeURL|CSParamsToPassNames"));  
        nvps.add(new BasicNameValuePair("userLogin.userName", userName));  
        nvps.add(new BasicNameValuePair("userLogin.password", password));  
        //        nvps.add(new BasicNameValuePair("CSDropAuthCookieSpecified", "1")); 
        //        nvps.add(new BasicNameValuePair("CSDropAuthCookie", "1")); 
        nvps.add(new BasicNameValuePair("submit", "Sign In")); 
        try {  
            httpost.setEntity(new UrlEncodedFormEntity(nvps, HTTP.UTF_8));  
            response = httpclient.execute(httpost); 
            int StatusCode = response.getStatusLine().getStatusCode();
            System.out.println(StatusCode);
        } catch (Exception e) {  
            e.printStackTrace();  
            return false;  
        } finally {  
            httpost.abort();  
        }  
        return true;  
    }  

    private String getRedirectLocation() {  
        Header locationHeader = response.getFirstHeader("Location");  
        if (locationHeader == null) {  
            return null;  
        }  
        return locationHeader.getValue();  
    }  

    private String getText(String redirectLocation) {  
        HttpGet httpget = new HttpGet(redirectLocation);  
        // Create a response handler  
        ResponseHandler<String> responseHandler = new BasicResponseHandler();  
        String responseBody = "";  
        try {  
            responseBody = httpclient.execute(httpget, responseHandler);  
        } catch (Exception e) {  
            e.printStackTrace();  
            responseBody = null;  
        } finally {  
            httpget.abort();  
            httpclient.getConnectionManager().shutdown();  
        }  
        return responseBody;  
    }  

    public String printText() { 
        String html="";
        if (login()) {  
            String redirectLocation = getRedirectLocation();  
            if (redirectLocation != null) {  
                html=getText(redirectURL);  
            }  
        }  
        return html;
    }  
    public static void main(String[] args) throws IOException {  
        AutonewsLogin AutonewsLogin = new AutonewsLogin(); 
        BufferedWriter writer = new BufferedWriter( new OutputStreamWriter( new FileOutputStream( new File("D:\\d.txt")),"gbk"));
        String html=AutonewsLogin.printText();  
        writer.write(html);
        writer.close();
        AutonewsLogin.downloadFile("http://www.autonews.com/assets/PDF/CA11537753.PDF","E:\\zipFile\\","1");
    } 
    //下载该网页的pdf文件
    public  void downloadFile(String fileURL, String saveDir,String fileName)
            throws IOException {
        File fileDir=new File(saveDir);
        if(!fileDir.exists()){
            fileDir.mkdirs();
        }
        //图片或zip下载保存地址
        String filename=saveDir+fileName+".pdf";
        File file=new File(filename);
        if(file.exists()){
            file.delete();
        }
        BufferedOutputStream bw = new BufferedOutputStream(new FileOutputStream(filename)); 
        HttpGet httpGet=new HttpGet(fileURL);
        HttpResponse httpResponse = httpclient.execute(httpGet);
        try {
            HttpEntity entity=httpResponse.getEntity();
            int i=-1;
            byte[] byt= EntityUtils.toByteArray(entity); 
            bw.write(byt);
            System.out.println("文件下载成功!");
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        bw.close();
    }
}

程序结果


这里写图片描述


这里写图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值