HttpClient抓取【需登录跳转页面】的数据

大家都知道 HttpClient 可以抓取页面数据,但有的页面需要用户登录后才能访问。最初我先用浏览器登录,再把浏览器里的 Cookie 手动放进请求里,这样可以抓取;但一天后服务器的 Session 就过期了,每次都要重新复制 Cookie,很麻烦。后来在网上查了很多资料,才有了下面的版本。下面的代码需要设置两个 URL:第一个是登录页的 URL,用来完成登录并获取登录后的 Cookie;第二个是真正要抓取的页面 URL。两次请求必须使用同一个 HttpClient 实例,这样登录得到的 Cookie 才会在第二次请求时自动带上。代码很简单,大家应该都可以看懂,我就不逐行解释了。

package cn.amazon.http;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.CookieStore;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;

/**
 * Demonstrates fetching a page that is only reachable after logging in.
 *
 * <p>Two POST requests are sent through the same {@link DefaultHttpClient}: the first one
 * submits the login form to {@code loginUrl}, the second one requests {@code SearchUrl}.
 * Both requests share one client instance so that cookies stored by the client after the
 * login response are available to the second request.
 */
public class getCookie {
    /** URL of the login form target; must be filled in before running. */
    private String loginUrl = "";
    /** URL of the page to fetch once logged in; must be filled in before running. */
    private String SearchUrl = "";
    /** Charset used both to encode the form body and to decode the response body. */
    private String charset = "UTF-8";

    /**
     * Logs in with the configured form parameters, then requests the protected page.
     *
     * <p>Both response objects are printed to standard output. Any exception is printed
     * to standard error and otherwise swallowed, so this method never throws.
     */
    public void test() {
        // Form parameters sent with the login request.
        Map<String, String> createMap = new HashMap<String, String>();
        createMap.put("userName", "");
        createMap.put("password", "");
        createMap.put("email", "huayanh@sellercs.amazon.com");

        DefaultHttpClient client = new DefaultHttpClient(new PoolingClientConnectionManager());
        try {
            HttpPost loginRequest = new HttpPost(loginUrl);
            // Set the request header.
            loginRequest.setHeader("User-Agent",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0");

            // Convert the parameter map into a URL-encoded form body.
            List<NameValuePair> formParams = new ArrayList<NameValuePair>();
            for (Entry<String, String> param : createMap.entrySet()) {
                formParams.add(new BasicNameValuePair(param.getKey(), param.getValue()));
            }
            if (!formParams.isEmpty()) {
                loginRequest.setEntity(new UrlEncodedFormEntity(formParams, charset));
            }

            // First request: log in.
            HttpResponse loginResponse = client.execute(loginRequest);
            System.out.println(loginResponse);
            // The login body is not needed, but it must be consumed so the pooled
            // connection is released before the next request is issued.
            EntityUtils.consume(loginResponse.getEntity());

            // Second request: the page that requires the logged-in session.
            HttpPost searchRequest = new HttpPost(SearchUrl);
            HttpResponse searchResponse = client.execute(searchRequest);
            System.out.println(searchResponse);

            // Body of the page fetched after login; reading it also releases the connection.
            String result = null;
            HttpEntity resEntity = searchResponse.getEntity();
            if (resEntity != null) {
                result = EntityUtils.toString(resEntity, charset);
            }
            // `result` now holds the page content for any further processing.
        } catch (Exception ex) {
            ex.printStackTrace();
        } finally {
            // Release all pooled connections held by this client.
            client.getConnectionManager().shutdown();
        }
    }

    /** Entry point: runs the login-then-fetch demo once. */
    public static void main(String[] args) {
        getCookie main = new getCookie();
        main.test();
    }
}
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值