用java模拟curl利用cookie登录抓取一个受密码保护的新页面 ( by quqi99 )

              用java模拟curl利用cookie登录抓取一个受密码保护的新页面 ( by quqi99 )



作者:张华 发表于:2011-01-11

版权声明:可以任意转载,转载时请务必以超链接形式标明文章原始出处和作者信息及本版权声明


        本文主要介绍如何灵活应用HttpURLConnection, 代码内容包括:

        1)以GET或POST方式获取网页正文及HTTP头信息;

        2)调用登录接口进行登录,然后获取到登录后的cookie;

        3)用获取到的cookie去访问一个受密码保护的新页面。

        代码如下:



import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import com.TripResearch.util.StringUtils;
import com.TripResearch.util.Utils;

/**
 *
 *
 * @author: huazhang
 * @since: 2011-4-15
 */

public class AutoTop
{

    public final static String CONTENT_TYPE = "Content-Type";

    public static Content curl(String method, String sUrl, Map<String, String> paramMap,
            Map<String, String> requestHeaderMap, boolean isOnlyReturnHeader)
    {
        Content content = null;
        HttpURLConnection httpUrlConnection = null;
        InputStream in = null;
        try
        {
            URL url = new URL(sUrl);
            boolean isPost = "POST".equals(method);
            if (Utils.isEmptyString(method) || (!"GET".equalsIgnoreCase(method) && !"POST".equalsIgnoreCase(method)))
            {
                method = "POST";
            }
            URL resolvedURL = url;
            if ("GET".equals(method) && !Utils.isEmptySafe(paramMap))
            {
                boolean firstParam = true;
                StringBuffer newUrlBuffer = new StringBuffer(url.toExternalForm());
                if (url.getQuery() == null)
                {
                    newUrlBuffer.append("?");
                }
                else
                {
                    newUrlBuffer.append("&");
                }
                for (Map.Entry<String, String> entry : paramMap.entrySet())
                {
                    String encName = URLEncoder.encode(entry.getKey(), StringUtils.ENC_DESC_UTF8);
                    if (firstParam)
                    {
                        firstParam = false;
                    }
                    else
                    {
                        newUrlBuffer.append("&");
                    }
                    String encValue = URLEncoder.encode(entry.getValue(), StringUtils.ENC_DESC_UTF8);
                    newUrlBuffer.append(encName);
                    newUrlBuffer.append("=");
                    newUrlBuffer.append(encValue);
                }
                resolvedURL = new java.net.URL(newUrlBuffer.toString());
            }

            URLConnection urlConnection = resolvedURL.openConnection();
            httpUrlConnection = (HttpURLConnection) urlConnection;
            httpUrlConnection.setRequestMethod(method);
            // Do not follow redirects, We will handle redirects ourself
            httpUrlConnection.setInstanceFollowRedirects(false);
            urlConnection.setDoOutput(true);
            urlConnection.setDoInput(true);
            urlConnection.setConnectTimeout(5000);
            urlConnection.setReadTimeout(5000);
            urlConnection.setUseCaches(false);
            urlConnection.setDefaultUseCaches(false);
            // set request header
            if (!Utils.isEmptySafe(requestHeaderMap))
            {
                for (Map.Entry<String, String> entry : requestHeaderMap.entrySet())
                {
                    String key = entry.getKey();
                    String val = entry.getValue();
                    if (key != null && val != null)
                    {
                        urlConnection.setRequestProperty(key, val);
                    }
                }
            }
            if (isPost)
            {
                urlConnection.setDoOutput(true);
                ByteArrayOutputStream bufOut = new ByteArrayOutputStream();
                boolean firstParam = true;
                for (Map.Entry<String, String> entry : paramMap.entrySet())
                {
                    String encName = URLEncoder.encode(entry.getKey(), StringUtils.ENC_DESC_UTF8);
                    if (firstParam)
                    {
                        firstParam = false;
                    }
                    else
                    {
                        bufOut.write((byte) '&');
                    }
                    String encValue = URLEncoder.encode(entry.getValue(), StringUtils.ENC_DESC_UTF8);
                    bufOut.write(encName.getBytes(StringUtils.ENC_DESC_UTF8));
                    bufOut.write((byte) '=');
                    bufOut.write(encValue.getBytes(StringUtils.ENC_DESC_UTF8));
                }
                byte[] postContent = bufOut.toByteArray();
                if (urlConnection instanceof HttpURLConnection)
                {
                    ((HttpURLConnection) urlConnection).setFixedLengthStreamingMode(postContent.length);
                }
                OutputStream postOut = urlConnection.getOutputStream();
                postOut.write(postContent);
                postOut.flush();
                postOut.close();
            }
            int responseCode = httpUrlConnection.getResponseCode();
            // We handle redirects ourself
            if (responseCode == HttpURLConnection.HTTP_MOVED_PERM || responseCode == HttpURLConnection.HTTP_MOVED_TEMP)
            {
                String location = httpUrlConnection.getHeaderField("Location");
                URL newAction = new URL(url, location);
                // Recurse
                StringBuffer newUrlSb = new StringBuffer(newAction.getProtocol() + "://" + newAction.getHost());
                if (newAction.getPort() != -1)
                {
                    newUrlSb.append(":" + newAction.getPort());
                }
                if (newAction.getPath() != null)
                {
                    newUrlSb.append(newAction.getPath());
                }
                if (newAction.getQuery() != null)
                {
                    newUrlSb.append("?" + newAction.getQuery());
                }
                if (newAction.getRef() != null)
                {
                    newUrlSb.append("#" + newAction.getRef());
                }
                return curl("GET", newUrlSb.toString(), null, requestHeaderMap, isOnlyReturnHeader);
            }
            else if (responseCode == HttpURLConnection.HTTP_OK || responseCode == HttpURLConnection.HTTP_CREATED)
            {
                byte[] bytes = new byte[0];
                if (!isOnlyReturnHeader)
                {
                    in = httpUrlConnection.getInputStream();
                    ByteArrayOutputStream bout = new ByteArrayOutputStream();
                    byte[] buf = new byte[1024];
                    while (true)
                    {
                        int rc = in.read(buf);
                        if (rc <= 0)
                        {
                            break;
                        }
                        else
                        {
                            bout.write(buf, 0, rc);
                        }
                    }
                    bytes = bout.toByteArray();
                    in.close();
                }
                // only fetch Content-Length and Last-Modified header
                String encoding = null;
                if (Utils.isEmptyString(encoding))
                {
                    encoding = getEncodingFromContentType(httpUrlConnection.getHeaderField(CONTENT_TYPE));
                }
                content = new Content(sUrl, new String(bytes, encoding), httpUrlConnection.getHeaderFields());
            }
        }
        catch (Exception e)
        {
            Utils.ERR(e);
            return null;
        }
        finally
        {
            if (httpUrlConnection != null)
            {
                httpUrlConnection.disconnect();
            }
        }
        return content;
    }

    public static String getEncodingFromContentType(String contentType)
    {
        String encoding = null;
        if (Utils.isEmptyString(contentType))
        {
            return null;
        }
        StringTokenizer tok = new StringTokenizer(contentType, ";");
        if (tok.hasMoreTokens())
        {
            tok.nextToken();
            while (tok.hasMoreTokens())
            {
                String assignment = tok.nextToken().trim();
                int eqIdx = assignment.indexOf('=');
                if (eqIdx != -1)
                {
                    String varName = assignment.substring(0, eqIdx).trim();
                    if ("charset".equalsIgnoreCase(varName))
                    {
                        String varValue = assignment.substring(eqIdx + 1).trim();
                        if (varValue.startsWith("/"") && varValue.endsWith("/""))
                        {
                            // substring works on indices
                            varValue = varValue.substring(1, varValue.length() - 1);
                        }
                        if (Charset.isSupported(varValue))
                        {
                            encoding = varValue;
                        }
                    }
                }
            }
        }
        if (Utils.isEmptyString(encoding))
        {
            return StringUtils.ENC_DESC_UTF8;
        }
        return encoding;
    }

    public static void main(String[] args)
    {
        // login
        String email = "";
        String pass = "";
        String loginUrl = "http://www.quqi.com/Login";
        String rateReviewUrl = "http://www.quqi.com/RateUserReview";
        Map<String, String> paramMap = new HashMap<String, String>();
        paramMap.put("email", email);
        paramMap.put("pass", pass);
        Content content = curl("POST", loginUrl, paramMap, null, false);

        // build request headers & do rate of user review
        List<String> cookieList = content.getHeaders().get("Set-Cookie");
        Map<String, String> requestHeaders = new HashMap<String, String>();  
        if (!Utils.isEmptySafe(cookieList))
        {
            StringBuffer sb = new StringBuffer();
            boolean isLast = false;
            int i = 0;
            for (String val : cookieList)
            {
                i++;
                if(i == cookieList.size())
                {
                    isLast = true;
                }
                int pos = val.indexOf("=");
                if (pos != -1)
                {
                    String cookieName = val.substring(0, pos);
                    String cookieVal = val.substring(pos + 1);
                    cookieVal = cookieVal.split(";")[0];
                    if(isLast)
                    {
                        sb.append(cookieName + "=" + cookieVal);
                    }else
                    {
                        sb.append(cookieName + "=" + cookieVal + ";");
                    }
                }
            }
            requestHeaders.put("Cookie", sb.toString());
        }
        paramMap = new HashMap<String, String>();
        paramMap.put("rateValue", "1");
        content = curl("POST", rateReviewUrl, paramMap, requestHeaders, false);

        System.out.println(content.getBody());
    }

}

/**
 * Value holder for an HTTP response: the requested URL, the decoded body and
 * the response headers.
 */
class Content
{
    private final String                    url;
    private final String                    body;
    private final Map<String, List<String>> headers;

    /**
     * @param url     URL the response belongs to
     * @param body    decoded response body
     * @param headers response headers keyed by header name; {@code null} is
     *                stored as an empty map so that {@link #getHeaders()} never
     *                returns {@code null}
     */
    public Content(String url, String body, Map<String, List<String>> headers)
    {
        this.url = url;
        this.body = body;
        this.headers = (headers != null) ? headers : new HashMap<String, List<String>>();
    }

    /** @return the URL passed to the constructor */
    public String getUrl()
    {
        return url;
    }

    /** @return the body passed to the constructor */
    public String getBody()
    {
        return body;
    }

    /** @return the response headers; never {@code null} */
    public Map<String, List<String>> getHeaders()
    {
        return headers;
    }

}

评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

quqi99

你的鼓励就是我创造的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值