用java模拟curl利用cookie登录抓取一个受密码保护的新页面 ( by quqi99 )

标签: java, string, encoding, null, url, byte
15034人阅读 评论(4) 收藏 举报
分类:

              用java模拟curl利用cookie登录抓取一个受密码保护的新页面 ( by quqi99 )



作者:张华 发表于:2011-01-11

版权声明:可以任意转载,转载时请务必以超链接形式标明文章原始出处和作者信息及本版权声明


        本文主要是如何灵活应用HttpURLConnection, 代码内容包括:

        1)以GET或POST方式获取网页正文及HTTP头信息

        2)调用登录接口进行登录,然后获取到登录后的cookie

        3) 用获取到的cookie去访问一个受密码保护的新页面。

        代码如下:



import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

import com.TripResearch.util.StringUtils;
import com.TripResearch.util.Utils;

/**
 *
 *
 * @author: huazhang
 * @since: 2011-4-15
 */

public class AutoTop
{

    public final static String CONTENT_TYPE = "Content-Type";

    public static Content curl(String method, String sUrl, Map<String, String> paramMap,
            Map<String, String> requestHeaderMap, boolean isOnlyReturnHeader)
    {
        Content content = null;
        HttpURLConnection httpUrlConnection = null;
        InputStream in = null;
        try
        {
            URL url = new URL(sUrl);
            boolean isPost = "POST".equals(method);
            if (Utils.isEmptyString(method) || (!"GET".equalsIgnoreCase(method) && !"POST".equalsIgnoreCase(method)))
            {
                method = "POST";
            }
            URL resolvedURL = url;
            if ("GET".equals(method) && !Utils.isEmptySafe(paramMap))
            {
                boolean firstParam = true;
                StringBuffer newUrlBuffer = new StringBuffer(url.toExternalForm());
                if (url.getQuery() == null)
                {
                    newUrlBuffer.append("?");
                }
                else
                {
                    newUrlBuffer.append("&");
                }
                for (Map.Entry<String, String> entry : paramMap.entrySet())
                {
                    String encName = URLEncoder.encode(entry.getKey(), StringUtils.ENC_DESC_UTF8);
                    if (firstParam)
                    {
                        firstParam = false;
                    }
                    else
                    {
                        newUrlBuffer.append("&");
                    }
                    String encValue = URLEncoder.encode(entry.getValue(), StringUtils.ENC_DESC_UTF8);
                    newUrlBuffer.append(encName);
                    newUrlBuffer.append("=");
                    newUrlBuffer.append(encValue);
                }
                resolvedURL = new java.net.URL(newUrlBuffer.toString());
            }

            URLConnection urlConnection = resolvedURL.openConnection();
            httpUrlConnection = (HttpURLConnection) urlConnection;
            httpUrlConnection.setRequestMethod(method);
            // Do not follow redirects, We will handle redirects ourself
            httpUrlConnection.setInstanceFollowRedirects(false);
            urlConnection.setDoOutput(true);
            urlConnection.setDoInput(true);
            urlConnection.setConnectTimeout(5000);
            urlConnection.setReadTimeout(5000);
            urlConnection.setUseCaches(false);
            urlConnection.setDefaultUseCaches(false);
            // set request header
            if (!Utils.isEmptySafe(requestHeaderMap))
            {
                for (Map.Entry<String, String> entry : requestHeaderMap.entrySet())
                {
                    String key = entry.getKey();
                    String val = entry.getValue();
                    if (key != null && val != null)
                    {
                        urlConnection.setRequestProperty(key, val);
                    }
                }
            }
            if (isPost)
            {
                urlConnection.setDoOutput(true);
                ByteArrayOutputStream bufOut = new ByteArrayOutputStream();
                boolean firstParam = true;
                for (Map.Entry<String, String> entry : paramMap.entrySet())
                {
                    String encName = URLEncoder.encode(entry.getKey(), StringUtils.ENC_DESC_UTF8);
                    if (firstParam)
                    {
                        firstParam = false;
                    }
                    else
                    {
                        bufOut.write((byte) '&');
                    }
                    String encValue = URLEncoder.encode(entry.getValue(), StringUtils.ENC_DESC_UTF8);
                    bufOut.write(encName.getBytes(StringUtils.ENC_DESC_UTF8));
                    bufOut.write((byte) '=');
                    bufOut.write(encValue.getBytes(StringUtils.ENC_DESC_UTF8));
                }
                byte[] postContent = bufOut.toByteArray();
                if (urlConnection instanceof HttpURLConnection)
                {
                    ((HttpURLConnection) urlConnection).setFixedLengthStreamingMode(postContent.length);
                }
                OutputStream postOut = urlConnection.getOutputStream();
                postOut.write(postContent);
                postOut.flush();
                postOut.close();
            }
            int responseCode = httpUrlConnection.getResponseCode();
            // We handle redirects ourself
            if (responseCode == HttpURLConnection.HTTP_MOVED_PERM || responseCode == HttpURLConnection.HTTP_MOVED_TEMP)
            {
                String location = httpUrlConnection.getHeaderField("Location");
                URL newAction = new URL(url, location);
                // Recurse
                StringBuffer newUrlSb = new StringBuffer(newAction.getProtocol() + "://" + newAction.getHost());
                if (newAction.getPort() != -1)
                {
                    newUrlSb.append(":" + newAction.getPort());
                }
                if (newAction.getPath() != null)
                {
                    newUrlSb.append(newAction.getPath());
                }
                if (newAction.getQuery() != null)
                {
                    newUrlSb.append("?" + newAction.getQuery());
                }
                if (newAction.getRef() != null)
                {
                    newUrlSb.append("#" + newAction.getRef());
                }
                return curl("GET", newUrlSb.toString(), null, requestHeaderMap, isOnlyReturnHeader);
            }
            else if (responseCode == HttpURLConnection.HTTP_OK || responseCode == HttpURLConnection.HTTP_CREATED)
            {
                byte[] bytes = new byte[0];
                if (!isOnlyReturnHeader)
                {
                    in = httpUrlConnection.getInputStream();
                    ByteArrayOutputStream bout = new ByteArrayOutputStream();
                    byte[] buf = new byte[1024];
                    while (true)
                    {
                        int rc = in.read(buf);
                        if (rc <= 0)
                        {
                            break;
                        }
                        else
                        {
                            bout.write(buf, 0, rc);
                        }
                    }
                    bytes = bout.toByteArray();
                    in.close();
                }
                // only fetch Content-Length and Last-Modified header
                String encoding = null;
                if (Utils.isEmptyString(encoding))
                {
                    encoding = getEncodingFromContentType(httpUrlConnection.getHeaderField(CONTENT_TYPE));
                }
                content = new Content(sUrl, new String(bytes, encoding), httpUrlConnection.getHeaderFields());
            }
        }
        catch (Exception e)
        {
            Utils.ERR(e);
            return null;
        }
        finally
        {
            if (httpUrlConnection != null)
            {
                httpUrlConnection.disconnect();
            }
        }
        return content;
    }

    public static String getEncodingFromContentType(String contentType)
    {
        String encoding = null;
        if (Utils.isEmptyString(contentType))
        {
            return null;
        }
        StringTokenizer tok = new StringTokenizer(contentType, ";");
        if (tok.hasMoreTokens())
        {
            tok.nextToken();
            while (tok.hasMoreTokens())
            {
                String assignment = tok.nextToken().trim();
                int eqIdx = assignment.indexOf('=');
                if (eqIdx != -1)
                {
                    String varName = assignment.substring(0, eqIdx).trim();
                    if ("charset".equalsIgnoreCase(varName))
                    {
                        String varValue = assignment.substring(eqIdx + 1).trim();
                        if (varValue.startsWith("/"") && varValue.endsWith("/""))
                        {
                            // substring works on indices
                            varValue = varValue.substring(1, varValue.length() - 1);
                        }
                        if (Charset.isSupported(varValue))
                        {
                            encoding = varValue;
                        }
                    }
                }
            }
        }
        if (Utils.isEmptyString(encoding))
        {
            return StringUtils.ENC_DESC_UTF8;
        }
        return encoding;
    }

    public static void main(String[] args)
    {
        // login
        String email = "";
        String pass = "";
        String loginUrl = "http://www.quqi.com/Login";
        String rateReviewUrl = "http://www.quqi.com/RateUserReview";
        Map<String, String> paramMap = new HashMap<String, String>();
        paramMap.put("email", email);
        paramMap.put("pass", pass);
        Content content = curl("POST", loginUrl, paramMap, null, false);

        // build request headers & do rate of user review
        List<String> cookieList = content.getHeaders().get("Set-Cookie");
        Map<String, String> requestHeaders = new HashMap<String, String>();  
        if (!Utils.isEmptySafe(cookieList))
        {
            StringBuffer sb = new StringBuffer();
            boolean isLast = false;
            int i = 0;
            for (String val : cookieList)
            {
                i++;
                if(i == cookieList.size())
                {
                    isLast = true;
                }
                int pos = val.indexOf("=");
                if (pos != -1)
                {
                    String cookieName = val.substring(0, pos);
                    String cookieVal = val.substring(pos + 1);
                    cookieVal = cookieVal.split(";")[0];
                    if(isLast)
                    {
                        sb.append(cookieName + "=" + cookieVal);
                    }else
                    {
                        sb.append(cookieName + "=" + cookieVal + ";");
                    }
                }
            }
            requestHeaders.put("Cookie", sb.toString());
        }
        paramMap = new HashMap<String, String>();
        paramMap.put("rateValue", "1");
        content = curl("POST", rateReviewUrl, paramMap, requestHeaders, false);

        System.out.println(content.getBody());
    }

}

/**
 * Result of an HTTP request: the requested URL, the decoded response body and
 * the response headers.
 */
class Content
{
    private final String                    url;
    private final String                    body;
    private final Map<String, List<String>> headers;

    /**
     * @param url     the URL the response belongs to
     * @param body    the decoded response body
     * @param headers response headers keyed by header name; {@code null} is
     *                stored as an empty map so {@link #getHeaders()} never
     *                returns {@code null}. A non-null map is stored as-is,
     *                without copying.
     */
    public Content(String url, String body, Map<String, List<String>> headers)
    {
        this.url = url;
        this.body = body;
        if (headers == null)
        {
            this.headers = new HashMap<String, List<String>>();
        }
        else
        {
            this.headers = headers;
        }
    }

    /** @return the URL passed to the constructor */
    public String getUrl()
    {
        return url;
    }

    /** @return the response body passed to the constructor */
    public String getBody()
    {
        return body;
    }

    /** @return the response headers, never {@code null} */
    public Map<String, List<String>> getHeaders()
    {
        return headers;
    }

}

0
0

猜你在找
【直播】机器学习&深度学习系统实战(唐宇迪)
【直播】Kaggle 神器:XGBoost 从基础到实战(冒教授)
【直播回放】深度学习基础与TensorFlow实践(王琛)
【直播】计算机视觉原理及实战(屈教授)
【直播】机器学习之凸优化(马博士)
【直播】机器学习之矩阵(黄博士)
【直播】机器学习之概率与统计推断(冒教授)
【直播】机器学习之数学基础
【直播】TensorFlow实战进阶(智亮)
【直播】深度学习30天系统实训(唐宇迪)
查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:1374182次
    • 积分:15602
    • 等级:
    • 排名:第665名
    • 原创:291篇
    • 转载:10篇
    • 译文:0篇
    • 评论:298条