Httpclient gzip 乱码问题解决

最新推荐文章于 2024-07-09 14:42:08 发布

sun0322

最新推荐文章于 2024-07-09 14:42:08 发布

阅读量2.3k

点赞数

分类专栏： java # 服务器配置、网络相关文章标签： Httpclient gzip 乱码问题解决

本文链接：https://blog.csdn.net/sxzlc/article/details/103439160

版权

java 同时被 2 个专栏收录

83 篇文章 4 订阅

订阅专栏

服务器配置、网络相关

48 篇文章 2 订阅

订阅专栏

■出现问题的原因推测

请求被网站的反爬机制拦截了：由于请求中缺少 cookie，服务器返回的并不是页面内容，而是一段需要在浏览器中运行、用来生成 cookie 的 js。

运行结果里的 arg1 就是生成 cookie 用的密钥；它下面那些 \x.. 形式的内容也不是编码（乱码）问题，而是 js 混淆的结果。

访问带有反爬机制的网站时，需要携带一些基础的 http 头，把请求模拟成浏览器的访问。

https://www.jianshu.com/p/401a25134b89

■前言

以下代码运行的返回值

■代码

package com.sxz.timecontroal;
 
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URLDecoder;
import java.util.zip.GZIPInputStream;

import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
 
 
/**
 * Small command-line demo that downloads one page with Apache HttpClient,
 * prints the status line and all response headers, and then prints the
 * response body decoded as UTF-8.
 *
 * <p>The request asks for a gzip-compressed response; when the server answers
 * with {@code Content-Encoding: gzip} the body is decompressed by this class
 * itself via {@link GZIPInputStream}.
 */
public class CheckTimeWithNet {

    /** Page that {@link #main(String[])} downloads. */
    static final String LOGINURL     = "https://blog.csdn.net/sxzlc/article/list/3";

    /**
     * Downloads {@link #LOGINURL} and prints status line, headers and body to
     * standard output. Failures are reported on standard error instead of
     * being silently ignored.
     *
     * @param args unused
     */
    public static void main(final String[] args) {

        final DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            final HttpGet httpGet = new HttpGet(LOGINURL);
            // Only advertise gzip: it is the only content coding this class can
            // decode. Advertising "deflate" as well would allow the server to
            // send a body that gets printed as garbage.
            httpGet.addHeader("Accept-Encoding", "gzip");

            final HttpResponse response;
            try {
                response = httpclient.execute(httpGet);
            } catch (final ClientProtocolException cpException) {
                // Without a response there is nothing left to inspect.
                System.err.println("HTTP protocol error while requesting " + LOGINURL + ": " + cpException);
                return;
            } catch (final IOException ioException) {
                System.err.println("I/O error while requesting " + LOGINURL + ": " + ioException);
                return;
            }

            // verify response is HTTP OK
            final int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != HttpStatus.SC_OK) {
                System.out.println("Unexpected HTTP status from " + LOGINURL + ": " + statusCode);
                return;
            }

            System.out.println("---------------------Status code Info Start---------------------");
            System.out.println(response.getStatusLine());
            System.out.println("---------------------Status code Info end  ---------------------");
            System.out.println("---------------------Head Info Start---------------------");
            for (final Header h : response.getAllHeaders()) {
                System.out.println(h.getName() + ":" + h.getValue());
            }
            System.out.println("---------------------Head Info End  ---------------------");

            // Alternative that is not used here: wrap the entity in a
            // GzipDecompressingEntity and read it with EntityUtils.toString.
            String getResult = null;
            try {
                getResult = getStringFromResponseUzip(response);
            } catch (final Exception exception) {
                System.err.println("Failed to read the response body: " + exception);
            }
            System.out.println(getResult);
        } finally {
            // Release the connections held by the client on every exit path.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Returns the response body as text, decompressing it first when the
     * response carries a {@code Content-Encoding} header that mentions gzip
     * (matched case-insensitively). The body is always decoded as UTF-8.
     * Each {@code Content-Encoding} value that is inspected is echoed to
     * standard output.
     *
     * @param response the response to read; may be {@code null}
     * @return the body text, {@code null} when {@code response} is
     *         {@code null}, or an empty string when the response has no entity
     * @throws Exception if the body cannot be read or is not valid gzip data
     */
    public static String getStringFromResponseUzip(final HttpResponse response) throws Exception {
        if (response == null) {
            return null;
        }
        if (response.getEntity() == null) {
            // No body at all (nothing to decompress or decode).
            return "";
        }
        for (final Header h : response.getHeaders("Content-Encoding")) {
            final String encoding = h.getValue();
            System.out.println(encoding);
            if (encoding != null && encoding.toLowerCase(java.util.Locale.ROOT).contains("gzip")) {
                //For GZip response
                final InputStream in = response.getEntity().getContent();
                final GZIPInputStream gzin;
                try {
                    gzin = new GZIPInputStream(in);
                } catch (final IOException exception) {
                    // The gzip header could not be read; do not leak the raw stream.
                    in.close();
                    throw exception;
                }
                return getStringFromStream(new InputStreamReader(gzin, "UTF-8"));
            }
        }
        return EntityUtils.toString(response.getEntity(), "utf-8");
    }

    /**
     * Reads the reader to its end line by line and returns the text with every
     * line terminated by {@code "\r\n"}. The reader is closed before this
     * method returns, whether or not reading succeeded.
     *
     * @param isr the reader to drain and close
     * @return all lines read, each followed by {@code "\r\n"}
     * @throws Exception if reading fails
     */
    public static String getStringFromStream(final InputStreamReader isr) throws Exception{
        final BufferedReader br = new BufferedReader(isr);
        try {
            final StringBuilder sb = new StringBuilder();
            String tmp;
            while ((tmp = br.readLine()) != null) {
                sb.append(tmp);
                sb.append("\r\n");
            }
            return sb.toString();
        } finally {
            // Closing the BufferedReader also closes the wrapped reader.
            br.close();
        }
    }
}

■运行结果

---------------------Status code Info Start---------------------
HTTP/1.1 200 OK
---------------------Status code Info end ---------------------
---------------------Head Info Start---------------------
Server:Tengine
Date:Sat, 07 Dec 2019 12:20:38 GMT
Content-Type:text/html; charset=utf-8
Transfer-Encoding:chunked
Connection:keep-alive
Set-Cookie:acw_tc=2760820215757212385795097e52a909ebbcda96b20e30f4c216c0bfbc89e6;path=/;HttpOnly;Max-Age=2678401
Content-Encoding:gzip
cache-control:no-cache, no-store
Pragma:no-cache
Strict-Transport-Security:max-age=86400
---------------------Head Info End ---------------------
gzip
<html><script>
var arg1='70EBF8B68AD7946E52DB795B887AEDFC88D2C6E3';
var _0x4818=['\x63\x73\x4b\x48\x77\x71\x4d\x49',

。。。

();try{return!!window['\x61\x64\x64\x45\x76\x65\x6e\x74\x4c\x69\x73\x74\x65\x6e\x65\x72'];}catch(_0x35538d){return![];}}()){document[_0x55f3('0x33', '\x56\x25\x59\x52')](_0x55f3('0x34', '\x79\x41\x70\x7a'),l,![]);}else{document[_0x55f3('0x36', '\x79\x41\x70\x7a')](_0x55f3('0x37', '\x4c\x24\x28\x44'),l);}_0x4db1c();setInterval(function(){_0x4db1c();},0xfa0);

function setCookie(name,value){var expiredate=new Date();expiredate.setTime(expiredate.getTime()+(3600*1000));document.cookie=name+"="+value+";expires="+expiredate.toGMTString()+";max-age=3600;path=/";}
function reload(x) {setCookie("acw_sc__v2", x);document.location.reload();}
</script></html>

■后续

解压后为16进制代码，有待解决。。。

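例如：\x65 表示字符 e（\x7a 才是 z）

起初以为这是 URLENCODE 造成的，打算使用 URLDECODE 解决。（注意：\x65 这种写法是 js 字符串的十六进制转义，而 URL 编码的形式是 %65，所以 URLDECODE 对它实际上不起作用，参见下文。）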

感谢，[gybao]大神的帮助

https://bbs.csdn.net/topics/395274030

但是，在没有使用 URLDECODE 的情况下，把之前的代码再运行一下，竟然直接成功了。

但是，我之前是怎么跑出这种效果的，原因不明。。。　推测问题的原因在下面记述

■再次修改后的代码

对于目前最新代码的说明

当能进入到下面代码中判断 Content-Encoding 是否包含 gzip 的分支（原代码第79行）时，不论有没有 URLDecoder.decode 这一步（原代码第85行），都不会出现乱码问题。

代码

package com.sxz.timecontroal;
 
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URLDecoder;
import java.util.zip.GZIPInputStream;

import org.apache.http.Header;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
 
 
/**
 * Small command-line demo that downloads one page with Apache HttpClient,
 * prints the status line and all response headers, and then prints the
 * response body decoded as UTF-8.
 *
 * <p>The request asks for a gzip-compressed response; when the server answers
 * with {@code Content-Encoding: gzip} the body is decompressed by this class
 * itself via {@link GZIPInputStream}.
 */
public class CheckTimeWithNet {

    //static final String LOGINURL     = "https://blog.csdn.net/sxzlc?orderby=ViewCount";
    /** Page that {@link #main(String[])} downloads. */
    static final String LOGINURL     = "https://blog.csdn.net/sxzlc/article/list/2?orderby=ViewCount";

    /**
     * Downloads {@link #LOGINURL} and prints status line, headers and body to
     * standard output. Failures are reported on standard error instead of
     * being silently ignored.
     *
     * @param args unused
     */
    public static void main(final String[] args) {

        final DefaultHttpClient httpclient = new DefaultHttpClient();
        try {
            final HttpGet httpGet = new HttpGet(LOGINURL);
            // Only advertise gzip: it is the only content coding this class can
            // decode. Advertising "deflate" as well would allow the server to
            // send a body that gets printed as garbage.
            httpGet.addHeader("Accept-Encoding", "gzip");

            final HttpResponse response;
            try {
                response = httpclient.execute(httpGet);
            } catch (final ClientProtocolException cpException) {
                // Without a response there is nothing left to inspect.
                System.err.println("HTTP protocol error while requesting " + LOGINURL + ": " + cpException);
                return;
            } catch (final IOException ioException) {
                System.err.println("I/O error while requesting " + LOGINURL + ": " + ioException);
                return;
            }

            // verify response is HTTP OK
            final int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != HttpStatus.SC_OK) {
                System.out.println("Unexpected HTTP status from " + LOGINURL + ": " + statusCode);
                return;
            }

            System.out.println("---------------------Status code Info Start---------------------");
            System.out.println(response.getStatusLine());
            System.out.println("---------------------Status code Info end  ---------------------");
            System.out.println("---------------------Head Info Start---------------------");
            for (final Header h : response.getAllHeaders()) {
                System.out.println(h.getName() + ":" + h.getValue());
            }
            System.out.println("---------------------Head Info End  ---------------------");

            // Alternative that is not used here: wrap the entity in a
            // GzipDecompressingEntity and read it with EntityUtils.toString.
            String getResult = null;
            try {
                getResult = getStringFromResponseUzip(response);
            } catch (final Exception exception) {
                System.err.println("Failed to read the response body: " + exception);
            }
            System.out.println(getResult);
        } finally {
            // Release the connections held by the client on every exit path.
            httpclient.getConnectionManager().shutdown();
        }
    }

    /**
     * Returns the response body as text, decompressing it first when the
     * response carries a {@code Content-Encoding} header that mentions gzip
     * (matched case-insensitively). The body is always decoded as UTF-8.
     * Each {@code Content-Encoding} value that is inspected, and a marker line
     * telling which branch was taken, are echoed to standard output.
     *
     * <p>The decoded text is returned as-is. It is deliberately not passed
     * through {@code URLDecoder}: a page body is not URL-encoded, and decoding
     * it would turn every {@code '+'} into a space and fail with an
     * {@code IllegalArgumentException} on a {@code '%'} that is not followed
     * by two hex digits.
     *
     * @param response the response to read; may be {@code null}
     * @return the body text, {@code null} when {@code response} is
     *         {@code null}, or an empty string when the response has no entity
     * @throws Exception if the body cannot be read or is not valid gzip data
     */
    public static String getStringFromResponseUzip(final HttpResponse response) throws Exception {
        if (response == null) {
            return null;
        }
        if (response.getEntity() == null) {
            // No body at all (nothing to decompress or decode).
            return "";
        }
        for (final Header h : response.getHeaders("Content-Encoding")) {
            final String encoding = h.getValue();
            System.out.println(encoding);
            if (encoding != null && encoding.toLowerCase(java.util.Locale.ROOT).contains("gzip")) {
                //For GZip response
                final InputStream in = response.getEntity().getContent();
                final GZIPInputStream gzin;
                try {
                    gzin = new GZIPInputStream(in);
                } catch (final IOException exception) {
                    // The gzip header could not be read; do not leak the raw stream.
                    in.close();
                    throw exception;
                }
                final String responseText = getStringFromStream(new InputStreamReader(gzin, "UTF-8"));
                System.out.println("---------------------is gzip---------------------");
                return responseText;
            }
        }
        System.out.println("---------------------is not gzip---------------------");
        return EntityUtils.toString(response.getEntity(), "utf-8");
    }

    /**
     * Reads the reader to its end line by line and returns the text with every
     * line terminated by {@code "\r\n"}. The reader is closed before this
     * method returns, whether or not reading succeeded.
     *
     * @param isr the reader to drain and close
     * @return all lines read, each followed by {@code "\r\n"}
     * @throws Exception if reading fails
     */
    public static String getStringFromStream(final InputStreamReader isr) throws Exception{
        final BufferedReader br = new BufferedReader(isr);
        try {
            final StringBuilder sb = new StringBuilder();
            String tmp;
            while ((tmp = br.readLine()) != null) {
                sb.append(tmp);
                sb.append("\r\n");
            }
            return sb.toString();
        } finally {
            // Closing the BufferedReader also closes the wrapped reader.
            br.close();
        }
    }
}

以上代码运行后的结果