用 HttpClient 抓取网页的时候,不会自动解压响应数据,需要根据响应头 Content-Encoding 自行解压。
// When the response header has Content-Encoding = "deflate":
/**
 * Decompresses a response body whose Content-Encoding is "deflate".
 *
 * <p>The HTTP "deflate" coding is specified as a zlib-wrapped (RFC 1950) stream, but
 * some servers send a raw DEFLATE (RFC 1951) stream instead. Input that starts with
 * a plausible zlib header is therefore tried as zlib first; all other input, and any
 * input whose zlib attempt fails, is decoded as raw DEFLATE.
 *
 * @param src the compressed bytes; must not be null
 * @return the decompressed bytes
 * @throws Exception if {@code src} is null or cannot be decoded in either format
 */
protected byte[] defalteUnCompress(byte[] src) throws Exception {
    if (src == null) {
        throw new NullPointerException("src must not be null");
    }
    java.io.IOException zlibFailure = null;
    if (hasZlibHeader(src)) {
        try {
            return inflateDeflateStream(src, false);
        } catch (java.io.IOException e) {
            // A raw stream can coincidentally resemble a zlib header; retry as raw below.
            zlibFailure = e;
        }
    }
    try {
        return inflateDeflateStream(src, true);
    } catch (java.io.IOException e) {
        if (zlibFailure != null) {
            // Keep the first failure visible for diagnosis.
            e.addSuppressed(zlibFailure);
        }
        throw e;
    }
}

/** Reports whether {@code src} begins with a two-byte zlib (RFC 1950) header without a preset dictionary. */
private boolean hasZlibHeader(byte[] src) {
    if (src.length < 2) {
        return false;
    }
    int cmf = src[0] & 0xFF;
    int flg = src[1] & 0xFF;
    // CM must be 8 (deflate), CINFO at most 7, FDICT clear, and the 16-bit header a multiple of 31.
    return (cmf & 0x0F) == 8
            && (cmf >> 4) <= 7
            && (flg & 0x20) == 0
            && ((cmf << 8) | flg) % 31 == 0;
}

/** Inflates all of {@code src}; {@code nowrap} selects raw DEFLATE instead of zlib framing. */
private byte[] inflateDeflateStream(byte[] src, boolean nowrap) throws java.io.IOException {
    // An Inflater passed in by the caller is not released by InflaterInputStream.close(),
    // so it is ended explicitly in the finally block.
    Inflater inflater = new Inflater(nowrap);
    try (InflaterInputStream in =
            new InflaterInputStream(new ByteArrayInputStream(src), inflater)) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        int count;
        while ((count = in.read(buffer)) != -1) {
            out.write(buffer, 0, count);
        }
        return out.toByteArray();
    } finally {
        inflater.end();
    }
}
// When the response header has Content-Encoding = "gzip,deflate":
/**
 * Decompresses a gzip-encoded response body.
 *
 * @param src the gzip-compressed bytes; must not be null
 * @return the decompressed bytes
 * @throws Exception if {@code src} is null or cannot be read as gzip data
 */
protected byte[] gzipUnCompress(byte[] src) throws Exception {
    if (src == null) {
        throw new NullPointerException("src must not be null");
    }
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    // try-with-resources closes the gzip stream (and the byte stream it wraps) on every path.
    try (GZIPInputStream in = new GZIPInputStream(new ByteArrayInputStream(src))) {
        byte[] buffer = new byte[1024];
        int count;
        while ((count = in.read(buffer)) != -1) {
            out.write(buffer, 0, count);
        }
    }
    return out.toByteArray();
}