从网络中获取网页数据时，网页有可能使用GZIP压缩技术对页面进行压缩，这样可以减小通过网络传输的数据量，提高浏览速度。因此在获取网络数据时，要先判断响应是否经过GZIP压缩（例如检查Content-Encoding响应头）；对GZIP格式的数据需要使用GZIPInputStream进行解压处理，否则获取到的数据可能出现乱码。
import java.io.ByteArrayOutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.zip.GZIPInputStream;
import com.lowagie.text.pdf.codec.Base64.InputStream;
/**
 * Fetches a web page over HTTP and prints its body, transparently
 * decompressing the response when the server reports a gzip content coding.
 *
 * <p>Stream types are written as {@code java.io.InputStream} in full on purpose:
 * this file imports a different class whose simple name is also
 * {@code InputStream}, and the stream returned by {@link HttpURLConnection}
 * cannot be cast to it.
 *
 * @author 杜文俊
 */
public class stretest {

    /** Size in bytes of the scratch buffer used when draining a stream. */
    private static final int BUFFER_SIZE = 1024;

    /**
     * Requests a fixed URL, decodes the response body as GBK and prints it to
     * standard output; the reported content encoding is printed to standard error.
     *
     * @param args ignored
     * @throws Exception if the request fails, the response status is not 200,
     *                   or the body cannot be read or decoded
     */
    public static void main(String[] args) throws Exception {
        URL url = new URL("http://www.yancao18.com/server/getnode.php?user=test&pwd=test");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        String result;
        String contentEncoding;
        try {
            conn.setConnectTimeout(6 * 1000); // connect timeout in milliseconds
            if (conn.getResponseCode() != 200) {
                throw new RuntimeException("请求url失败");
            }
            // Read the header once, while the connection is still open.
            contentEncoding = conn.getContentEncoding();
            java.io.InputStream inputStream = conn.getInputStream();
            // Content-coding tokens are case-insensitive, so "GZIP" must match too.
            // NOTE(review): the body is assumed to be GBK-encoded - confirm against the server.
            if ("gzip".equalsIgnoreCase(contentEncoding)) {
                result = readDataForZgip(inputStream, "GBK");
            } else {
                result = readData(inputStream, "GBK");
            }
        } finally {
            // Release the connection even when the request or the read fails.
            conn.disconnect();
        }
        System.out.println(result);
        System.err.println("ContentEncoding: " + contentEncoding);
    }

    /**
     * Reads the stream to its end and decodes the bytes with the given charset.
     * The stream is always closed, whether or not reading succeeds.
     *
     * @param inStream    the stream to drain and close
     * @param charsetName name of the charset used to decode the bytes
     * @return the decoded content; an empty string if the stream has no data
     * @throws Exception if reading fails or the charset name is not supported
     */
    public static String readData(java.io.InputStream inStream, String charsetName) throws Exception {
        try {
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] buffer = new byte[BUFFER_SIZE];
            int len;
            while ((len = inStream.read(buffer)) != -1) {
                collected.write(buffer, 0, len);
            }
            return new String(collected.toByteArray(), charsetName);
        } finally {
            inStream.close();
        }
    }

    /**
     * Decompresses a gzip stream to its end and decodes the resulting bytes with
     * the given charset. The supplied stream is always closed, including when
     * the data turns out not to be valid gzip.
     *
     * @param inStream    the gzip-compressed stream to drain and close
     * @param charsetName name of the charset used to decode the decompressed bytes
     * @return the decoded, decompressed content
     * @throws Exception if the data is not valid gzip, reading fails, or the
     *                   charset name is not supported
     */
    public static String readDataForZgip(java.io.InputStream inStream, String charsetName) throws Exception {
        boolean wrapped = false;
        try {
            GZIPInputStream gzipStream = new GZIPInputStream(inStream);
            wrapped = true;
            // readData closes gzipStream, which in turn closes inStream.
            return readData(gzipStream, charsetName);
        } finally {
            // The constructor reads the gzip header and may throw before any
            // wrapper exists; in that case the raw stream must be closed here.
            if (!wrapped) {
                inStream.close();
            }
        }
    }
}