package com.zuidaima.encoding.util;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import org.apache.http.util.ByteArrayBuffer;
import org.apache.http.util.EncodingUtils;
import org.junit.Test;
/**
 * Network-backed checks for charset detection of downloaded web pages.
 *
 * <p>Each test downloads the front page of one site through
 * {@link #parseHttpEncoding(String)} and prints the decoded HTML. The tests
 * contain no assertions: they fail only if the download, detection or decoding
 * throws. They require outbound HTTP access to the listed hosts.
 */
public class Main {

    /** Initial capacity of the response buffer and size of each read chunk, in bytes. */
    private static final int READ_CHUNK_SIZE = 8192;

    /** Fetches and prints the page at http://www.zuidaima.com/. */
    @Test
    public void testZuidaima() throws Exception {
        String url = "http://www.zuidaima.com/";
        String html = parseHttpEncoding(url);
        System.out.println(html);
    }

    /** Fetches and prints the page at http://www.baidu.com/. */
    @Test
    public void testBaidu() throws Exception {
        String url = "http://www.baidu.com/";
        String html = parseHttpEncoding(url);
        System.out.println(html);
    }

    /**
     * Fetches and prints the page at http://www.sina.com.cn/.
     * NOTE(review): the method name suggests this page is expected to be GBK-encoded;
     * nothing here verifies that.
     */
    @Test
    public void testSinaGBK() throws Exception {
        String url = "http://www.sina.com.cn/";
        String html = parseHttpEncoding(url);
        System.out.println(html);
    }

    /** Fetches and prints the page at http://sina.jp/. */
    @Test
    public void testSinaJp() throws Exception {
        String url = "http://sina.jp/";
        String html = parseHttpEncoding(url);
        System.out.println(html);
    }

    /** Fetches and prints the page at http://www.gov.cn/. */
    @Test
    public void testGov() throws Exception {
        String url = "http://www.gov.cn/";
        String html = parseHttpEncoding(url);
        System.out.println(html);
    }

    /** Fetches and prints the page at http://www.beijing.gov.cn/. */
    @Test
    public void testBeijingGov() throws Exception {
        String url = "http://www.beijing.gov.cn/";
        String html = parseHttpEncoding(url);
        System.out.println(html);
    }

    /**
     * Downloads {@code url} with an HTTP GET, guesses the charset of the raw
     * response bytes with {@code BytesEncodingDetect}, and returns the body
     * decoded with that charset.
     *
     * <p>Redirects are not followed, and both the connect and the read timeout
     * are 10 seconds. The URL and the detected charset name are printed to
     * standard output as {@code url:charset}.
     *
     * @param url absolute HTTP URL of the page to download
     * @return the response body decoded with the detected charset
     * @throws Exception if the URL is malformed, the request or the read fails,
     *         or the detected encoding index has no entry in
     *         {@code BytesEncodingDetect.htmlname}
     */
    public String parseHttpEncoding(String url) throws Exception {
        BytesEncodingDetect bytesEncodingDetect = new BytesEncodingDetect();
        URL serverUrl = new URL(url);
        HttpURLConnection conn = (HttpURLConnection) serverUrl.openConnection();
        byte[] body;
        try {
            conn.setInstanceFollowRedirects(false);
            conn.setRequestMethod("GET");
            conn.setConnectTimeout(10000);
            conn.setReadTimeout(10000);
            conn.connect();

            BufferedInputStream bis = new BufferedInputStream(conn.getInputStream());
            try {
                // available() is only a hint of what can be read without blocking and
                // may be 0, so start from a fixed capacity and let the buffer grow.
                ByteArrayBuffer baf = new ByteArrayBuffer(READ_CHUNK_SIZE);
                byte[] chunk = new byte[READ_CHUNK_SIZE];
                int read;
                while ((read = bis.read(chunk)) != -1) {
                    baf.append(chunk, 0, read);
                }
                // Copy the buffer once and reuse it for both detection and decoding.
                body = baf.toByteArray();
            } finally {
                // Closing the wrapper also closes the connection's underlying stream.
                bis.close();
            }
        } finally {
            // Release the connection even when the request or the read failed.
            conn.disconnect();
        }

        int encodingGuess = bytesEncodingDetect.detectEncoding(body);
        String charset = BytesEncodingDetect.htmlname[encodingGuess];
        String html = EncodingUtils.getString(body, charset); // convert to a string
        System.out.println(url + ":" + charset);
        return html;
    }
}