要读取网页www.bnu.edu.cn的内容,用以下两种方式读取,第一种显示正常,第二种却总是乱码。请问第二种的读取问题出在了哪里?
方法一:
String url = "http://www.bnu.edu.cn";//www.bnu.edu.cn";
URL theUrl= new URL(url);
openStream = theUrl.openStream();
//
//构建输入流的的字符集必须和HTML源码中的 charset一致
bf = new BufferedReader(new InputStreamReader(openStream,"utf-8"));
String line = null;
while((line = bf.readLine())!=null) {
System.out.println(line);
}
方法二:
Socket webClient = new Socket ("www.bnu.edu.cn", 80);
PrintWriter result = new PrintWriter(webClient.getOutputStream(), true);
BufferedReader receiver = new BufferedReader (new InputStreamReader(webClient.getInputStream()));
result.println("GET / HTTP/1.1");
result.println("Host: bnu.edu.cn");
result.println("Connection: Close");
result.println();
boolean bRet = true;
StringBuffer sb = new StringBuffer (8096);
while (bRet) {
if (receiver.ready()) {
int idx = 0;
while (idx!=-1) {
idx = receiver.read();
sb.append((char)idx);
}
bRet = false;
}
}
byte [] b = (sb.toString()).getBytes("UTF-8");//"GBK", "ISO-88591-1";
String str = new String(b,"GB2312");
System.out.println(str);
webClient.close();