只要指明编码格式，就能正确地读取。
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.nio.CharBuffer;
import java.nio.charset.StandardCharsets;
/**
 * Downloads a single web page, decodes it as UTF-8, and prints its text to
 * standard output.
 *
 * <p>The charset is passed explicitly to the {@link InputStreamReader} so the
 * decoding does not depend on the platform default encoding.
 */
public class PageParser {

    /** Address of the page that {@link #main(String[])} downloads. */
    private static final String PAGE_URL = "http://news.cnool.net/0-1-19/35941/3.html";

    /** Capacity, in chars, of the buffer used for each read from the page. */
    private static final int BUFFER_SIZE = 20480;

    /**
     * Fetches {@link #PAGE_URL} and prints the decoded content to standard output.
     *
     * <p>If the page cannot be opened or read, the stack trace is printed to
     * standard error and nothing is written to standard output.
     *
     * @param args command-line arguments; ignored
     */
    public static void main(String[] args) {
        // try-with-resources closes the reader (and the network stream beneath it)
        // on every path, and the read is never attempted when opening failed.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
                new URL(PAGE_URL).openStream(), StandardCharsets.UTF_8))) {
            System.out.println(readAll(in));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads every remaining character from {@code reader} into a string.
     *
     * <p>The reader is not closed; that is left to the caller.
     *
     * @param reader the source to drain
     * @return all characters read until end of stream; empty if there were none
     * @throws IOException if reading from {@code reader} fails
     */
    static String readAll(Reader reader) throws IOException {
        CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
        StringBuilder text = new StringBuilder();
        while (reader.read(buffer) != -1) {
            buffer.flip();        // expose the chars just read: position 0 .. limit
            text.append(buffer);  // appends the buffer's remaining chars
            buffer.clear();       // restore full capacity; without this the limit
                                  // stays at the size of the previous read
        }
        return text.toString();
    }
}