获取网页编码的方法,参照了http://huwanting000.blog.163.com/blog/static/49925122201110297549998/里的方法。
/** Charset name returned when neither the response header nor the page declares one. */
private static final String FALLBACK_CHARSET = "GBK";

/** Matches the charset parameter of a Content-Type header value, quoted or unquoted. */
private static final Pattern HEADER_CHARSET_PATTERN = Pattern.compile(
        "charset\\s*=\\s*[\"']?([^\"'\\s;]+)", Pattern.CASE_INSENSITIVE);

/**
 * Matches a charset declared inside an HTML {@code <meta>} tag, covering both
 * {@code <meta charset="utf-8">} and
 * {@code <meta http-equiv="Content-Type" content="text/html; charset=utf-8">}.
 * Group 1 is the bare charset name without surrounding quotes.
 */
private static final Pattern META_CHARSET_PATTERN = Pattern.compile(
        "<meta[^>]+charset\\s*=\\s*[\"']?([^\"'\\s;/>]+)", Pattern.CASE_INSENSITIVE);

/**
 * Determines the character encoding of the page behind the given connection.
 *
 * <p>The Content-Type response header is consulted first. If it carries no charset
 * parameter, the response body is read line by line until a {@code <meta>} tag that
 * declares a charset is found. If neither source yields a charset, or reading the
 * body fails, {@code "GBK"} is returned.
 *
 * <p>Side effects: the response body may be consumed, and the connection is always
 * disconnected before this method returns.
 *
 * @param urlConnection the connection whose response is inspected
 * @return the declared charset name, or {@code "GBK"} when none could be determined
 */
private static String getCharset(HttpURLConnection urlConnection) {
    try {
        String headerCharset = charsetFromContentType(urlConnection.getContentType());
        if (headerCharset != null) {
            return headerCharset;
        }
        // The real encoding is still unknown at this point. ISO-8859-1 maps every byte
        // to a character, so decoding never fails and the ASCII-only <meta> declaration
        // is preserved regardless of the platform default charset.
        try (BufferedReader in = new BufferedReader(new InputStreamReader(
                urlConnection.getInputStream(),
                java.nio.charset.StandardCharsets.ISO_8859_1))) {
            String line;
            while ((line = in.readLine()) != null) {
                Matcher metaMatcher = META_CHARSET_PATTERN.matcher(line);
                if (metaMatcher.find()) {
                    return metaMatcher.group(1);
                }
            }
        }
    } catch (Exception e) {
        // Detection is deliberately best-effort: report the problem and fall back to
        // the default charset instead of failing the caller.
        e.printStackTrace();
    } finally {
        urlConnection.disconnect();
    }
    return FALLBACK_CHARSET;
}

/**
 * Extracts the charset parameter from a Content-Type header value.
 *
 * @param contentType the raw header value, may be {@code null}
 * @return the charset name without quotes or trailing parameters, or {@code null}
 *         if the header is absent or declares no charset
 */
private static String charsetFromContentType(String contentType) {
    if (contentType == null) {
        return null;
    }
    Matcher headerMatcher = HEADER_CHARSET_PATTERN.matcher(contentType);
    return headerMatcher.find() ? headerMatcher.group(1) : null;
}