本文给出 Java 根据网址获取 HTML 源码的代码,适用于 Windows 和 Linux,会持续更新。
所需导入语句(import):
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
代码部分:
/**
 * Fetches the source of a web page and returns it as a single string.
 *
 * <p>The whole response body is decoded with the given charset and held in
 * memory. The URL must use a protocol whose connection is an
 * {@link HttpURLConnection} (http/https), because the connection is cast to
 * that type.
 *
 * @param urlstr  address of the page to fetch
 * @param charset name of the encoding used by the page, e.g. "utf-8" or "gbk"
 * @return the decoded response body; an empty string if the body is empty
 * @throws IOException if the URL is malformed, the charset name is not
 *                     supported, or connecting to / reading from the server fails
 */
public static String fetchHtml(String urlstr, String charset)
        throws IOException {
    URL url = new URL(urlstr);
    HttpURLConnection con = (HttpURLConnection) url.openConnection();
    StringBuilder result = new StringBuilder();
    // Both resources are declared so the stream is closed even when the
    // reader cannot be created (unsupported charset) or a read fails midway.
    try (InputStream is = con.getInputStream();
            InputStreamReader isr = new InputStreamReader(is, charset)) {
        // Read in chunks: appending one char at a time to a String copies the
        // whole accumulated text on every iteration.
        char[] buffer = new char[8192];
        int read;
        while ((read = isr.read(buffer, 0, buffer.length)) != -1) {
            result.append(buffer, 0, read);
        }
    }
    return result.toString();
}