如何用Java获取网页源代码

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;


public class HtmlParser {
public static String getHtmlContent(URL url, String encode) {
StringBuffer contentBuffer = new StringBuffer();


int responseCode = -1;
HttpURLConnection con = null;
try {
con = (HttpURLConnection) url.openConnection();
con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");// IE代理进行下载
con.setConnectTimeout(60000);
con.setReadTimeout(60000);
// 获得网页返回信息码
responseCode = con.getResponseCode();
if (responseCode == -1) {
System.out.println(url.toString() + " : connection is failure...");
con.disconnect();
return null;
}
if (responseCode >= 400) // 请求失败
{
System.out.println("请求失败:get response code: " + responseCode);
con.disconnect();
return null;
}


InputStream inStr = con.getInputStream();
InputStreamReader istreamReader = new InputStreamReader(inStr, encode);
BufferedReader buffStr = new BufferedReader(istreamReader);


String str = null;
while ((str = buffStr.readLine()) != null)
contentBuffer.append(str);
inStr.close();
} catch (IOException e) {
e.printStackTrace();
contentBuffer = null;
System.out.println("error: " + url.toString());
} finally {
con.disconnect();
}
return contentBuffer.toString();
}


public static String getHtmlContent(String url, String encode) {
if (!url.toLowerCase().startsWith("http://")) {
url = "http://" + url;
}
try {
URL rUrl = new URL(url);
return getHtmlContent(rUrl, encode);
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
public static void main(String argsp[]){
System.out.println(getHtmlContent("http://www.baidu.com","utf-8")) ;

}
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值