抓取网页HTML源码如下:
/// <summary>
/// Downloads the HTML source of http://www.baidu.com/ using an HTTP GET request.
/// </summary>
/// <returns>
/// The response body decoded as UTF-8. If any step fails, a message box is
/// shown and an empty string is returned.
/// </returns>
public string getHTML()
{
    string strResult = "";
    try
    {
        // Fetch the page source.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create("http://www.baidu.com/");
        request.Timeout = 30000; // milliseconds
        request.Method = "GET";
        // Some sites with hotlink protection reject requests that lack a
        // browser-like User-Agent, so send one.
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.1) Web-Sniffer/1.0.24";

        // Dispose the response, its stream and the reader even when reading
        // throws; otherwise the underlying connection is not released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream streamReceive = response.GetResponseStream())
        {
            // If the text comes back garbled, try GB2312 or UTF-8 here.
            Encoding encoding = Encoding.GetEncoding("UTF-8");
            using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
            {
                strResult = streamReader.ReadToEnd();
            }
        }
    }
    catch
    {
        // Best-effort: report the failure to the user and fall through to
        // return the empty default.
        MessageBox.Show("出错");
    }

    return strResult;
}
可能遇到的问题:
1、乱码:可以尝试换用不同的编码方式(例如 GB2312 或 UTF-8)。
2、远程服务器错误:内部服务器错误(500)。如果遇到这种情况,可以尝试加上这句代码:
request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.1) Web-Sniffer/1.0.24";