/// <summary>
/// Fetches the resource at <paramref name="url"/> and returns its body as text.
/// </summary>
/// <param name="url">Address handed to <c>WebRequest.Create</c>.</param>
/// <returns>
/// The response body decoded as UTF-8. If any exception is thrown while
/// creating the request, obtaining the response, or reading it, the exception
/// is not propagated; instead the literal text "err" is appended to whatever
/// had been read so far and that combined string is returned.
/// </returns>
/// <remarks>
/// The body is always decoded as UTF-8; the charset declared by the response
/// is not consulted. A successful response whose body is literally "err"
/// cannot be told apart from a failure by the return value alone.
/// </remarks>
public static string getPageInfo(String url)
{
    const int BufferSize = 256;
    StringBuilder txthtml = new StringBuilder();
    try
    {
        WebRequest request = WebRequest.Create(url);

        // Dispose the response, its stream and the reader deterministically,
        // on both the success and the failure path.
        using (WebResponse response = request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
        {
            // Read in chunks and append as we go (rather than ReadToEnd) so that
            // text received before a mid-transfer failure is still returned.
            char[] buffer = new char[BufferSize];
            int count;
            while ((count = reader.Read(buffer, 0, buffer.Length)) > 0)
            {
                txthtml.Append(buffer, 0, count);
            }
        }
    }
    catch (Exception)
    {
        // Failure is signalled to the caller through the "err" marker.
        txthtml.Append("err");
    }
    return txthtml.ToString();
}
参考:
http://hi.baidu.com/qljwm/item/5122708b41ad0655e73d1904
http://www.cnblogs.com/Jiajun/archive/2012/06/16/2552103.html 网络爬虫
http://www.dezai.cn/Channel/Web/ArticleShow.aspx?AI=76467 C#抓取网页数据、分析并且去除HTML标签
http://blog.csdn.net/felomeng/article/details/1522292 爬虫/蜘蛛程序的制作(C#语言)
http://blog.csdn.net/felomeng/article/details/1522892 蜘蛛/爬虫程序的多线程控制(C#语言)
http://www.dezai.cn/Channel/Web/ArticleShow.aspx?AI=76467 C#抓取网页源代码(HTML代码)方法
http://www.cnblogs.com/falcon-fei/archive/2012/02/29/2379881.html c#关于网页内容抓取,简单爬虫的实现。(包括动态,静态的)
http://www.189works.com/article-43227-1.html C# 抓取网页内容(转)