.net 抓取html,ASP.NET抓取网页数据

ASP.NET抓取网页数据

/// <summary>
/// Downloads the page at <paramref name="Url"/> and returns its body decoded as UTF-8.
/// </summary>
/// <param name="Url">Address of the page to fetch; it is passed to WebRequest.Create as-is.</param>
/// <returns>
/// The response body as text, or an empty string when anything goes wrong.
/// Failures are never thrown to the caller; they are reported in a message box.
/// </returns>
private string GetWebContent(string Url)
{
    string strResult = "";

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(Url);

        // Timeout for the request, in milliseconds.
        request.Timeout = 30000;
        request.Headers.Set("Pragma", "no-cache");

        // Dispose the response, its stream and the reader even when reading fails;
        // an undisposed HttpWebResponse keeps its connection checked out.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream streamReceive = response.GetResponseStream())
        using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.UTF8))
        {
            strResult = streamReader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Deliberately broad: callers rely on getting "" instead of an exception.
        // The reason is shown as the dialog text, with the original message as its caption.
        MessageBox.Show(ex.Message, "出错");
    }

    return strResult;
}

/// <summary>
/// Click handler: downloads the data page, cuts out the list that follows the
/// "历史数据" heading, and inserts one row per list item into the "Silver" table.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // NOTE(review): the tag literals passed to IndexOf were stripped from the published
    // snippet (the lines ended right after the opening quote). They are reconstructed
    // here from the surviving "+ 5" offset, which matches the length of "</ul>", and
    // from the "li" lookup below — confirm against the real page markup.
    const string bodyOpenTag = "<body";
    const string listOpenTag = "<ul";
    const string listCloseTag = "</ul>";

    // Page to scrape.
    string Url = "http://kxt.com/data/20.html";

    string strWebContent = GetWebContent(Url);
    richTextBox1.Text = strWebContent;

    // Walk forward marker by marker; a miss (-1) is propagated instead of being used
    // as a start index, which would throw ArgumentOutOfRangeException.
    int iBodyStart = strWebContent.IndexOf(bodyOpenTag, StringComparison.Ordinal);
    int iStart = iBodyStart < 0
        ? -1
        : strWebContent.IndexOf("历史数据", iBodyStart, StringComparison.Ordinal);
    int iTableStart = iStart < 0
        ? -1
        : strWebContent.IndexOf(listOpenTag, iStart, StringComparison.Ordinal);
    int iTableEnd = iTableStart < 0
        ? -1
        : strWebContent.IndexOf(listCloseTag, iTableStart, StringComparison.Ordinal);

    if (iTableEnd < 0)
    {
        // Download failed (content is empty) or the page layout changed.
        MessageBox.Show("出错");
        return;
    }

    // Keep the closing tag so the fragment is well-formed.
    string strWeb = strWebContent.Substring(iTableStart, iTableEnd - iTableStart + listCloseTag.Length);

    // Parse the fragment with an off-screen browser control; dispose it when done.
    using (WebBrowser webb = new WebBrowser())
    {
        webb.Navigate("about:blank");
        HtmlDocument htmldoc = webb.Document.OpenNew(true);
        htmldoc.Write(strWeb);

        HtmlElementCollection htmlTR = htmldoc.GetElementsByTagName("li");

        int i = 0;
        foreach (HtmlElement tr in htmlTR)
        {
            i++;

            // The first item is never imported — presumably a header row; confirm.
            if (i == 1)
            {
                continue;
            }

            // Stop on reaching item number Count - 2; that item and everything after it
            // are left out — presumably trailing non-data items; confirm.
            if (i == htmlTR.Count - 2)
            {
                break;
            }

            HtmlElementCollection spans = tr.GetElementsByTagName("span");
            if (spans.Count < 5)
            {
                // Not a full data row; indexing spans[4] would throw.
                continue;
            }

            string dateTime = spans[0].InnerText;
            string netWeightOunce = spans[1].InnerText;
            string netWeightTon = spans[2].InnerText;
            string totalValue = spans[3].InnerText;
            string regulation = spans[4].InnerText;

            DateTime financeTime;
            if (!DateTime.TryParse(dateTime, out financeTime))
            {
                // Skip rows whose first cell is not a date instead of aborting the import.
                continue;
            }

            // Columns: Id, UpdateTime, NetWeightOunce, NetWeightTon, TotalValue, Regulation, FinanceTime
            SqlServer ado = new SqlServer();
            ado.AddField("UpdateTime", DateTime.Now);
            ado.AddField("NetWeightOunce", netWeightOunce);
            ado.AddField("NetWeightTon", netWeightTon);
            ado.AddField("TotalValue", totalValue);
            ado.AddField("Regulation", regulation);
            ado.AddField("FinanceTime", financeTime.ToString("yyyy-MM-dd"));
            ado.Insert("Silver");
        }
    }

    MessageBox.Show("OK");
}

例子:

using System.Web;

using System.Web.UI;

using System.Web.UI.WebControls;

using System.Net;

using System.IO;

using System.Text;

/// <summary>
/// Web Forms page that downloads a remote page on load and shows the value found
/// after the "长江" and "寸滩" markers in Label_Water_Data, suffixed with "米".
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Fetches the source page, extracts the water-level text and writes it to the label.
    /// When the text cannot be located, an error message is written to the response
    /// and the label is left untouched.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        // Page to scrape.
        string StrUrl = "http://www.cjh.com.cn/sqindex.html";

        string StrWebContent = GetWebContent(StrUrl);
        if (StrWebContent.Length == 0)
        {
            // Nothing to parse. A failed download has already been reported by GetWebContent.
            return;
        }

        string StrWaterLevel;
        if (!TryExtractWaterLevel(StrWebContent, out StrWaterLevel))
        {
            Response.Output.Write("出错啦!");
            return;
        }

        Label_Water_Data.Text = StrWaterLevel + "米";
    }

    /// <summary>
    /// Locates the markers in order, each search starting at the previous match, then
    /// returns the text from 4 characters after the "z" marker up to, but excluding,
    /// the character just before the next "}".
    /// </summary>
    /// <param name="html">Page source to search.</param>
    /// <param name="waterLevel">The extracted text, or null when extraction fails.</param>
    /// <returns>True when every marker was found and the slice is valid.</returns>
    private static bool TryExtractWaterLevel(string html, out string waterLevel)
    {
        waterLevel = null;

        // NOTE(review): the first marker was stripped from the published snippet (the
        // line ended right after the opening quote); "<body" is a reconstruction — confirm.
        string[] markers = { "<body", "长江", "寸滩", "z" };

        int position = 0;
        foreach (string marker in markers)
        {
            position = html.IndexOf(marker, position, StringComparison.Ordinal);
            if (position < 0)
            {
                // Never feed -1 into the next search; IndexOf would throw.
                return false;
            }
        }

        int closingBrace = html.IndexOf("}", position, StringComparison.Ordinal);
        if (closingBrace < 0)
        {
            return false;
        }

        // Offsets are kept from the original code: skip 4 characters from the "z" and drop
        // the character before "}" — presumably field punctuation and a closing quote; confirm.
        int valueStart = position + 4;
        int valueLength = (closingBrace - 1) - valueStart;
        if (valueLength < 0)
        {
            return false;
        }

        waterLevel = html.Substring(valueStart, valueLength);
        return true;
    }

    /// <summary>
    /// Downloads the page at <paramref name="Url"/> and returns its body decoded as UTF-8.
    /// </summary>
    /// <param name="Url">Address of the page to fetch; it is passed to WebRequest.Create as-is.</param>
    /// <returns>
    /// The response body as text, or an empty string when anything goes wrong.
    /// Failures are never thrown; an error message is written to the response instead.
    /// </returns>
    private string GetWebContent(string Url)
    {
        string StrResult = "";

        try
        {
            HttpWebRequest Request = (HttpWebRequest)WebRequest.Create(Url);

            // Timeout for the request, in milliseconds.
            Request.Timeout = 30000;
            Request.Headers.Set("Pragma", "no-cache");

            // Dispose the response, its stream and the reader even when reading fails;
            // an undisposed HttpWebResponse keeps its connection checked out.
            using (HttpWebResponse response = (HttpWebResponse)Request.GetResponse())
            using (Stream streamReceive = response.GetResponseStream())
            using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.UTF8))
            {
                StrResult = streamReader.ReadToEnd();
            }
        }
        catch
        {
            // Deliberately broad: the page must still render when the remote site is down.
            Response.Output.Write("出错啦!");
        }

        return StrResult;
    }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值