本文以獲取 http://cheman.chemnet.com/dict/zd.html 網頁中 CAS 號的信息為例,說明如何用 HtmlAgilityPack 抓取網頁內容。
第一步:到 http://htmlagilitypack.codeplex.com/ 網站下載簡單好用的快速 HTML Parser,即 HtmlAgilityPack.dll 文件。
第二步:到VS中建一個控制台應用程式,將HtmlAgilityPack.dll文件引用。
第三步:在CS文件中寫入如下代碼
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;
using System.Net;
using System.IO;
namespace Cas20080503
{
    /// <summary>
    /// Console tool that downloads the numbered CAS dictionary pages from
    /// cheman.chemnet.com, extracts the first list (<c>ul</c>) inside the
    /// page's main definition list with HtmlAgilityPack, and writes the
    /// collected HTML fragments to <c>d:\test.txt</c>.
    /// </summary>
    class Program
    {
        // Accumulates the OuterHtml of every extracted node across all pages;
        // written to disk once, at the end of Main.
        static readonly StringBuilder sb = new StringBuilder();

        /// <summary>
        /// Crawls pages 1..393, then writes everything collected to disk.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Number of pages that were downloaded and processed without a network error.
            int a = 0;
            for (int i = 1; i < 394; i++)
            {
                string url = "http://cheman.chemnet.com/dict/cas/" + i + ".html";
                try
                {
                    GetNode(url);
                    a++;
                }
                catch (WebException ex)
                {
                    // A single unreachable page must not abort the whole crawl and
                    // discard the output gathered so far; report it and move on.
                    Console.Error.WriteLine("Failed to download " + url + ": " + ex.Message);
                }
            }
            System.IO.File.WriteAllText(@"d:\test.txt", sb.ToString()); // persist the collected fragments
            Console.WriteLine("ok a="+a);
            Console.ReadLine();
        }

        /// <summary>
        /// Downloads one page through the configured proxy, locates the list under
        /// the page's definition list and appends each matching node's OuterHtml to
        /// the shared buffer (also echoing its InnerText to the console).
        /// </summary>
        /// <param name="url">Absolute URL of the page to download.</param>
        /// <exception cref="WebException">The download failed.</exception>
        public static void GetNode(string url)
        {
            byte[] pageBytes;
            using (WebClient client = new WebClient())
            {
                // Proxy configuration.
                // NOTE(review): the proxy address and credentials are hard-coded in
                // source; they should be moved to configuration or environment
                // variables and the password rotated.
                WebProxy wp = new WebProxy("192.168.0.83", 80);
                NetworkCredential nc = new NetworkCredential("auyeungck", "it_auyeung41");
                client.Proxy = wp;
                client.Proxy.Credentials = nc;

                pageBytes = client.DownloadData(url);
            }

            // Decode with code page 936 so the Chinese text is not garbled.
            HtmlDocument doc = new HtmlDocument();
            using (MemoryStream ms = new MemoryStream(pageBytes))
            {
                doc.Load(ms, Encoding.GetEncoding(936));
            }

            // First-level query: the definition list that holds the entries.
            // SelectSingleNode returns null when the XPath matches nothing, so a
            // page with a different layout must be skipped rather than dereferenced.
            HtmlNode listContainer = doc.DocumentNode.SelectSingleNode(
                "/html[1]/body[1]/div[3]/div[2]/div[2]/div[1]/dl[1]");
            if (listContainer == null)
            {
                Console.Error.WriteLine("No entry list found at " + url);
                return;
            }

            // Re-parse the container's inner HTML as its own document so the
            // relative XPath below is evaluated against that fragment only.
            HtmlDocument docStockContext = new HtmlDocument();
            docStockContext.LoadHtml(listContainer.InnerHtml);

            // Second-level query: the value list. SelectNodes also returns null
            // (not an empty collection) when there is no match.
            HtmlNodeCollection nodes = docStockContext.DocumentNode.SelectNodes(
                "./dd[1]/ul[1]");
            if (nodes != null)
            {
                foreach (HtmlNode node in nodes)
                {
                    Console.WriteLine("nodes: {0}", node.InnerText);
                    sb.AppendLine(node.OuterHtml); // collect the raw HTML for the output file
                }
            }

            Console.WriteLine("Completed.");
        }
    }
}
參考網站:http://msdn.microsoft.com/zh-tw/ee787055.aspx