1、使用C#中的WebRequest或HttpWebRequest創建WEB實例訪問網頁,或使用WebClient訪問頁面。
2、通過正則表達式匹配字符串獲取要採集的數據。
3、簡單運算得出要採集的字符串。
使用WebRequest類的C#採集器實例代碼如下(精簡):
- using System;
- using System.Collections.Generic;
- using System.ComponentModel;
- using System.Data;
- using System.Drawing;
- using System.Text;
- using System.Windows.Forms;
- using System.IO;
- using System.Net;
- using System.Text.RegularExpressions;
- /// <summary>
- /// Button click handler: downloads the page at http://www.baidu.com/, extracts the text
- /// between the first "&lt;title&gt;" and the next "&lt;/title&gt;", and shows it in a message box.
- /// Nothing is shown when either tag is missing.
- /// </summary>
- /// <param name="sender">Source of the click event (not used).</param>
- /// <param name="e">Event data (not used).</param>
- private void button2_Click(object sender, EventArgs e)
- {
-     string content;
-     WebRequest req = WebRequest.Create("http://www.baidu.com/");
-     // Dispose the response and the reader so the underlying connection and stream are released.
-     using (WebResponse result = req.GetResponse())
-     using (StreamReader sr = new StreamReader(result.GetResponseStream(), System.Text.Encoding.GetEncoding("gb2312"))) // decode the body as GB2312
-     {
-         content = sr.ReadToEnd(); // read the whole response body into memory
-     }
-     string startString = "<title>";  // marker that precedes the text to capture
-     string endString = "</title>";   // marker that follows the text to capture
-     Regex sTitle = new Regex(startString); // regular expression for the opening marker
-     Regex eTitle = new Regex(endString);   // regular expression for the closing marker
-     Match sm = sTitle.Match(content);
-     if (!sm.Success)
-     {
-         return;
-     }
-     int titleStart = sm.Index + sm.Length;
-     // Look for the closing marker only after the opening one; otherwise an earlier
-     // "</title>" would yield a negative length and make Substring throw.
-     Match em = eTitle.Match(content, titleStart);
-     if (!em.Success)
-     {
-         return;
-     }
-     string title = content.Substring(titleStart, em.Index - titleStart);
-     MessageBox.Show(title.Trim());
- }
- using System;
- using System.Collections.Generic;
- using System.ComponentModel;
- using System.Data;
- using System.Drawing;
- using System.Text;
- using System.Windows.Forms;
- using System.IO;
- using System.Net;
- using System.Text.RegularExpressions;
- /// <summary>
- /// Button click handler: downloads the page at http://www.baidu.com/, puts the raw page
- /// text into <c>richTextBox1</c>, then extracts the text between the first "&lt;title&gt;"
- /// and the next "&lt;/title&gt;" and shows it in a message box.
- /// The message box is skipped when either tag is missing.
- /// </summary>
- /// <param name="sender">Source of the click event (not used).</param>
- /// <param name="e">Event data (not used).</param>
- private void button2_Click(object sender, EventArgs e)
- {
-     string content;
-     WebRequest req = WebRequest.Create("http://www.baidu.com/");
-     // Dispose the response and the reader so the underlying connection and stream are released.
-     using (WebResponse result = req.GetResponse())
-     using (StreamReader sr = new StreamReader(result.GetResponseStream(), System.Text.Encoding.GetEncoding("gb2312"))) // decode the body as GB2312
-     {
-         content = sr.ReadToEnd(); // read the whole response body into memory
-     }
-     richTextBox1.Text = content; // show the full downloaded text
-     string startString = "<title>";  // marker that precedes the text to capture
-     string endString = "</title>";   // marker that follows the text to capture
-     Regex sTitle = new Regex(startString); // regular expression for the opening marker
-     Regex eTitle = new Regex(endString);   // regular expression for the closing marker
-     Match sm = sTitle.Match(content);
-     if (!sm.Success)
-     {
-         return;
-     }
-     int titleStart = sm.Index + sm.Length;
-     // Look for the closing marker only after the opening one; otherwise an earlier
-     // "</title>" would yield a negative length and make Substring throw.
-     Match em = eTitle.Match(content, titleStart);
-     if (!em.Success)
-     {
-         return;
-     }
-     string title = content.Substring(titleStart, em.Index - titleStart);
-     MessageBox.Show(title.Trim());
- }
- using System.Net;
- ....
- // Download the page as a string. DownloadString takes the address as a string,
- // so the URL has to be a quoted literal.
- string result;
- using (WebClient client = new WebClient()) // WebClient is IDisposable; release it after the download
- {
-     result = client.DownloadString("http://www.baidu.com/");
- }
- ...