输入 URL,把网页内容抓取下来,再进行加工处理并保存到数据库中,这就是采集器的基本原理。
如果用 C# 来实现,可以使用 WebClient 类。
大气象
using
System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Net;
namespace FirstWebClient
{
    /// <summary>
    /// Sample form that downloads a web page with <see cref="WebClient"/> and
    /// displays the raw response text.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the form and runs the designer-generated initialization.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler: requests "/" relative to http://www.cnblogs.com with
        /// browser-like request headers, decodes the response as UTF-8 and
        /// assigns the text to <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // The client, the response stream and the reader are all disposable;
            // the using statements release them even when the request throws.
            using (WebClient client = new WebClient())
            {
                // Literals carry no surrounding whitespace: a header name padded
                // with spaces is not a valid HTTP header name.
                client.BaseAddress = "http://www.cnblogs.com";
                client.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
                client.Headers.Add("Accept-Language", "zh-cn");
                client.Headers.Add("UA-CPU", "x86");
                // NOTE(review): Accept-Encoding was left disabled in the original,
                // presumably because the response is read without decompression.
                // client.Headers.Add("Accept-Encoding", "gzip, deflate");
                client.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");

                // "/" is resolved against BaseAddress.
                using (System.IO.Stream responseStream = client.OpenRead("/"))
                using (System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, System.Text.Encoding.UTF8))
                {
                    // textBox1 is assumed to be declared in the designer part of this partial class.
                    textBox1.Text = reader.ReadToEnd();
                }
            }
        }
    }
}
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Net;
namespace FirstWebClient
{
    /// <summary>
    /// Sample form that downloads a web page with <see cref="WebClient"/> and
    /// displays the raw response text.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// Creates the form and runs the designer-generated initialization.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Click handler: requests "/" relative to http://www.cnblogs.com with
        /// browser-like request headers, decodes the response as UTF-8 and
        /// assigns the text to <c>textBox1</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // The client, the response stream and the reader are all disposable;
            // the using statements release them even when the request throws.
            using (WebClient client = new WebClient())
            {
                // Literals carry no surrounding whitespace: a header name padded
                // with spaces is not a valid HTTP header name.
                client.BaseAddress = "http://www.cnblogs.com";
                client.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
                client.Headers.Add("Accept-Language", "zh-cn");
                client.Headers.Add("UA-CPU", "x86");
                // NOTE(review): Accept-Encoding was left disabled in the original,
                // presumably because the response is read without decompression.
                // client.Headers.Add("Accept-Encoding", "gzip, deflate");
                client.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");

                // "/" is resolved against BaseAddress.
                using (System.IO.Stream responseStream = client.OpenRead("/"))
                using (System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, System.Text.Encoding.UTF8))
                {
                    // textBox1 is assumed to be declared in the designer part of this partial class.
                    textBox1.Text = reader.ReadToEnd();
                }
            }
        }
    }
}
参考: http://www.cnblogs.com/titi/archive/2005/11/20/280914.html