学生时代的小玩具
用C#.net开发的一个用来抓取当当网计算机类图书信息的工具
Program.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Windows.Forms;
namespace spider
{
/// <summary>
/// Hosts the process entry point of the spider application.
/// </summary>
static class Program
{
    /// <summary>
    /// Main entry point of the application: configures WinForms rendering
    /// and runs the message loop on <see cref="Form1"/>.
    /// </summary>
    [STAThread]
    static void Main()
    {
        // Rendering options are set up first, then the main form is created.
        Application.EnableVisualStyles();
        Application.SetCompatibleTextRenderingDefault(false);

        Form1 mainForm = new Form1();
        Application.Run(mainForm);
    }
}
}
Form1.cs
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.IO;
namespace spider
{
/// <summary>
/// Main window of the spider. Loads one page of the Dangdang category listing
/// in the embedded browser, shows the books parsed from it in a grid and lets
/// the user append them to a text file.
/// </summary>
public partial class Form1 : Form
{
    // Base address of the listing; the page number is appended to it.
    private string url = @"http://category.dangdang.com/all/?category_path=01.54.26.00.00.00&page_index=";

    // Page currently requested. The handlers below keep it at 1 or above.
    private static int page = 1;

    // Parser holding the results of the most recently completed page;
    // null until a page has been parsed.
    private Parse p;

    /// <summary>
    /// Creates the form and its designer-generated controls.
    /// </summary>
    public Form1()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Restarts browsing from the first page.
    /// </summary>
    private void buttonstart_Click(object sender, EventArgs e)
    {
        page = 1;
        Execute();
    }

    /// <summary>
    /// Moves one page back; stays on page 1 when already at the start.
    /// </summary>
    private void buttonprev_Click(object sender, EventArgs e)
    {
        if (page > 1)
        {
            page--;
        }
        Execute();
    }

    /// <summary>
    /// Moves one page forward.
    /// </summary>
    private void buttonnext_Click(object sender, EventArgs e)
    {
        page++;
        Execute();
    }

    /// <summary>
    /// Jumps to the page number typed into textBox2. Input that is not a
    /// positive integer is rejected with a message instead of throwing.
    /// </summary>
    private void buttonjump_Click(object sender, EventArgs e)
    {
        int requested;
        if (!int.TryParse(textBox2.Text, out requested) || requested < 1)
        {
            MessageBox.Show("请输入有效的页码");
            return;
        }

        page = requested;
        Execute();
    }

    /// <summary>
    /// Navigates the browser to the current page and mirrors the address in textBox1.
    /// Parsing happens later, in the DocumentCompleted handler.
    /// </summary>
    private void Execute()
    {
        string target = url + page.ToString();
        webBrowser1.Navigate(target);
        textBox1.Text = target;
        Cursor.Current = Cursors.WaitCursor;
    }

    /// <summary>
    /// Parses the loaded document and binds the resulting table to the grid.
    /// </summary>
    private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
    {
        // DocumentCompleted can be raised more than once per navigation (once per
        // frame); only parse when the browser reports the whole page as complete.
        if (webBrowser1.ReadyState != WebBrowserReadyState.Complete)
        {
            return;
        }

        HtmlDocument doc = webBrowser1.Document;
        if (doc == null)
        {
            Cursor.Current = Cursors.Default;
            return;
        }

        p = new Parse(doc);
        dataGridView1.DataSource = p.dt;

        // Column widths in table order: title, author, publisher, date, description.
        int[] widths = { 150, 150, 150, 80, 450 };
        for (int i = 0; i < widths.Length && i < dataGridView1.Columns.Count; i++)
        {
            dataGridView1.Columns[i].Width = widths[i];
        }

        Cursor.Current = Cursors.Default;
        MessageBox.Show("解析完成");
    }

    /// <summary>
    /// Appends the text form of every parsed book to a file chosen by the user.
    /// </summary>
    private void buttonsave_Click(object sender, EventArgs e)
    {
        // Nothing has been parsed yet (or the page had no books): there is nothing to write.
        if (p == null || p.list == null || p.list.Count == 0)
        {
            MessageBox.Show("没有可保存的数据");
            return;
        }

        using (SaveFileDialog sfd = new SaveFileDialog())
        {
            sfd.DefaultExt = "txt";
            if (sfd.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            string path = sfd.FileName;
            StringBuilder sb = new StringBuilder();
            foreach (Book book in p.list)
            {
                sb.Append(book.ToString());
            }

            try
            {
                // Appends rather than overwrites, so several pages can be saved to one file.
                // NOTE(review): Encoding.Default depends on machine settings; confirm
                // whether a fixed encoding such as UTF-8 would be preferable.
                File.AppendAllText(path, sb.ToString(), Encoding.Default);
            }
            catch (IOException ex)
            {
                MessageBox.Show("保存失败\n" + ex.Message);
                return;
            }
            catch (UnauthorizedAccessException ex)
            {
                MessageBox.Show("保存失败\n" + ex.Message);
                return;
            }

            MessageBox.Show("保存成功\n" + path);
        }
    }
}
}
Book.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace spider
{
/// <summary>
/// One book record: title, author(s), publisher, publication date and description.
/// </summary>
class Book
{
    public string name { get; set; }      // title
    public string author { get; set; }    // author(s)
    public string pub { get; set; }       // publisher
    public string time { get; set; }      // publication date
    public string describ { get; set; }   // description

    /// <summary>
    /// Creates a record with every field unset.
    /// </summary>
    public Book()
    {
    }

    /// <summary>
    /// Creates a record with all five fields supplied.
    /// </summary>
    public Book(string name, string author, string pub, string time, string describ)
    {
        this.describ = describ;
        this.time = time;
        this.pub = pub;
        this.author = author;
        this.name = name;
    }

    /// <summary>
    /// Renders the record as five labelled lines followed by a blank line,
    /// using CRLF line endings. Unset fields render as empty text.
    /// </summary>
    public override string ToString()
    {
        StringBuilder text = new StringBuilder();
        text.Append("书名:").Append(name).Append("\r\n");
        text.Append("作者:").Append(author).Append("\r\n");
        text.Append("出版商:").Append(pub).Append("\r\n");
        text.Append("出版时间:").Append(time).Append("\r\n");
        text.Append("描述:").Append(describ).Append("\r\n\r\n");
        return text.ToString();
    }
}
}
Parse.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Data;
using System.Text.RegularExpressions;
namespace spider
{
/// <summary>
/// Extracts book records from a loaded listing page. Results are exposed both
/// as a <see cref="DataTable"/> (for grid binding) and as a list of <see cref="Book"/>.
/// </summary>
class Parse
{
    // Matches a date written as yyyy-MM-dd. Built once instead of once per book.
    private static readonly Regex PublishDate = new Regex(@"(\d{4})\-(\d{2})\-(\d{2})");

    // Document the records are read from.
    private HtmlDocument dom;

    // One row per book; columns are created in the constructor.
    public DataTable dt { get; set; }

    // The same books as in dt, in the same order.
    public List<Book> list { get; set; }

    /// <summary>
    /// Sets up the result containers and immediately parses <paramref name="dom"/>.
    /// </summary>
    /// <param name="dom">Document to read book entries from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dom"/> is null.</exception>
    public Parse(HtmlDocument dom)
    {
        if (dom == null)
        {
            throw new ArgumentNullException("dom");
        }

        this.dom = dom;
        dt = new DataTable();
        list = new List<Book>();
        dt.Columns.Add("书名");
        dt.Columns.Add("作者");
        dt.Columns.Add("出版社");
        dt.Columns.Add("出版时间");
        dt.Columns.Add("描述");
        Execute();
    }

    /// <summary>
    /// Scans every div of the document and records one book for each div whose
    /// class is "listitem detail". Earlier results are discarded first, so
    /// calling this again does not duplicate entries.
    /// </summary>
    public void Execute()
    {
        dt.Rows.Clear();
        list.Clear();

        foreach (HtmlElement item in dom.GetElementsByTagName("div"))
        {
            // Only divs carrying the book-entry class are of interest.
            if (item.GetAttribute("classname") != "listitem detail")
            {
                continue;
            }

            Book book = ReadBook(item);

            DataRow row = dt.NewRow();
            row["书名"] = book.name;
            row["作者"] = book.author;
            row["出版社"] = book.pub;
            row["出版时间"] = book.time;
            row["描述"] = book.describ;
            dt.Rows.Add(row);
            list.Add(book);
        }
    }

    // Builds one Book from the li elements found inside a book-entry div.
    private static Book ReadBook(HtmlElement item)
    {
        Book book = new Book();
        foreach (HtmlElement li in item.GetElementsByTagName("li"))
        {
            string cls = li.GetAttribute("classname");
            if (cls == "maintitle")
            {
                // Title.
                book.name = li.OuterText;
            }
            else if (cls == "publisher_info")
            {
                ReadPublisherInfo(li, book);
            }
            else if (cls == "describ")
            {
                // Description.
                book.describ = li.OuterText;
            }
        }
        return book;
    }

    // Fills author(s), publisher and publication date from a publisher_info li.
    private static void ReadPublisherInfo(HtmlElement li, Book book)
    {
        StringBuilder authors = new StringBuilder();
        foreach (HtmlElement link in li.GetElementsByTagName("a"))
        {
            string linkName = link.GetAttribute("name");
            if (linkName == "Author")
            {
                // Several authors are joined with commas.
                if (authors.Length == 0)
                {
                    authors.Append(link.OuterText);
                }
                else
                {
                    authors.Append("," + link.OuterText);
                }
            }
            else if (linkName == "Pub")
            {
                // Publisher.
                book.pub = link.OuterText;
            }
        }
        book.author = authors.ToString();

        // OuterText can be null for an empty element; Regex.Match rejects null input.
        Match date = PublishDate.Match(li.OuterText ?? string.Empty);
        if (date.Success)
        {
            // Publication date.
            book.time = date.Value;
        }
    }
}
}