// 进行Email三层采集,但是界面容易死锁,而且很慢。代码如下,请帮忙看看:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Threading;
using System.Collections;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace 采集Email地址
{
public partial class Form1 : Form
{
/// <summary>
/// Callback signature used to hand one harvested address to <see cref="AppendEmail"/>.
/// </summary>
/// <param name="p">The e-mail address to append.</param>
private delegate void AppendEmailDelegate(string p);

/// <summary>
/// Appends one e-mail address, followed by a line break, to textBox4.
/// </summary>
/// <param name="email">The address to append; null or empty values are ignored.</param>
private void AppendEmail(string email)
{
    if (string.IsNullOrEmpty(email))
    {
        return;
    }

    // The original appended "/r/n", which is four literal characters rather
    // than a CR/LF line break, so every address ran into the next one.
    // AppendText also avoids re-copying the whole text that "Text +=" performs
    // on every call.
    this.textBox4.AppendText(email + "\r\n");
}
/// <summary>
/// Creates the form. The only work done here is the call to
/// InitializeComponent, which is defined outside this part of the class.
/// </summary>
public Form1()
{
    this.InitializeComponent();

    // Two statements are kept here commented out, so neither one runs:
    //   Control.CheckForIllegalCrossThreadCalls = false;
    //   this.comboBox1.SelectedIndex = 0;
}
/// <summary>
/// Click handler: fetches the page whose address is in textBox1 and lists
/// every link found on it in listView1.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    // Items.Clear() removes only the rows; ListView.Clear() would also
    // remove the column definitions.
    this.listView1.Items.Clear();

    // Control state is read here, on the UI thread, before going to the background.
    string startUrl = this.textBox1.Text;

    // The download is a blocking network call; running it on the UI thread
    // froze the form until the page had been fetched.
    ThreadPool.QueueUserWorkItem(delegate
    {
        ArrayList linkStr = GetAllURL(startUrl);
        if (linkStr.Count == 0 || this.IsDisposed || !this.IsHandleCreated)
        {
            return;
        }

        try
        {
            // BeginInvoke posts the update to the UI thread without making
            // the worker wait for it.
            this.BeginInvoke(new MethodInvoker(delegate
            {
                this.listView1.BeginUpdate();
                try
                {
                    foreach (object item in linkStr)
                    {
                        this.listView1.Items.Add(item.ToString());
                    }
                }
                finally
                {
                    this.listView1.EndUpdate();
                }
            }));
        }
        catch (InvalidOperationException)
        {
            // The form was closed between the check above and BeginInvoke;
            // there is nothing left to update.
        }
    });
}
/// <summary>
/// Click handler: for every selected entry in listView1, fetches that page,
/// extracts its links and harvests e-mail addresses from each linked page.
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void btnOk_Click(object sender, EventArgs e)
{
    // Snapshot the selection on the UI thread; the work below runs elsewhere.
    List<string> selectedUrls = new List<string>();
    foreach (ListViewItem item in this.listView1.SelectedItems)
    {
        selectedUrls.Add(item.Text);
    }

    if (selectedUrls.Count == 0)
    {
        return;
    }

    // Downloading on the UI thread froze the form, and starting one dedicated
    // thread per link created an unbounded number of threads. The thread pool
    // bounds the concurrency instead.
    ThreadPool.QueueUserWorkItem(delegate
    {
        foreach (string pageUrl in selectedUrls)
        {
            // "\s" restored: the original pattern read "hrefs*=s*", which
            // matches literal 's' characters rather than whitespace.
            ArrayList strUrl = GetWebLinkUrl(pageUrl, @"<a[^<>]*?href\s*=\s*['""\s]([^""']*)['""][^<>]*?>(.*?)</a>");
            foreach (object EmailItem in strUrl)
            {
                ThreadPool.QueueUserWorkItem(GetEmailAddress, EmailItem);
            }
        }
    });
}
/// <summary>
/// Returns the links found on a page and, as a side effect, starts a
/// background e-mail harvest of that same page.
/// </summary>
/// <param name="urlStr">Address of the page to process.</param>
/// <returns>The link addresses reported by GetWebLinkUrl for the page.</returns>
private ArrayList GetAllURL(string urlStr)
{
    // Harvest the page itself in the background. The thread pool replaces the
    // dedicated thread that was started on every call, so crawling many pages
    // no longer creates one thread per page.
    ThreadPool.QueueUserWorkItem(GetEmailAddress, urlStr);

    // Extract the links on the page. "\s" restored: the original pattern read
    // "hrefs*=s*", which matches literal 's' characters rather than whitespace.
    return GetWebLinkUrl(urlStr, @"<a[^<>]*?href\s*=\s*['""\s]([^""']*)['""][^<>]*?>(.*?)</a>");
}
/// <summary>
/// Extracts the e-mail addresses found on a web page and appends the ones
/// not yet present in textBox4.
/// </summary>
/// <param name="urlStr">
/// Address of the page, as a string. Typed as object so the method can be
/// used directly as a thread or thread-pool callback.
/// </param>
private void GetEmailAddress(object urlStr)
{
    // "\b" and "\." restored: the original pattern contained "/b" and "/.",
    // which require literal slashes around the address and so never matched
    // an ordinary e-mail address.
    ArrayList EmailStrs = GetWebInfo((string)urlStr, @"(?<EmailStr>\b[A-Z0-9._%-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)");
    if (EmailStrs.Count == 0 || this.IsDisposed || !this.IsHandleCreated)
    {
        return;
    }

    // Both the duplicate check (which reads textBox4.Text) and the append run
    // inside this delegate so that the control is only touched on the UI
    // thread. Checking on the UI thread also closes the race in which two
    // workers both saw "not present" and appended the same address.
    MethodInvoker appendNew = delegate
    {
        foreach (object found in EmailStrs)
        {
            string address = found.ToString();
            if (!this.textBox4.Text.Contains(address))
            {
                this.AppendEmail(address);
            }
        }
    };

    try
    {
        if (this.InvokeRequired)
        {
            // One asynchronous post per page instead of one blocking Invoke
            // per address: the worker no longer waits on the UI thread.
            this.BeginInvoke(appendNew);
        }
        else
        {
            appendNew();
        }
    }
    catch (InvalidOperationException)
    {
        // The form was closed while the page was being processed;
        // there is nowhere left to show the result.
    }
}
/// <summary>
/// Downloads a page and returns every match of a regular expression in it.
/// </summary>
/// <param name="URlStr">Absolute address of the page to download.</param>
/// <param name="RegExpress">
/// Pattern to search for (matched case-insensitively). If it contains a named
/// group, written "(?&lt;name&gt;...)", the value of the first such group is
/// returned for each match; otherwise the whole match is returned.
/// </param>
/// <returns>
/// The trimmed match values in document order. The list is empty when the
/// page cannot be downloaded or the pattern is invalid; failures are written
/// to the trace output rather than thrown.
/// </returns>
private ArrayList GetWebInfo(string URlStr, string RegExpress)
{
    ArrayList Result = new ArrayList();
    try
    {
        // Open the requested page.
        HttpWebRequest webRequest1 = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
        webRequest1.Method = "GET";

        string textData;
        // The response and its stream must be disposed: an unreleased
        // response keeps its connection occupied, and later requests to the
        // same host then wait for a free connection.
        using (HttpWebResponse response = (HttpWebResponse)webRequest1.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.Default))
        {
            textData = reader.ReadToEnd();
        }

        // RegexOptions.Compiled was dropped: this instance is used once, so
        // paying the compilation cost on every call only slowed things down.
        Regex r = new Regex(RegExpress, RegexOptions.IgnoreCase);

        // Find the name of the first named group, if the pattern has one.
        string DestionKey = null;
        int pos1 = RegExpress.IndexOf("(?<", StringComparison.Ordinal);
        if (pos1 >= 0)
        {
            int pos2 = RegExpress.IndexOf(">", pos1, StringComparison.Ordinal);
            if (pos2 > pos1 + 3)
            {
                string candidate = RegExpress.Substring(pos1 + 3, pos2 - pos1 - 3);
                // Ignore text that is not really a group name (e.g. a lookbehind).
                if (r.GroupNumberFromName(candidate) >= 0)
                {
                    DestionKey = candidate;
                }
            }
        }

        for (Match m = r.Match(textData); m.Success; m = m.NextMatch())
        {
            string AdStr = DestionKey != null ? m.Groups[DestionKey].Value : m.Value;
            Result.Add(AdStr.Trim());
        }
    }
    catch (Exception ex)
    {
        // Best effort by design: one unreachable or malformed page must not
        // stop the crawl, but the failure is at least made visible.
        System.Diagnostics.Trace.WriteLine("GetWebInfo failed for '" + URlStr + "': " + ex.Message);
    }
    return Result;
}
/// <summary>
/// Downloads a page and returns the addresses of the links found on it.
/// </summary>
/// <param name="URlStr">Absolute address of the page to download.</param>
/// <param name="RegExpress">
/// Pattern that matches a link (matched case-insensitively); its first
/// capture group must contain the link target.
/// </param>
/// <returns>
/// The distinct http/https link targets as absolute addresses, in document
/// order. The list is empty when the page cannot be downloaded or the pattern
/// is invalid; failures are written to the trace output rather than thrown.
/// </returns>
private ArrayList GetWebLinkUrl(string URlStr, string RegExpress)
{
    ArrayList strLink = new ArrayList();
    try
    {
        // Read the source of the requested page. The original assigned
        // Method = "Post" only after GetResponse() had already returned, so
        // that assignment came too late to affect the request.
        Uri pageUri = new Uri(URlStr);
        HttpWebRequest req = (HttpWebRequest)WebRequest.Create(pageUri);
        req.Method = "GET";

        string responseText;
        // Dispose the response and reader even when reading fails, so the
        // connection is always released.
        using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
        using (StreamReader reader = new StreamReader(res.GetResponseStream(), Encoding.Default))
        {
            responseText = reader.ReadToEnd();
        }

        // Keep the dump of the last page in "myFile2", but do not let a
        // failed write (file in use, read-only directory) discard the links.
        try
        {
            File.WriteAllText("myFile2", responseText);
        }
        catch (IOException)
        {
        }
        catch (UnauthorizedAccessException)
        {
        }

        // Collect all links. RegexOptions.Compiled was dropped: the instance
        // is used once, so compiling it on every call only cost time.
        HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);
        Regex reg = new Regex(RegExpress, RegexOptions.IgnoreCase);
        for (Match mch = reg.Match(responseText); mch.Success; mch = mch.NextMatch())
        {
            string href = mch.Groups[1].Value.Trim();
            if (href.Length == 0)
            {
                continue;
            }

            // Resolve against the page address instead of concatenating
            // strings, which produced invalid addresses for "/x" or "../x".
            Uri target;
            if (!Uri.TryCreate(pageUri, href, out target))
            {
                continue;
            }

            // Only pages that can be downloaded are useful to the callers;
            // this also accepts https, which Contains("http:") missed.
            if (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps)
            {
                continue;
            }

            // A page often repeats the same link; return each target once.
            if (seen.Add(target.AbsoluteUri))
            {
                strLink.Add(target.AbsoluteUri);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort by design: one unreachable or malformed page must not
        // stop the crawl, but the failure is at least made visible.
        System.Diagnostics.Trace.WriteLine("GetWebLinkUrl failed for '" + URlStr + "': " + ex.Message);
    }
    return strLink;
}
/// <summary>
/// Click handler: starts an e-mail harvest from the page in textBox1, to the
/// depth selected in comboBox1 ("第一层" = the page itself, "第二层" = also the
/// pages it links to, "第三层" = also the pages those pages link to).
/// </summary>
/// <param name="sender">The control that raised the event.</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click_1(object sender, EventArgs e)
{
    // Control state is read here, on the UI thread. All downloading happens
    // on thread-pool threads so the form stays responsive.
    string path1 = textBox1.Text;
    switch (this.comboBox1.Text)
    {
        case "第一层":
            ThreadPool.QueueUserWorkItem(GetEmailAddress, path1);
            break;
        case "第二层":
            ThreadPool.QueueUserWorkItem(delegate { HarvestSecondLayer(path1); });
            break;
        case "第三层":
            // Items.Clear() removes only the rows; ListView.Clear() would
            // also remove the column definitions.
            this.listView1.Items.Clear();
            ThreadPool.QueueUserWorkItem(delegate { HarvestThirdLayer(path1); });
            break;
        default:
            break;
    }
}

/// <summary>
/// Harvests the start page (done inside GetAllURL) and every page it links to.
/// Runs on a worker thread.
/// </summary>
private void HarvestSecondLayer(string startUrl)
{
    foreach (string link in GetAllURL(startUrl))
    {
        // QueueUserWorkItem reports failure by throwing, so the former
        // check of its return value could never fire and was removed.
        ThreadPool.QueueUserWorkItem(GetEmailAddress, link);
    }
}

/// <summary>
/// Harvests the start page, the pages it links to (both done inside
/// GetAllURL) and the pages those link to; the third-level addresses are also
/// shown in listView1. Runs on a worker thread.
/// </summary>
private void HarvestThirdLayer(string startUrl)
{
    foreach (string secondLevelUrl in GetAllURL(startUrl))
    {
        ArrayList thirdLevelUrls = GetAllURL(secondLevelUrl);
        ShowLinksInList(thirdLevelUrls);

        foreach (string thirdLevelUrl in thirdLevelUrls)
        {
            // The original queued the second-level address here, so the same
            // page was harvested once per link and the third level was never
            // visited.
            ThreadPool.QueueUserWorkItem(GetEmailAddress, thirdLevelUrl);

            // Pacing delay kept from the original; it now runs on a worker
            // thread and no longer stalls the form.
            Thread.Sleep(30);
        }
    }
}

/// <summary>
/// Adds the given addresses to listView1 on the UI thread without blocking the caller.
/// </summary>
private void ShowLinksInList(ArrayList links)
{
    if (links.Count == 0 || this.IsDisposed || !this.IsHandleCreated)
    {
        return;
    }

    try
    {
        this.BeginInvoke(new MethodInvoker(delegate
        {
            this.listView1.BeginUpdate();
            try
            {
                foreach (string link in links)
                {
                    this.listView1.Items.Add(link);
                }
            }
            finally
            {
                this.listView1.EndUpdate();
            }
        }));
    }
    catch (InvalidOperationException)
    {
        // The form was closed while the crawl was running; nothing to update.
    }
}
}
}