问题求助

我在做 Email 三层采集(起始页 → 页面中的链接 → 链接页面中的链接),但界面很容易卡死(像死锁一样无响应),而且采集速度很慢。代码如下,请帮忙看看:

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Threading;
using System.Collections;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;

namespace 采集Email地址
{
    public partial class Form1 : Form
    {
        private delegate void AppendEmailDelegate(string p);

        private void AppendEmail(string email)
        {
            this.textBox4.Text += email.ToString() + "/r/n";
        }

        public Form1()
        {
            InitializeComponent();
            //  Control.CheckForIllegalCrossThreadCalls = false;
            //  this.comboBox1.SelectedIndex = 0;
        }

        private void button1_Click(object sender, EventArgs e)
        {
            if (this.listView1.Items.Count > 0)
            {
                this.listView1.Clear();
            }

            ArrayList linkStr = GetAllURL(this.textBox1.Text);
            foreach (var item in linkStr)
            {
                //  Invoke(new AppendTextDelegate(AppendText), new object[] { item + "/r/n" });
                listView1.Items.Add(item.ToString());
            }

        }


        private void btnOk_Click(object sender, EventArgs e)
        {
            foreach (ListViewItem item in listView1.Items)
            {
                if (item.Selected)
                {
                    ArrayList strUrl = GetWebLinkUrl(item.Text, @"<a[^<>]*?hrefs*=s*['""s]([^""']*)['""][^<>]*?>(.*?)</a>");
                    foreach (var EmailItem in strUrl)
                    {
                        //  GetEmailAddress(EmailItem + "/r/n");
                        ParameterizedThreadStart threadStart = new ParameterizedThreadStart(GetEmailAddress);
                        Thread thread = new Thread(threadStart);
                        thread.Start(EmailItem);
                    }
                }
            }
        }

        //得到所有链接打开的网页
        private ArrayList GetAllURL(string urlStr)
        {
            new Thread(new ParameterizedThreadStart(GetEmailAddress)).Start(urlStr);
            // 处理页面中的Link
            ArrayList linkStrs = GetWebLinkUrl((string)urlStr, @"<a[^<>]*?hrefs*=s*['""s]([^""']*)['""][^<>]*?>(.*?)</a>");
            return linkStrs;
        }

        /// <summary>
        /// 提取网页中的Eamil
        ///</summary>
        /// <param name="urlStr">网页地址</param>
        private void GetEmailAddress(object urlStr)
        {
            ArrayList EmailStrs = GetWebInfo((string)urlStr, @"(?<EmailStr>/b[A-Z0-9._%-]+@[A-Z0-9._%-]+/.[A-Z]{2,4}/b)");

            //得到Email
            foreach (object tmp in EmailStrs)
            {
                // Invoke(new AppendTextDelegate(AppendText), new object[] { tmp + "/r/n" });
                if (!this.textBox4.Text.Contains(tmp.ToString()))
                {
                    AppendEmailDelegate email = new AppendEmailDelegate(this.AppendEmail);
                    this.Invoke(email, new object[] { tmp });
                }

            }
        }

 

        private ArrayList GetWebInfo(string URlStr, string RegExpress)
        {
            ArrayList Result = new ArrayList();
            try
            {
                //打开指定页
                HttpWebRequest webRequest1 = (HttpWebRequest)WebRequest.Create(new Uri(URlStr));
                webRequest1.Method = "GET";
                HttpWebResponse response = (HttpWebResponse)webRequest1.GetResponse();
                String textData = new StreamReader(response.GetResponseStream(), Encoding.Default).ReadToEnd();


                //用正则表达式,提取指定内容,带一个变量
                Regex r;
                Match m;
                r = new Regex(RegExpress,   //@"copyTitle./'(?<AdInfo>.*)/'",
                    RegexOptions.IgnoreCase | RegexOptions.Compiled);
                int pos1 = RegExpress.IndexOf("(?<");
                int pos2 = RegExpress.IndexOf(">", pos1);
                string DestionKey = RegExpress.Substring(pos1 + 3, pos2 - pos1 - 3);

                string AdStr = "";

                for (m = r.Match(textData); m.Success; m = m.NextMatch())
                {
                    AdStr = m.Result("${" + DestionKey + "}").Trim();   //地址
                    Result.Add(AdStr);
                }
            }
            catch (Exception)
            {

            }
            return Result;
        }


        /// <summary>
        /// 得到网页所有的链接
        /// </summary>
        /// <param name="URlStr">网页地址</param>
        /// <param name="RegExpress">正则表达式</param>
        /// <returns>返回所有的链接地址</returns>
        private ArrayList GetWebLinkUrl(string URlStr, string RegExpress)
        {
            ArrayList strLink = new ArrayList();
            try
            {
                string responseText;
                //读取指定网页的源文件
                //  Uri url = new Uri(@"http://www.99inf.com/html/1070414.html");
                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(URlStr);
                HttpWebResponse res = (HttpWebResponse)req.GetResponse();
                req.Method = "Post";
                StreamReader reader = new StreamReader(res.GetResponseStream(), Encoding.Default);
                responseText = reader.ReadToEnd();

                //得到所有的链接
                StreamWriter saveFile = new StreamWriter("myFile2");
                saveFile.Write(responseText);
                saveFile.Close();
                res.Close();
                Regex reg = null;
                Match mch = null;
                //  StringBuilder sb = new StringBuilder();

                reg = new Regex(RegExpress, RegexOptions.IgnoreCase | RegexOptions.Compiled);
                for (mch = reg.Match(responseText); mch.Success; mch = mch.NextMatch())
                {
                    if (mch.Groups[1].Value.Contains("http:"))
                    {
                        strLink.Add(mch.Groups[1].Value);
                    }
                    else
                    {
                        strLink.Add(URlStr + mch.Groups[1].Value);
                    }
                }
            }
            catch (Exception)
            {
            }
            return strLink;
        }

 

        private void button1_Click_1(object sender, EventArgs e)
        {
            string path1 = textBox1.Text;
            switch (this.comboBox1.Text)
            {
                case "第一层":
                    GetEmailAddress(path1);
                    break;
                case "第二层":

                    ArrayList link = GetAllURL(path1);
                    foreach (string item in link)
                    {
                        bool result = System.Threading.ThreadPool.QueueUserWorkItem(GetEmailAddress, item);

                        if (!result)
                            MessageBox.Show("分布线程失败");
                    }
                    break;
                case "第三层":
                    this.listView1.Clear();
                    ArrayList linkThird = GetAllURL(path1);
                    foreach (string item in linkThird)
                    {
                        ArrayList strUrl = GetAllURL(item);
                        foreach (string EmailItem in strUrl)
                        {
                            this.listView1.Items.Add(EmailItem);
                            bool result = System.Threading.ThreadPool.QueueUserWorkItem(GetEmailAddress, item);

                            if (!result)
                                MessageBox.Show("分布线程失败");
                            Thread.Sleep(30);
                        }
                    }
                    break;
                default:
                    break;
            }
        }
    }
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值