// C# web scraper example (original title: "c#采集")

using System;
using System.Collections;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text.RegularExpressions;
using System.Threading;
using System.Windows.Forms;

namespace WindowsApplication1
{
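    /// <summary>
    /// Main form of the scraper: downloads a list page, extracts article
    /// links from it, then downloads each article and posts it to a local page.
    /// </summary>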
    public class Form1 : System.Windows.Forms.Form
    {
        // URL of the list page to download; read by button1_Click.
        private System.Windows.Forms.TextBox HttpUrl;
        // "读取网站": downloads the page named in HttpUrl (button1_Click).
        private System.Windows.Forms.Button button1;
        // Receives the downloaded list-page text; button2_Click searches it for links.
        // geturl also overwrites it with the index of the article just processed.
        private System.Windows.Forms.TextBox textBox2;
        // "配列表": extracts article links and titles (button2_Click).
        private System.Windows.Forms.Button button2;
        // Article URLs found by button2_Click; geturl walks this list.
        private System.Windows.Forms.ListBox listBox1;
        // "number;title" entries added by button2_Click for each listed URL.
        private System.Windows.Forms.ListBox listBox2;
        // Used by geturl to show the current article URL and then its downloaded text.
        private System.Windows.Forms.TextBox textBox1;
        // "读取内容": starts geturl on a new thread (button3_Click).
        private System.Windows.Forms.Button button3;
        // "清空数据": clears textBox1, textBox2, listBox1 and listBox2 (button4_Click).
        private System.Windows.Forms.Button button4;
        // Advanced by one for every article geturl finishes.
        private System.Windows.Forms.ProgressBar progressBar1;
        // Shows the article title extracted by geturl.
        private System.Windows.Forms.TextBox textBox3;
        // Shows the article body extracted by geturl, or marker offsets when extraction fails.
        private System.Windows.Forms.TextBox textBox4;
        // Shows the response returned by the page geturl posts to.
        private System.Windows.Forms.TextBox textBox5;
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.Container components = null;

        public Form1()
        {
            //
            // Windows 窗体设计器支持所必需的
            //
            InitializeComponent();

            //
            // TODO: 在 InitializeComponent 调用后添加任何构造函数代m
            //
        }

        /// <summary>
        /// Cleans up any resources being used.
        /// </summary>
        protected override void Dispose( bool disposing )
        {
            if( disposing )
            {
                if (components != null)
                {
                    components.Dispose();
                }
            }
            base.Dispose( disposing );
        }

        #region Windows Form Designer generated code
        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
            // Designer-generated: creates every control, sets its layout and
            // initial text, hooks up event handlers, and adds it to the form.

            // Instantiate all controls before configuring any of them.
            this.HttpUrl = new System.Windows.Forms.TextBox();
            this.button1 = new System.Windows.Forms.Button();
            this.textBox2 = new System.Windows.Forms.TextBox();
            this.button2 = new System.Windows.Forms.Button();
            this.listBox1 = new System.Windows.Forms.ListBox();
            this.listBox2 = new System.Windows.Forms.ListBox();
            this.textBox1 = new System.Windows.Forms.TextBox();
            this.button3 = new System.Windows.Forms.Button();
            this.button4 = new System.Windows.Forms.Button();
            this.progressBar1 = new System.Windows.Forms.ProgressBar();
            this.textBox3 = new System.Windows.Forms.TextBox();
            this.textBox4 = new System.Windows.Forms.TextBox();
            this.textBox5 = new System.Windows.Forms.TextBox();
            // Layout is suspended here and resumed by ResumeLayout at the end.
            this.SuspendLayout();
            //
            // HttpUrl - list-page URL, pre-filled with a default address
            //
            this.HttpUrl.Location = new System.Drawing.Point(16, 16);
            this.HttpUrl.Name = "HttpUrl";
            this.HttpUrl.Size = new System.Drawing.Size(280, 21);
            this.HttpUrl.TabIndex = 0;
            this.HttpUrl.Text = "http://www.playasp.com/article/22/ArticleList22_1.html";
            // Note: despite its name, textBox1_TextChanged is attached to HttpUrl.
            this.HttpUrl.TextChanged += new System.EventHandler(this.textBox1_TextChanged);
            //
            // button1 - downloads the page named in HttpUrl
            //
            this.button1.Location = new System.Drawing.Point(312, 16);
            this.button1.Name = "button1";
            this.button1.TabIndex = 1;
            this.button1.Text = "读取网站";
            this.button1.Click += new System.EventHandler(this.button1_Click);
            //
            // textBox2 - multi-line box that receives the downloaded list page
            //
            this.textBox2.Location = new System.Drawing.Point(16, 56);
            this.textBox2.Multiline = true;
            this.textBox2.Name = "textBox2";
            this.textBox2.Size = new System.Drawing.Size(424, 80);
            this.textBox2.TabIndex = 2;
            this.textBox2.Text = "textBox2";
            //
            // button2 - extracts article links from textBox2
            //
            this.button2.Location = new System.Drawing.Point(24, 288);
            this.button2.Name = "button2";
            this.button2.TabIndex = 4;
            this.button2.Text = "配列表";
            this.button2.Click += new System.EventHandler(this.button2_Click);
            //
            // listBox1 - extracted article URLs
            //
            this.listBox1.ItemHeight = 12;
            this.listBox1.Location = new System.Drawing.Point(16, 144);
            this.listBox1.Name = "listBox1";
            this.listBox1.Size = new System.Drawing.Size(424, 64);
            this.listBox1.TabIndex = 5;
            //
            // listBox2 - extracted article titles
            //
            this.listBox2.ItemHeight = 12;
            this.listBox2.Location = new System.Drawing.Point(16, 216);
            this.listBox2.Name = "listBox2";
            this.listBox2.Size = new System.Drawing.Size(424, 64);
            this.listBox2.TabIndex = 6;
            //
            // textBox1 - multi-line box showing the article currently being processed
            //
            this.textBox1.Location = new System.Drawing.Point(16, 328);
            this.textBox1.Multiline = true;
            this.textBox1.Name = "textBox1";
            this.textBox1.Size = new System.Drawing.Size(424, 96);
            this.textBox1.TabIndex = 7;
            this.textBox1.Text = "textBox1";
            //
            // button3 - starts the download-and-post run
            //
            this.button3.Location = new System.Drawing.Point(240, 520);
            this.button3.Name = "button3";
            this.button3.TabIndex = 8;
            this.button3.Text = "读取内容";
            this.button3.Click += new System.EventHandler(this.button3_Click);
            //
            // button4 - clears collected data
            //
            this.button4.Location = new System.Drawing.Point(360, 520);
            this.button4.Name = "button4";
            this.button4.TabIndex = 9;
            this.button4.Text = "清空数据";
            this.button4.Click += new System.EventHandler(this.button4_Click);
            //
            // progressBar1 - progress of the download-and-post run
            //
            this.progressBar1.Location = new System.Drawing.Point(16, 488);
            this.progressBar1.Name = "progressBar1";
            this.progressBar1.Size = new System.Drawing.Size(416, 23);
            this.progressBar1.TabIndex = 10;
            this.progressBar1.Click += new System.EventHandler(this.progressBar1_Click);
            //
            // textBox3 - extracted article title
            //
            this.textBox3.Location = new System.Drawing.Point(120, 288);
            this.textBox3.Name = "textBox3";
            this.textBox3.Size = new System.Drawing.Size(320, 21);
            this.textBox3.TabIndex = 11;
            this.textBox3.Text = "textBox3";
            //
            // textBox4 - extracted article body
            //
            this.textBox4.Location = new System.Drawing.Point(16, 440);
            this.textBox4.Multiline = true;
            this.textBox4.Name = "textBox4";
            this.textBox4.Size = new System.Drawing.Size(416, 40);
            this.textBox4.TabIndex = 12;
            this.textBox4.Text = "textBox4";
            //
            // textBox5 - response from the page the article is posted to
            //
            this.textBox5.Location = new System.Drawing.Point(472, 64);
            this.textBox5.Multiline = true;
            this.textBox5.Name = "textBox5";
            this.textBox5.Size = new System.Drawing.Size(232, 448);
            this.textBox5.TabIndex = 13;
            this.textBox5.Text = "textBox5";
            //
            // Form1 - window size and child controls
            //
            this.AutoScaleBaseSize = new System.Drawing.Size(6, 14);
            this.ClientSize = new System.Drawing.Size(712, 549);
            this.Controls.Add(this.textBox5);
            this.Controls.Add(this.textBox4);
            this.Controls.Add(this.textBox3);
            this.Controls.Add(this.progressBar1);
            this.Controls.Add(this.button4);
            this.Controls.Add(this.button3);
            this.Controls.Add(this.textBox1);
            this.Controls.Add(this.listBox2);
            this.Controls.Add(this.listBox1);
            this.Controls.Add(this.button2);
            this.Controls.Add(this.textBox2);
            this.Controls.Add(this.button1);
            this.Controls.Add(this.HttpUrl);
            this.Name = "Form1";
            this.Text = "Form1";
            this.ResumeLayout(false);

        }
        #endregion

        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        [STAThread]
        static void Main()
        {
            Application.Run(new Form1());
        }

        private void textBox1_TextChanged(object sender, System.EventArgs e)
        {
            // Intentionally empty. InitializeComponent attaches this handler to
            // HttpUrl.TextChanged (not to textBox1); editing the URL triggers no action.
        }

   
        private void button1_Click(object sender, System.EventArgs e)
        {
            string get_url=HttpUrl.Text;
            HttpWebRequest MyRequest = (HttpWebRequest)WebRequest.Create(get_url);
            HttpWebResponse MyResponse = (HttpWebResponse)MyRequest.GetResponse();
            Stream MyInStream = null;
            MyInStream = MyResponse.GetResponseStream();
            long fileSizeInBytes = MyResponse.ContentLength;
            //创建文件流对象

            int length = 10240;
            byte[] buffer = new byte[10250];
            int bytesread = 0;
            string strtemp = "";
            while ((bytesread = MyInStream.Read(buffer, 0, length)) > 0)
            {    //把数据写入文件

                strtemp += System.Text.Encoding.Default.GetString(buffer, 0, bytesread);
            }

            textBox2.Text=strtemp;
        }

        private void button2_Click(object sender, System.EventArgs e)
        {
            string Match_Url="http://www.playasp.com/article/22/Article"+@"/d{4}_/d{1}.html";

            Regex re = new Regex(Match_Url);
            MatchCollection matches = re.Matches(textBox2.Text);
            System.Collections.IEnumerator enu = matches.GetEnumerator();
                         int j=0;       
           
           
            while (enu.MoveNext() && enu.Current != null)
            {
                Match match = (Match)(enu.Current);
                int kg = 0;
                for (int i = 1; i < listBox1.Items.Count; i++)
                {
                        string mytemp=listBox1.Items[i].ToString();
                    if (match.Value==mytemp )
                    {
                        kg = 1;
                        break;
                    }

                }
   
                if (kg == 0)
                {
                   
                    j++;
                    listBox1.Items.Add(match.Value);
                    Regex rr = new Regex(match.Value + "[^<]+>+([^<]*)</a>", RegexOptions.IgnoreCase);
                    Match mm = rr.Match(textBox2.Text);
                    while (mm.Success)
                    {
                        Group g11 = mm.Groups[1];
                        string mynewstitle = g11.ToString().Trim();
                        mm = mm.NextMatch();
                        listBox2.Items.Add(j.ToString()+";"+mynewstitle);
                    }

                }
            }
   
        int mycount = listBox1.Items.Count ;
        MessageBox.Show("链接地址提取" + mycount + "成功!", "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);

        }

        private void button3_Click(object sender, System.EventArgs e)
        {
            //listBox1.Items.Clear();
            //listBox2.Items.Clear();
            Thread t = new Thread(new ThreadStart(geturl));
            t.Start();
        }

        private void button4_Click(object sender, System.EventArgs e)
        {
            textBox1.Text="";
            textBox2.Text="";
            listBox1.Items.Clear();
            listBox2.Items.Clear();

        }

 

            // Working state written by geturl while it processes one article.
            // Length of the article body: offset of the end marker minus offset of the start marker.
            int myint;
        // Title extracted from the article's <title> element.
        string newstitle="";
        // Article body cut out of the downloaded page.
        string newcontent;

        public void geturl()//新闻入库
        {

           
            this.progressBar1.Maximum=listBox1.Items.Count;
            for (int i = 0; i < listBox1.Items.Count; i++)

            {
                textBox1.Text = listBox1.Items[i].ToString();
                string URL = this.textBox1.Text.Trim();
                //加"http://"标志
                if (URL.IndexOf(@"http://") == -1)
                {
                    URL = @"http://" + URL;
                }
                HttpWebRequest MyRequest = (HttpWebRequest)WebRequest.Create(URL);
                //发送请求,获取响应
                Stream MyInStream = null;
                try
                {
                    HttpWebResponse MyResponse = (HttpWebResponse)MyRequest.GetResponse();             
                 
                    MyInStream = MyResponse.GetResponseStream();
                    long fileSizeInBytes = MyResponse.ContentLength;
                    //创建文件流对象

                    int length = 10240;
                    byte[] buffer = new byte[10250];
                    int bytesread = 0;
                    string strtemp = "";
                    //从网络读取数据
                    while ((bytesread = MyInStream.Read(buffer, 0, length)) > 0)
                    {    //把数据写入文件

                        strtemp += System.Text.Encoding.Default.GetString(buffer, 0, bytesread);
                        textBox1.Text = strtemp;
                    }
               
                }
                catch (Exception Err)
                {
                    MessageBox.Show("读取网页失败!错误是:" + Err.Message, "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                }
                finally
                {
                    //关闭流
                    if (MyInStream != null)
                    {
                        MyInStream.Close();
                    }

                }
                //分析所抓的数据
                //分析标题
   
                Regex rr = new Regex("<title>([^<]*)</title>", RegexOptions.IgnoreCase);
                Match mm = rr.Match(textBox1.Text.Replace("<title>'+document.title+'</title>",""));
                while (mm.Success)
                {
                    Group g11 = mm.Groups[1];
                    newstitle = g11.ToString().Trim();
                    mm = mm.NextMatch();
                    newstitle=newstitle.Replace("、", "");
                    newstitle = newstitle.Replace("、", "");
                    newstitle = newstitle.Replace(@"""", """);

                    textBox3.Text = newstitle;


                }
                //MessageBox.Show("到这里来", "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
               
                try
                {
                    string mystring = textBox1.Text;
                     myint = mystring.IndexOf(@"相关文章") - mystring.IndexOf(@"<TD vAlign=top>");
                    newcontent = mystring.Substring(mystring.IndexOf(@"<TD vAlign=top>"), myint);

                    textBox4.Text=newcontent;
                }
                catch
                {
                    string mystring = textBox1.Text;
                    textBox4.Text=Convert.ToString(mystring.IndexOf(@"</TD></TR></TBODY></TABLE>")+"+"+mystring.IndexOf(@"<TD vAlign=top>"));
                }
                finally
                {

                }

 

                //分析完,开始提交数据


                //ASCIIEncoding encoding = new ASCIIEncoding();
                //if(newcontent=="error")
                //{
                //    newstitle = "error";
                          
                //}
                string postData = "newstitle="+newstitle;
                postData += "&newscontent="+newcontent;
               
                byte[] data = System.Text.Encoding.GetEncoding("GB2312").GetBytes(postData);


                // Prepare web request
                HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create("http://localhost/5jiuye/post.asp");

                myRequest.Method = "POST";
                myRequest.ContentType = "application/x-www-form-urlencoded";
                myRequest.ContentLength = data.Length;
                Stream newStream = myRequest.GetRequestStream();

                // Send the data.
                newStream.Write(data, 0, data.Length);
                newStream.Close();

                // Get response
               
                         
                      
                try
                {
                    HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse();
                    StreamReader reader = new StreamReader(myResponse.GetResponseStream(),System.Text.Encoding.Default);
                    string content = reader.ReadToEnd();
                    textBox5.Text = content;
                }
                catch (Exception Err)
                {
                    textBox1.Text = "错误" + Err;
                }

                textBox2.Text = i.ToString();
                this.progressBar1.Value = this.progressBar1.Value + 1;
                //提交完成
                if (this.progressBar1.Value == listBox1.Items.Count)
                {
                    MessageBox.Show("完成", "信息提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                }

 

           
            }       

        }

        private void progressBar1_Click(object sender, System.EventArgs e)
        {
            // Intentionally empty. InitializeComponent attaches this handler to
            // progressBar1.Click; clicking the bar triggers no action.
        }


}
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值