【橙子】C#WPF--网络资源爬虫

C#WPF–网络资源爬虫

作者心得
真没你想的那么高级,真的
算是我第一个与 HTTP 有交互的玩意儿吧
运用到了:正则表达式
(后来代码被我弄去研究了,貌似改动过)

源代码

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;

namespace TQJ
{
    /// <summary>
    /// Interaction logic for MainWindow.xaml: downloads the page whose URL is typed
    /// into the <c>www</c> text box and extracts regex matches (4-digit numbers,
    /// phone numbers, e-mail addresses, "host[port=...]" entries) from it.
    /// </summary>
    /// <remarks>
    /// Extracted phone/e-mail/address lists are appended to text files under
    /// <c>e:\data</c> and echoed into the <c>TextBox</c> control.
    /// </remarks>
    public partial class MainWindow : Window
    {
        // Directory that receives phone.txt, email.txt and address.txt.
        private const string DataDirectory = @"e:\data";

        // The patterns are kept exactly as originally written; they are only hoisted
        // so each Regex is constructed once instead of on every click.
        private static readonly Regex s_fourDigitPattern =
            new Regex(@"[0-9]{4}", RegexOptions.IgnoreCase);

        // NOTE: the pattern is not anchored on digit boundaries, so it also matches
        // an 11-digit run that sits inside a longer digit sequence.
        private static readonly Regex s_phonePattern =
            new Regex(@"(134|135|136|137|138|139|150|151|152|157|158|159|147|182|183|184|187|188|170|178|130|131|132|145|155|156|185|186|176|170|171|166|133|153|180|181|189|170|177|173)(\b)?[0-9]{8}");

        private static readonly Regex s_emailPattern =
            new Regex(@"([A-Za-z0-9\u4e00-\u9fa5]+)@([a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+)");

        private static readonly Regex s_addressPattern =
            new Regex(@"(.+)\[port=([0-9]{2,5})(,type=(.+))?\]");

        public MainWindow()
        {
            InitializeComponent();
        }

        /// <summary>URL most recently read from the <c>www</c> text box by an extract handler.</summary>
        public string wwwtext;

        /// <summary>Text of the <c>www</c> text box captured by <see cref="Button_Click_4"/> before Window1 is shown.</summary>
        public static string textclass;

        /// <summary>
        /// Ensures the output directory exists when the window is initialized.
        /// </summary>
        private void Window_Initialized(object sender, EventArgs e)
        {
            // CreateDirectory is a no-op when the directory already exists.
            Directory.CreateDirectory(DataDirectory);
        }

        /// <summary>
        /// Encodes <paramref name="unicodeString"/> to UTF-8 bytes and decodes them back.
        /// </summary>
        /// <param name="unicodeString">Text to round-trip; must not be null.</param>
        /// <returns>The decoded string produced by the UTF-8 round trip.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="unicodeString"/> is null.</exception>
        public static string get_uft8(string unicodeString)
        {
            byte[] encodedBytes = Encoding.UTF8.GetBytes(unicodeString);
            return Encoding.UTF8.GetString(encodedBytes);
        }

        /// <summary>
        /// "Extract pictures" button: downloads the page at the entered URL, decodes it
        /// as UTF-8, and appends the page text followed by every 4-digit match to the
        /// text box.
        /// </summary>
        private void Button_Click(object sender, RoutedEventArgs e)
        {
            wwwtext = www.Text;
            try
            {
                string html;
                using (WebClient client = new WebClient())
                {
                    // The original passed an empty URL here, which can never succeed;
                    // the URL typed by the user is used like in the other handlers.
                    byte[] pageData = client.DownloadData(wwwtext);
                    html = Encoding.UTF8.GetString(pageData);
                }

                // Build the displayed text once instead of re-assigning Text per match.
                StringBuilder display = new StringBuilder(html);
                foreach (Match item in s_fourDigitPattern.Matches(html))
                {
                    display.Append(item.Value);
                }

                TextBox.Text += display.ToString();
                MessageBox.Show("提取成功!", "提示!");
            }
            catch (Exception ex)
            {
                // Keep the original hint but surface the real cause as well.
                MessageBox.Show("该网站已加密,请换一个试试" + "\r\n" + ex.Message);
            }
        }

        /// <summary>
        /// "Extract phone numbers" button: appends every phone-number match to
        /// phone.txt and to the text box.
        /// </summary>
        private void Button_Click_1(object sender, RoutedEventArgs e)
        {
            ExtractMatches(
                "phone.txt",
                s_phonePattern,
                match => match.Value,
                "该网站不存在电话号码,请换一个试试");
        }

        /// <summary>
        /// Appends every e-mail match (user name and domain) to email.txt and to the
        /// text box.
        /// </summary>
        private void Button_Click_2(object sender, RoutedEventArgs e)
        {
            ExtractMatches(
                "email.txt",
                s_emailPattern,
                match => "用户名:" + match.Groups[1].Value + ";域名:" + match.Groups[2].Value,
                "该网站不存在邮箱,请换一个试试");
        }

        /// <summary>
        /// Appends every "host[port=N]" match (host and port) to address.txt and to
        /// the text box.
        /// </summary>
        private void Button_Click_3(object sender, RoutedEventArgs e)
        {
            ExtractMatches(
                "address.txt",
                s_addressPattern,
                match => "IP:" + match.Groups[1].Value + ";port:" + match.Groups[2].Value,
                "该网站不存在地址,请换一个试试");
        }

        /// <summary>
        /// Stores the entered text in <see cref="textclass"/>, hides this window and
        /// shows a new Window1.
        /// </summary>
        private void Button_Click_4(object sender, RoutedEventArgs e)
        {
            textclass = www.Text;
            this.Hide();
            Window1 p = new Window1();
            p.Show();
        }

        /// <summary>Shows a new Form1.</summary>
        private void Button_Click_5(object sender, RoutedEventArgs e)
        {
            Form1 form1 = new Form1();
            form1.Show();
        }

        /// <summary>
        /// Replaces the text box content with two fresh GUIDs, each prefixed by an
        /// underscore, in upper case.
        /// </summary>
        private void Button_Click_6(object sender, RoutedEventArgs e)
        {
            string token = "_" + Guid.NewGuid().ToString() + "_" + Guid.NewGuid().ToString();
            TextBox.Text = token.ToUpperInvariant();
        }

        /// <summary>
        /// Downloads the page at the URL in the <c>www</c> text box, formats every
        /// match of <paramref name="pattern"/>, appends the lines to
        /// <paramref name="fileName"/> under the data directory and to the text box,
        /// then reports success. Any failure is reported in a message box.
        /// </summary>
        /// <param name="fileName">Output file name inside the data directory.</param>
        /// <param name="pattern">Pattern whose matches are extracted from the page.</param>
        /// <param name="describe">Turns one match into the line written and displayed.</param>
        /// <param name="failureMessage">Hint shown, followed by the exception message, on failure.</param>
        private void ExtractMatches(string fileName, Regex pattern, Func<Match, string> describe, string failureMessage)
        {
            wwwtext = www.Text;
            try
            {
                // Re-check the directory here so opening the file cannot fail merely
                // because the directory was removed after start-up.
                Directory.CreateDirectory(DataDirectory);

                string html;
                using (WebClient client = new WebClient())
                {
                    html = client.DownloadString(wwwtext);
                }

                StringBuilder display = new StringBuilder();
                // 'true' appends to an existing file instead of overwriting it.
                using (StreamWriter writer = new StreamWriter(Path.Combine(DataDirectory, fileName), true))
                {
                    foreach (Match item in pattern.Matches(html))
                    {
                        string line = describe(item);
                        writer.WriteLine(line);
                        display.Append("\r\n").Append(line);
                    }
                }

                TextBox.Text += display.ToString();
                MessageBox.Show("提取成功!", "提示!");
            }
            catch (Exception ex)
            {
                // Keep the original hint but surface the real cause as well.
                MessageBox.Show(failureMessage + "\r\n" + ex.Message);
            }
        }
    }
    }


效果截图

在这里插入图片描述
作者的话
好像这个是我最后一个 WinForm 项目了
接下来应该走 ASP.NET 项目了
虽然这两个关系不大,但是 WinForm 作为 C# 入门真的很有帮助!

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值