C#WPF–网络资源爬虫
作者心得
真没你想的那么高级,真的
算是我第一个与 HTTP 有交互的玩意儿吧
运用到了:正则表达式
(后来代码被我弄去研究了,貌似改动过)
源代码
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
namespace TQJ
{
/// <summary>
/// Interaction logic for MainWindow.xaml
/// </summary>
public partial class MainWindow : Window
{
/// <summary>
/// Creates the window and runs the generated component initialization.
/// </summary>
public MainWindow()
{
    this.InitializeComponent();
}
// Text most recently copied from the `www` input box by one of the click handlers;
// the handlers pass it to WebClient as the address to download.
public string wwwtext;
// Copy of the `www` box text stored right before Window1 is opened.
// NOTE(review): presumably read by Window1 — confirm against that class.
public static string textclass;
/// <summary>
/// Makes sure the output folder <c>e:\data</c> exists; the extraction handlers
/// write their result files into it.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Window_Initialized(object sender, EventArgs e)
{
    string dataFolder = @"e:\data";

    // Nothing to do when the folder is already there.
    if (Directory.Exists(dataFolder))
    {
        return;
    }

    Directory.CreateDirectory(dataFolder);
}
/// <summary>
/// Encodes the given string to UTF-8 bytes and decodes those bytes back into a string.
/// For well-formed text the result equals the input.
/// </summary>
/// <param name="unicodeString">Text to round-trip through UTF-8.</param>
/// <returns>The string decoded from the UTF-8 bytes of <paramref name="unicodeString"/>.</returns>
public static string get_uft8(string unicodeString)
{
    UTF8Encoding codec = new UTF8Encoding();
    return codec.GetString(codec.GetBytes(unicodeString));
}
/// <summary>
/// Click handler labelled "extract images" in the original source. It downloads the
/// page whose address is typed in the <c>www</c> box, appends the page's HTML to the
/// output box, and then appends every run of four digits found in that HTML.
/// </summary>
/// <remarks>
/// NOTE(review): despite the label, the visible code never saved any image; only a
/// commented-out DownloadFile experiment existed. Image download is therefore not
/// implemented here — confirm the intended behaviour with the author.
/// </remarks>
/// <param name="sender">The control that raised the click (unused).</param>
/// <param name="e">Routed event data (unused).</param>
private void Button_Click(object sender, RoutedEventArgs e)// extract images
{
    wwwtext = www.Text;
    try
    {
        string html;
        // The WebClient declaration had been commented out, which left `client`
        // undefined; it is restored here and disposed deterministically.
        using (WebClient client = new WebClient())
        {
            // Download the address the user typed (the original passed "", which
            // always failed), matching what the other handlers do with wwwtext.
            Byte[] pageData = client.DownloadData(wwwtext);
            html = Encoding.UTF8.GetString(pageData);
        }

        // Build the appended text once instead of re-assigning TextBox.Text per match.
        StringBuilder appended = new StringBuilder(html);
        foreach (Match item in Regex.Matches(html, @"[0-9]{4}", RegexOptions.IgnoreCase))
        {
            appended.Append(item.Value);
        }
        TextBox.Text += appended.ToString();

        // Shown once after the loop; the lost closing brace had pulled it inside the loop.
        MessageBox.Show("提取成功!", "提示!");
    }
    catch (Exception ex)
    {
        // Keep the original user-facing message, but append the real reason so that
        // network and address errors are not misreported silently.
        MessageBox.Show("该网站已加密,请换一个试试" + "\r\n" + ex.Message);
    }
}
/// <summary>
/// Click handler that extracts phone numbers: downloads the page at the address in
/// the <c>www</c> box, finds every match of the mobile-number pattern, appends each
/// match to <c>e:\data\phone.txt</c> and to the output box.
/// </summary>
/// <param name="sender">The control that raised the click (unused).</param>
/// <param name="e">Routed event data (unused).</param>
private void Button_Click_1(object sender, RoutedEventArgs e)// extract phone numbers
{
    wwwtext = www.Text;
    try
    {
        string html;
        using (WebClient client = new WebClient())
        {
            html = client.DownloadString(wwwtext);
        }

        MatchCollection matches = Regex.Matches(html, @"(134|135|136|137|138|139|150|151|152|157|158|159|147|182|183|184|187|188|170|178|130|131|132|145|155|156|185|186|176|170|171|166|133|153|180|181|189|170|177|173)(\b)?[0-9]{8}");

        // Previously an empty result still reported success; report "nothing found" instead.
        if (matches.Count == 0)
        {
            MessageBox.Show("该网站不存在电话号码,请换一个试试");
            return;
        }

        StringBuilder shown = new StringBuilder();
        // Open the file only once there is something to write, inside the try so a
        // missing folder is reported instead of crashing, and close it via using.
        using (StreamWriter sw = new StreamWriter(@"e:\data\phone.txt", true))
        {
            foreach (Match item in matches)
            {
                sw.WriteLine(item.Value);
                shown.Append("\r\n").Append(item.Value);
            }
        }

        // Single UI update instead of rebuilding the whole text for every match.
        TextBox.Text += shown.ToString();
        MessageBox.Show("提取成功!", "提示!");
    }
    catch (Exception ex)
    {
        // Same user-facing message as before, plus the actual failure reason.
        MessageBox.Show("该网站不存在电话号码,请换一个试试" + "\r\n" + ex.Message);
    }
}
/// <summary>
/// Click handler that extracts e-mail addresses: downloads the page at the address in
/// the <c>www</c> box, finds every <c>user@domain</c> match, and appends the user
/// part and the domain part of each match to <c>e:\data\email.txt</c> and to the
/// output box.
/// </summary>
/// <param name="sender">The control that raised the click (unused).</param>
/// <param name="e">Routed event data (unused).</param>
private void Button_Click_2(object sender, RoutedEventArgs e)
{
    wwwtext = www.Text;
    try
    {
        string html;
        using (WebClient client = new WebClient())
        {
            html = client.DownloadString(wwwtext);
        }

        // Group 1 is the user name, group 2 the domain.
        MatchCollection matches = Regex.Matches(html, @"([A-Za-z0-9\u4e00-\u9fa5]+)@([a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+)");

        // Previously an empty result still reported success; report "nothing found" instead.
        if (matches.Count == 0)
        {
            MessageBox.Show("该网站不存在邮箱,请换一个试试");
            return;
        }

        StringBuilder shown = new StringBuilder();
        // Open the file only once there is something to write, inside the try so a
        // missing folder is reported instead of crashing, and close it via using.
        using (StreamWriter sw = new StreamWriter(@"e:\data\email.txt", true))
        {
            foreach (Match item in matches)
            {
                string line = "用户名:" + item.Groups[1].Value + ";域名:" + item.Groups[2].Value;
                sw.WriteLine(line);
                shown.Append("\r\n").Append(line);
            }
        }

        // Single UI update instead of rebuilding the whole text for every match.
        TextBox.Text += shown.ToString();
        MessageBox.Show("提取成功!", "提示!");
    }
    catch (Exception ex)
    {
        // Same user-facing message as before, plus the actual failure reason.
        MessageBox.Show("该网站不存在邮箱,请换一个试试" + "\r\n" + ex.Message);
    }
}
/// <summary>
/// Click handler that extracts addresses: downloads the page at the address in the
/// <c>www</c> box, finds every <c>…[port=N]</c> / <c>…[port=N,type=…]</c> match, and
/// appends the text before the bracket and the port number of each match to
/// <c>e:\data\address.txt</c> and to the output box.
/// </summary>
/// <param name="sender">The control that raised the click (unused).</param>
/// <param name="e">Routed event data (unused).</param>
private void Button_Click_3(object sender, RoutedEventArgs e)
{
    wwwtext = www.Text;
    try
    {
        string html;
        using (WebClient client = new WebClient())
        {
            html = client.DownloadString(wwwtext);
        }

        // Group 1 is everything on the line before "[port=", group 2 the 2-5 digit port.
        MatchCollection matches = Regex.Matches(html, @"(.+)\[port=([0-9]{2,5})(,type=(.+))?\]");

        // Previously an empty result still reported success; report "nothing found" instead.
        if (matches.Count == 0)
        {
            MessageBox.Show("该网站不存在地址,请换一个试试");
            return;
        }

        StringBuilder shown = new StringBuilder();
        // Open the file only once there is something to write, inside the try so a
        // missing folder is reported instead of crashing, and close it via using.
        using (StreamWriter sw = new StreamWriter(@"e:\data\address.txt", true))
        {
            foreach (Match item in matches)
            {
                string line = "IP:" + item.Groups[1].Value + ";port:" + item.Groups[2].Value;
                sw.WriteLine(line);
                shown.Append("\r\n").Append(line);
            }
        }

        // Single UI update instead of rebuilding the whole text for every match.
        TextBox.Text += shown.ToString();
        MessageBox.Show("提取成功!", "提示!");
    }
    catch (Exception ex)
    {
        // Same user-facing message as before, plus the actual failure reason.
        MessageBox.Show("该网站不存在地址,请换一个试试" + "\r\n" + ex.Message);
    }
}
/// <summary>
/// Stores the current <c>www</c> box text in the static <c>textclass</c> field,
/// hides this window, and shows a new <c>Window1</c>.
/// </summary>
/// <param name="sender">The control that raised the click (unused).</param>
/// <param name="e">Routed event data (unused).</param>
private void Button_Click_4(object sender, RoutedEventArgs e)
{
    // Publish the address before the next window is constructed.
    MainWindow.textclass = www.Text;
    Hide();

    Window1 nextWindow = new Window1();
    nextWindow.Show();
}
/// <summary>
/// Creates a new <c>Form1</c> and shows it; this window stays open.
/// </summary>
/// <param name="sender">The control that raised the click (unused).</param>
/// <param name="e">Routed event data (unused).</param>
private void Button_Click_5(object sender, RoutedEventArgs e)
{
    Form1 dialog = new Form1();
    dialog.Show();
}
/// <summary>
/// Replaces the output box content with an upper-cased token of the form
/// <c>_GUID_GUID</c> built from two freshly generated GUIDs.
/// </summary>
/// <param name="sender">The control that raised the click (unused).</param>
/// <param name="e">Routed event data (unused).</param>
private void Button_Click_6(object sender, RoutedEventArgs e)
{
    string firstPart = System.Guid.NewGuid().ToString();
    string secondPart = System.Guid.NewGuid().ToString();
    string token = string.Concat("_", firstPart, "_", secondPart);
    TextBox.Text = token.ToUpper();
}
}
}
效果截图
作者的话
好像这个是我最后一个 WinForms 项目了
接下来应该走 ASP.NET 项目了
虽然这两个关系不大,但是 WinForms 作为 C# 入门真的很有帮助!