C# 通过 WebClient 下载网页字符串

最新推荐文章于 2018-07-23 09:19:21 发布

丷寒枫丷

最新推荐文章于 2018-07-23 09:19:21 发布

阅读量785

点赞数

分类专栏： C#

本文链接：https://blog.csdn.net/qq_24432127/article/details/79619599

版权

C# 专栏收录该内容

34 篇文章 0 订阅

订阅专栏

using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;


namespace _03通过webclient来提取Email地址
{
    /// <summary>
    /// Console demo: download HTML pages with <see cref="WebClient"/> and use regular
    /// expressions to pull e-mail addresses, image URLs and hyperlinks out of the markup.
    /// </summary>
    class Program
    {
        // The domain label accepts digits as well as letters so that addresses such as
        // "name@163.com" are matched.
        private static readonly Regex EmailPattern =
            new Regex(@"[-a-zA-Z0-9_.]+@[-a-zA-Z0-9]+(\.[a-zA-Z0-9]+)+");

        // Group 1 is the value of the src attribute. "[^>]*?" lets other attributes precede
        // src while keeping the match inside a single <img ...> tag.
        private static readonly Regex ImageSourcePattern =
            new Regex(@"<img\b[^>]*?\ssrc=""(.+?)""", RegexOptions.IgnoreCase);

        // Group 1 is the value of the href attribute. "[^>]*?" keeps the match inside a single
        // <a ...> tag instead of greedily running to the last href on the line.
        private static readonly Regex HyperlinkPattern =
            new Regex(@"<a\b[^>]*?\shref=""(.+?)""", RegexOptions.IgnoreCase);

        // Directory that downloaded images are written to.
        private const string ImageDirectory = @"D:\img";

        /// <summary>
        /// Runs the three demos in order (e-mail extraction, image download, hyperlink
        /// extraction) and then waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            #region Download a page with WebClient and extract e-mail addresses
            PrintEmailAddresses(@"http://www.360doc.com/content/16/0622/21/6598516_569892154.shtml");
            #endregion

            #region Extract and download images
            DownloadImages(@"http://www.27270.com/ent/meinvtupian/", ImageDirectory);
            #endregion

            #region Extract hyperlinks
            PrintHyperlinks(@"https://www.baidu.com/");
            #endregion

            /*
             * Summary: use WebClient to download the HTML of a page as a string, then
             * analyse that string with regular expressions to obtain the wanted pieces,
             * and finally download / extract them.
             */

            Console.ReadKey();
        }

        /// <summary>Downloads the page at <paramref name="pageUrl"/> and returns its HTML.</summary>
        private static string DownloadHtml(string pageUrl)
        {
            // WebClient is IDisposable; release it as soon as the download is done.
            using (WebClient client = new WebClient())
            {
                return client.DownloadString(pageUrl);
            }
        }

        /// <summary>
        /// Prints every e-mail address found in the page, followed by the number of matches.
        /// </summary>
        private static void PrintEmailAddresses(string pageUrl)
        {
            string html = DownloadHtml(pageUrl);

            MatchCollection emails = EmailPattern.Matches(html);
            foreach (Match email in emails)
            {
                Console.WriteLine(email.Value);
            }
            Console.WriteLine(emails.Count + "个");
        }

        /// <summary>
        /// Prints the src of every &lt;img&gt; tag in the page and saves each image into
        /// <paramref name="targetDirectory"/>.
        /// </summary>
        private static void DownloadImages(string pageUrl, string targetDirectory)
        {
            Uri pageUri = new Uri(pageUrl);

            // DownloadFile does not create missing directories, so make sure it exists.
            System.IO.Directory.CreateDirectory(targetDirectory);

            using (WebClient client = new WebClient())
            {
                // 1. Download the HTML.
                string html = client.DownloadString(pageUri);

                // 2. Find the <img> tags; 3. read the src attribute from capture group 1.
                int index = 0;
                foreach (Match image in ImageSourcePattern.Matches(html))
                {
                    string source = image.Groups[1].Value;
                    Console.WriteLine(source);

                    // Resolve relative and protocol-relative sources against the page URL.
                    Uri imageUri;
                    if (!Uri.TryCreate(pageUri, source, out imageUri))
                    {
                        Console.Error.WriteLine("Skipped invalid image URL: " + source);
                        continue;
                    }

                    // 4. Build the target path. The running index keeps names unique even
                    //    when two downloads finish within the same clock tick.
                    string fileName = DateTime.Now.ToFileTime() + "_" + index + ".jpg";
                    index++;

                    try
                    {
                        client.DownloadFile(imageUri, System.IO.Path.Combine(targetDirectory, fileName));
                    }
                    catch (WebException ex)
                    {
                        // One broken image must not abort the remaining downloads.
                        Console.Error.WriteLine("Failed to download " + imageUri + ": " + ex.Message);
                    }
                }
            }
        }

        /// <summary>
        /// Prints, for every hyperlink in the page, the matched tag text followed by its href.
        /// </summary>
        private static void PrintHyperlinks(string pageUrl)
        {
            // Search the page that was just downloaded, not the HTML of an earlier demo.
            string html = DownloadHtml(pageUrl);

            foreach (Match link in HyperlinkPattern.Matches(html))
            {
                Console.WriteLine(link.Value);
                Console.WriteLine(link.Groups[1].Value);
            }
        }
    }
}