利用WebClient和正则表达式下载资源

最新推荐文章于 2024-11-03 15:18:47 发布

Markdon

最新推荐文章于 2024-11-03 15:18:47 发布

阅读量392

点赞数

分类专栏：正则表达式 webclient 文章标签：正则表达式图片 webclient

本文链接：https://blog.csdn.net/Markdon/article/details/78274964

版权

正则表达式同时被 2 个专栏收录

1 篇文章 0 订阅

订阅专栏

webclient

1 篇文章 0 订阅

订阅专栏

// Example: downloading the images referenced by a web page.
// (The shared WebClient is declared once, as a static field of Program below.)
class Program
{
// Single WebClient instance shared by the page fetch (DownloadString) and the image downloads (DownloadFile).
static WebClient client = new WebClient();

    /// <summary>
    /// Scans the target page for img tags and downloads every image whose src ends in ".jpg",
    /// saving each one under a freshly generated GUID file name.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        // Page to scan; it also serves as the base for resolving non-absolute image URLs.
        const string pageUrl = @"http://www.imooc.com/";
        Uri pageUri = new Uri(pageUrl);

        string[] imageUrls = GetHvtImgUrls(pageUrl);

        // Create the target folder up front so a missing directory does not fail every download.
        string targetDirectory = @"D:\path练习\";
        System.IO.Directory.CreateDirectory(targetDirectory);

        foreach (string rawUrl in imageUrls)
        {
            // Only .jpg images are downloaded; compare ordinally and ignore case (".JPG" counts too).
            if (!rawUrl.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Resolve the src value against the page URI instead of blindly prefixing "http:".
            // Absolute http/https URLs pass through unchanged, protocol-relative ("//host/a.jpg")
            // URLs inherit the page scheme, and site-relative paths ("/a.jpg") get the page host.
            // The old Contains("http:") test turned "https://..." into "http:https://...".
            Uri imageUri;
            if (!Uri.TryCreate(pageUri, rawUrl, out imageUri))
            {
                continue; // malformed src value; skip it rather than abort the whole run
            }

            string filepath = targetDirectory + Guid.NewGuid() + ".jpg";
            client.DownloadFile(imageUri, filepath);
        }
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and collects the src value of every img tag found in it.
    /// </summary>
    /// <param name="url">Address of the HTML page to download and scan.</param>
    /// <returns>One entry per matched img tag, in match order, holding the captured src value.</returns>
    public static string[] GetHvtImgUrls(string url)
    {
        // Fetch the raw markup of the page.
        string html = client.DownloadString(url);

        // Matches an <img ...> tag and captures its src attribute value (quoted or unquoted) in the
        // named group "imgUrl". Background on regular expressions:
        // https://baike.baidu.com/item/%E6%AD%A3%E5%88%99%E8%A1%A8%E8%BE%BE%E5%BC%8F/1700215?fr=aladdin
        Regex imgTagPattern = new Regex(@"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>", RegexOptions.IgnoreCase);

        // Run the pattern over the whole document.
        MatchCollection found = imgTagPattern.Matches(html);

        // Copy each captured src value into the result array.
        string[] result = new string[found.Count];
        for (int index = 0; index < result.Length; index++)
        {
            result[index] = found[index].Groups["imgUrl"].Value;
        }

        return result;
    }