工具介绍:
1)分析网页,获取页面图片。
2)分析网页引用的CSS文件,获取其中的背景图片。
3)批量下载。
要点:
1)正则
LINK_PATTERN:获取页面所有链接
BACKGROUND_IMAGE_PATTERN:获取CSS中背景图片地址
CHECK_URL_PATTERN:检测URL是否有效
![](https://i-blog.csdnimg.cn/blog_migrate/8f900a89c6347c561fdf2122f13be562.gif)
![](https://i-blog.csdnimg.cn/blog_migrate/961ddebeb323a10fe0623af514929fc1.gif)
// Matches href="..." / src="..." attributes (quotes optional). The named group
// "link" captures the attribute value up to the next quote or whitespace.
private const string LINK_PATTERN = @"(href|src)=['""]?(?<link>[^'""\s]*)['""]?";

// Matches CSS url(...) references (quotes optional). The named group "url"
// captures the referenced address.
private const string BACKGROUND_IMAGE_PATTERN = @"(url)\(['""]?(?<url>[^'""\s]*)['""]?\)";

// Checks that a string begins with an http:// or https:// address made of a
// dotted host name and an optional path. The pattern is anchored at the start
// only, so trailing characters after the matched part are not rejected.
private const string CHECK_URL_PATTERN = @"^http(s)?://+([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
2)线程
![](https://i-blog.csdnimg.cn/blog_migrate/8f900a89c6347c561fdf2122f13be562.gif)
![](https://i-blog.csdnimg.cn/blog_migrate/961ddebeb323a10fe0623af514929fc1.gif)
/// <summary>
/// Work routine that resets the status display and then runs the analysis and
/// download for the page entered in the form. (The original comment labels
/// this "thread"; how the method is invoked is not visible in this snippet.)
/// </summary>
private void DoWork()
{
    // Page address entered by the user.
    string url = txtUrl.Text.Trim();
    // Directory the downloaded images are saved to.
    string saveDir = txtSaveDir.Text.Trim();

    // Reset the log list and the status-bar counters before starting.
    lvLog.Items.Clear();
    tsslStatus.Text = DESC_ANALYSISING;
    tsslTime.Text = string.Format(DESC_SPAND_TIME, 0);
    tsslCount.Text = string.Format(DESC_IMAGES_COUNT, 0);
    tsslTotalTime.Text = string.Format(DESC_DOWNLOAD_TOTAL_TIME, 0);
    btnDownload.Enabled = false;

    // Pump pending window messages so the UI changes above are painted
    // before the analysis/download call starts.
    Application.DoEvents();

    AnalyzeAndDownload(url, saveDir);
}
3)网页分析:
![](https://i-blog.csdnimg.cn/blog_migrate/8f900a89c6347c561fdf2122f13be562.gif)
![](https://i-blog.csdnimg.cn/blog_migrate/961ddebeb323a10fe0623af514929fc1.gif)
/// <summary>
/// Downloads the page at <paramref name="url"/>, scans it for href/src links,
/// downloads every linked stylesheet and collects image addresses.
/// </summary>
/// <param name="url">
/// Absolute address of the page to analyze. It is stored in _basicUri and used
/// as the base for resolving relative links.
/// </param>
/// <returns>
/// The image URIs linked directly from the page (.gif, .png, .jpg, .jpeg),
/// followed by whatever FetchBGImageUrlsWithCSS returns for the page HTML plus
/// the downloaded stylesheet text (presumably the CSS background images).
/// </returns>
protected List<Uri> FetchCSSWithSite(string url)
{
    // Holds the page HTML plus the text of every linked stylesheet; the whole
    // buffer is handed to FetchBGImageUrlsWithCSS at the end.
    StringBuilder sourceCSS = new StringBuilder();
    List<Uri> list = new List<Uri>();

    using (WebClient client = new WebClient())
    {
        _basicUri = new Uri(url);
        string sourceHtml = client.DownloadString(_basicUri);
        sourceCSS.Append(sourceHtml);

        // Regex.Matches never returns null, so no null check is needed here.
        Regex regex = new Regex(LINK_PATTERN, RegexOptions.IgnoreCase);
        foreach (Match match in regex.Matches(sourceHtml))
        {
            string link = match.Groups["link"].Value;

            // Skip links that cannot be resolved against the page address
            // instead of letting one malformed attribute abort the whole scan.
            Uri target;
            if (!Uri.TryCreate(_basicUri, link, out target))
            {
                continue;
            }

            string extension = GetLinkExtension(link);
            lvLog.Items.Add(new ListViewItem(new string[]
            {
                target.AbsoluteUri,
                DateTime.Now.ToString(TIME_FORMAT),
                STATUS_ANALYSIS,
                string.Empty,
                extension
            }));

            if (extension.Length == 0)
            {
                continue;
            }

            // Invariant upper-casing keeps the comparison independent of the
            // current UI culture (e.g. the Turkish dotted/dotless i).
            switch (extension.ToUpperInvariant())
            {
                case ".CSS":
                    sourceCSS.Append(client.DownloadString(target));
                    break;
                case ".GIF":
                case ".PNG":
                case ".JPG":
                case ".JPEG":
                    list.Add(target);
                    break;
                default:
                    break;
            }
        }
    }

    list.AddRange(FetchBGImageUrlsWithCSS(sourceCSS.ToString()));
    return list;
}

/// <summary>
/// Returns the extension of a link (including the leading dot), ignoring any
/// query string or fragment, or an empty string when the link has no dot.
/// </summary>
/// <param name="link">Raw href/src value taken from the page.</param>
/// <returns>For example ".css" for "site.css?v=2"; empty when there is no dot.</returns>
private static string GetLinkExtension(string link)
{
    // Cut off "?query" and "#fragment" so they do not hide the extension.
    int cut = link.IndexOfAny(new char[] { '?', '#' });
    string path = cut >= 0 ? link.Substring(0, cut) : link;

    int dot = path.LastIndexOf('.');
    return dot >= 0 ? path.Substring(dot) : string.Empty;
}
4)效果图