using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;

namespace EFPlatform.Common.Helper
{
    /// <summary>
    /// Helpers for fetching remote resources that are referenced from HTML.
    /// </summary>
    public class WebRequestHelper
    {
        #region RequestFileFromHtml

        // Absolute http/https/ftp URLs that end in .gif, .jpg or .png.
        // The path segments use [^\s"'<>] so that a match stops at the end of an
        // HTML attribute value instead of running on into the next tag.
        // Built once: compiling the expression on every call is wasted work.
        private static readonly Regex ImageUrlPattern = new Regex(
            @"((http|https|ftp):(//|\\){1}(([A-Za-z0-9_-])+[.]){1,}(net|com|cn|org|cc|tv|[0-9]{1,3})([^\s""'<>]*/)(([^\s""'<>])+[.]{1}(gif|jpg|png)))",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Downloads every remote image (gif/jpg/png) referenced in <paramref name="html"/>
        /// and rewrites the references to point at the downloaded copies.
        /// </summary>
        /// <param name="html">HTML text to scan. Null or empty input is returned unchanged.</param>
        /// <param name="fileUrl">Prefix prepended to the generated file name to form the replacement URL.</param>
        /// <param name="filePath">Prefix prepended to the generated file name to form the download target path.</param>
        /// <param name="reservedHost">Host whose images are left untouched (compared case-insensitively); may be null.</param>
        /// <returns>
        /// The HTML with each successfully downloaded image URL replaced by
        /// <paramref name="fileUrl"/> + generated file name. URLs whose download
        /// fails are left as they were.
        /// </returns>
        public static string RequestFileFromHtml(string html, string fileUrl, string filePath, string reservedHost)
        {
            if (string.IsNullOrEmpty(html))
            {
                return html;
            }

            MatchCollection matches = ImageUrlPattern.Matches(html);

            if (matches.Count == 0)
            {
                return html;
            }

            // Parallel lists: the parsed URL and the exact text it was parsed from.
            List<Uri> urlList = new List<Uri>();
            List<string> sourceTexts = new List<string>();

            foreach (Match match in matches)
            {
                Uri candidate;

                // A malformed match is skipped rather than aborting the whole call.
                if (!Uri.TryCreate(match.Value, UriKind.Absolute, out candidate))
                {
                    continue;
                }

                if (string.Equals(candidate.Host, reservedHost, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Duplicate check is evaluated per URL, so one repeated image
                // does not suppress the URLs that follow it.
                if (urlList.Contains(candidate))
                {
                    continue;
                }

                urlList.Add(candidate);
                sourceTexts.Add(match.Value);
            }

            if (urlList.Count == 0)
            {
                return html;
            }

            using (WebClient client = new WebClient())
            {
                for (int i = 0; i < urlList.Count; i++)
                {
                    Uri url = urlList[i];
                    string sourceText = sourceTexts[i];

                    // The pattern guarantees the matched text ends in ".gif", ".jpg" or ".png".
                    string fileExt = sourceText.Substring(sourceText.LastIndexOf('.'));

                    // The loop index keeps names unique within a single call.
                    string fileName = string.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        "{0:yyMMddHHmmssff}{1}{2}",
                        DateTime.Now,
                        i,
                        fileExt);

                    try
                    {
                        client.DownloadFile(url, filePath + fileName);
                    }
                    catch (WebException)
                    {
                        // Best effort: keep the original remote reference when the download fails.
                        continue;
                    }

                    string localUrl = fileUrl + fileName;

                    // Replace the text as it appears in the HTML; Uri normalisation
                    // (host casing, escaping) can make AbsoluteUri differ from it.
                    html = html.Replace(sourceText, localUrl);

                    if (!string.Equals(sourceText, url.AbsoluteUri, StringComparison.Ordinal))
                    {
                        html = html.Replace(url.AbsoluteUri, localUrl);
                    }
                }
            }

            return html;
        }

        #endregion
    }
}
// Posted @ 2007-05-25 08:46:00 | Comments (loading...) | Edit