using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
using System.Web;
namespace RegexPractice
{
/// <summary>
/// Console entry point: reads the diggfoto.com archive listing, visits every photo
/// page it links to, and saves each image whose declared size is at least
/// <see cref="MinWidth"/> x <see cref="MinHeight"/> pixels.
/// </summary>
class program
{
    // Smallest declared image dimensions (in pixels) that are worth saving.
    private const int MinWidth = 1039;
    private const int MinHeight = 737;

    // Extracts the image URL and its declared width/height from a photo page.
    // Built once here rather than once per page inside the download loop.
    private static readonly Regex ImageRegex = new Regex("title=.*?\" src=\"(?<sourcePath>.*?)\".*? width=\"(?<width>\\d{1,})\" height=\"(?<height>\\d{1,})\"");

    /// <summary>
    /// Fetches the archive page, extracts the photo entries and processes each of them.
    /// No date filter is applied: every entry found on the archive page is visited.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    static void Main(string[] args)
    {
        string entryPageUrl = "http://diggfoto.com/archive/?showall=1";
        string encodingName = "utf-8";
        List<PhotoInfo> photoList = PhotoInfo.ExtractPhotoList(entryPageUrl, encodingName);
        foreach (PhotoInfo photoInfo in photoList)
        {
            DownloadIfLargeEnough(photoInfo, encodingName);
        }
    }

    /// <summary>
    /// Downloads the photo page for <paramref name="photoInfo"/>, and if the image it
    /// references is large enough, writes the image bytes to <c>photoInfo.FilePath</c>.
    /// Pages without a recognisable image tag are skipped.
    /// </summary>
    /// <param name="photoInfo">Archive entry describing the photo page to visit.</param>
    /// <param name="encodingName">Name of the text encoding used to decode the page.</param>
    private static void DownloadIfLargeEnough(PhotoInfo photoInfo, string encodingName)
    {
        string photoUriPath = string.Format("http://diggfoto.com/{0}/{1}/{2}/{3}", photoInfo.Year, photoInfo.Month, photoInfo.Day, photoInfo.Path);
        string pageSource = Util.GetPageSource(photoUriPath, encodingName);

        Match match = ImageRegex.Match(pageSource);
        if (!match.Success)
        {
            // Without this guard the width/height groups are empty strings and
            // parsing them throws, aborting the whole run on one odd page.
            return;
        }

        // TryParse also protects against digit runs too long to fit in an int.
        int width;
        int height;
        if (!int.TryParse(match.Groups["width"].Value, out width) ||
            !int.TryParse(match.Groups["height"].Value, out height))
        {
            return;
        }

        if (width < MinWidth || height < MinHeight)
        {
            return;
        }

        string sourcePath = match.Groups["sourcePath"].Value;
        byte[] imageBytes = Util.GetPageSourceBytes(sourcePath);

        // CreateDirectory does nothing when the directory already exists,
        // so no separate existence check is needed.
        Directory.CreateDirectory(photoInfo.TargetSubDirPath);

        // Creates the file, or overwrites it if it is already there.
        File.WriteAllBytes(photoInfo.FilePath, imageBytes);
    }
}
/// <summary>
/// Helpers for downloading the content behind a URI, either as raw bytes or as text.
/// </summary>
class Util
{
    /// <summary>
    /// Downloads the resource at <paramref name="uri"/> and returns its raw bytes.
    /// </summary>
    /// <param name="uri">Absolute URI of the resource to download.</param>
    /// <returns>The bytes returned by <see cref="WebClient.DownloadData(Uri)"/>.</returns>
    public static byte[] GetPageSourceBytes(string uri)
    {
        // WebClient is IDisposable; dispose it so its resources are released
        // after every download instead of waiting for finalization.
        using (WebClient wc = new WebClient())
        {
            return wc.DownloadData(new Uri(uri));
        }
    }

    /// <summary>
    /// Downloads the resource at <paramref name="uri"/> and decodes it as text.
    /// </summary>
    /// <param name="uri">Absolute URI of the resource to download.</param>
    /// <param name="encodingName">Encoding name passed to <see cref="Encoding.GetEncoding(string)"/>, e.g. "utf-8".</param>
    /// <returns>The downloaded bytes decoded with the named encoding.</returns>
    public static string GetPageSource(string uri, string encodingName)
    {
        byte[] pageSourceBytes = GetPageSourceBytes(uri);
        return Encoding.GetEncoding(encodingName).GetString(pageSourceBytes);
    }
}
/// <summary>
/// One photo entry scraped from the archive page: the date parts and path segment
/// of its URL, its link text, and the local paths derived from them.
/// </summary>
class PhotoInfo
{
    // Matches a single-quoted link of the form 'http://diggfoto.com/yyyy/mm/dd/path/'
    // and captures the date parts, the path segment, and the text between the
    // next '>' and the following '<' as the title.
    public static Regex PhotoRegex = new Regex("'http://diggfoto.com/(?<year>\\d{4})/(?<month>\\d{2})/(?<day>\\d{2})/(?<path>.*?)/'.*?>(?<title>.*?)<");

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and returns one
    /// <see cref="PhotoInfo"/> per match of <see cref="PhotoRegex"/>.
    /// </summary>
    /// <param name="url">URL of the page listing the photos.</param>
    /// <param name="encodingName">Name of the encoding used to decode the page.</param>
    /// <returns>The extracted entries in match order; empty when nothing matches.</returns>
    public static List<PhotoInfo> ExtractPhotoList(string url, string encodingName)
    {
        string pageSource = Util.GetPageSource(url, encodingName);
        List<PhotoInfo> photoList = new List<PhotoInfo>();
        foreach (Match match in PhotoRegex.Matches(pageSource))
        {
            PhotoInfo photoInfo = new PhotoInfo
            {
                Year = match.Groups["year"].Value,
                Month = match.Groups["month"].Value,
                Day = match.Groups["day"].Value,
                Path = match.Groups["path"].Value,
                Title = match.Groups["title"].Value
            };
            photoList.Add(photoInfo);
        }
        return photoList;
    }

    /// <summary>Four-digit year captured from the photo URL.</summary>
    public string Year { get; set; }

    /// <summary>Two-digit month captured from the photo URL.</summary>
    public string Month { get; set; }

    /// <summary>Two-digit day captured from the photo URL.</summary>
    public string Day { get; set; }

    /// <summary>URL segment that follows the date in the photo URL.</summary>
    public string Path { get; set; }

    /// <summary>Link text captured for the photo; used as the local file name.</summary>
    public string Title { get; set; }

    /// <summary>Local directory for this photo's date: c:\ followed by year, month and day.</summary>
    public string TargetSubDirPath
    {
        get
        {
            return string.Format("c:\\{0}{1}{2}", Year, Month, Day);
        }
    }

    /// <summary>
    /// Local file path for the photo: <see cref="TargetSubDirPath"/>, a backslash, the
    /// title with characters that are not valid in a file name replaced by '_',
    /// and the ".jpg" extension.
    /// </summary>
    public string FilePath
    {
        get
        {
            // The title is scraped text and may contain characters such as '/', ':'
            // or '?', which would otherwise yield an invalid or unintended path.
            return string.Format("{0}\\{1}.jpg", TargetSubDirPath, SanitizeFileName(Title));
        }
    }

    /// <summary>Rebuilds the photo page URL (with trailing slash) from the captured parts.</summary>
    public string GetTargetUrl()
    {
        return string.Format("http://diggfoto.com/{0}/{1}/{2}/{3}/", Year, Month, Day, Path);
    }

    // Replaces every character that is not allowed in a file name with '_'.
    // A null or empty name yields an empty string, which formats the same way
    // the unsanitized value did.
    private static string SanitizeFileName(string name)
    {
        if (string.IsNullOrEmpty(name))
        {
            return string.Empty;
        }

        // Fully qualified because the Path property of this class hides System.IO.Path here.
        char[] invalidChars = System.IO.Path.GetInvalidFileNameChars();
        StringBuilder cleaned = new StringBuilder(name.Length);
        foreach (char c in name)
        {
            cleaned.Append(Array.IndexOf(invalidChars, c) >= 0 ? '_' : c);
        }
        return cleaned.ToString();
    }
}