using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Security;
using System.Text;
using System.Text.RegularExpressions;

namespace DetectAndDownload
{
    /// <summary>
    /// Helpers for fetching HTML pages, extracting candidate download links
    /// from anchor tags, and saving binary responses to disk.
    /// </summary>
    public class HTMLutility
    {
        /// <summary>Shared instance of the utility.</summary>
        public static readonly HTMLutility GetInstance = new HTMLutility();

        /// <summary>Total number of requests <see cref="GetHTML(Uri)"/> may issue for one page.</summary>
        private const int MaxAttempts = 3;

        /// <summary>Size, in bytes, of the buffer used when copying a download to disk.</summary>
        private const int CopyBufferSize = 4096;

        /// <summary>
        /// Matches an anchor tag and captures its target in the "href" group
        /// (single-quoted, double-quoted or unquoted) and its inner text in "text".
        /// </summary>
        private static readonly Regex AnchorRegex = new Regex(
            @"<a[^>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>(?<text>.*?)</a>",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);

        /// <summary>URL endings that GetAllUrls filters out.</summary>
        private static readonly string[] SkippedSuffixes =
        {
            ".asp", ".aspx", ".jsp", ".html", ".htm", "#", "/", ".net", ".com", ".cn"
        };

        /// <summary>URL beginnings that GetAllUrls filters out.</summary>
        private static readonly string[] SkippedPrefixes = { "mailto:", "javascript:" };

        /// <summary>
        /// Downloads the body of <paramref name="siteUri"/> as text.
        /// </summary>
        /// <param name="siteUri">Address of the page to fetch.</param>
        /// <returns>
        /// The response body, or an empty string when the request fails with a
        /// <see cref="WebException"/> that is not a proxy-authentication (407) failure.
        /// </returns>
        /// <exception cref="WebException">
        /// The proxy kept answering 407 after all attempts were used.
        /// </exception>
        public string GetHTML(Uri siteUri)
        {
            return GetHTML(siteUri, false, MaxAttempts);
        }

        /// <summary>
        /// Performs one request and, on a 407 answer, retries with proxy credentials
        /// while attempts remain.
        /// </summary>
        /// <param name="siteUri">Address of the page to fetch.</param>
        /// <param name="useAuth">Whether to attach proxy credentials to this request.</param>
        /// <param name="attemptsLeft">Requests still allowed, including this one.</param>
        /// <returns>The response body, or an empty string for non-407 web failures.</returns>
        private string GetHTML(Uri siteUri, bool useAuth, int attemptsLeft)
        {
            HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(siteUri);
            webRequest.KeepAlive = true;
            webRequest.AuthenticationLevel = AuthenticationLevel.MutualAuthRequested;

            if (useAuth && webRequest.Proxy != null)
            {
                // SECURITY(review): credentials are hard-coded in source. They are kept
                // as-is to preserve behavior, but should be moved to configuration or
                // replaced with CredentialCache.DefaultNetworkCredentials.
                NetworkCredential credential = new NetworkCredential("cs44689", "Spring015", "APAC");
                CredentialCache credentialCache = new CredentialCache();
                credentialCache.Add(siteUri, "NTLM", credential);
                webRequest.Proxy.Credentials = credentialCache;
            }

            try
            {
                using (WebResponse webResponse = webRequest.GetResponse())
                using (StreamReader reader = new StreamReader(webResponse.GetResponseStream()))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (WebException webException)
            {
                if (!IsProxyAuthenticationRequired(webException))
                {
                    // Best-effort contract of the original code: other web failures
                    // yield an empty page rather than an exception.
                    return string.Empty;
                }

                int remaining = attemptsLeft - 1;
                if (remaining <= 0)
                {
                    // Rethrow without resetting the stack trace.
                    throw;
                }

                // Return the retry's result; dropping it would always yield "".
                return GetHTML(siteUri, true, remaining);
            }
        }

        /// <summary>
        /// Tells whether <paramref name="webException"/> reports HTTP 407.
        /// </summary>
        private static bool IsProxyAuthenticationRequired(WebException webException)
        {
            HttpWebResponse response = webException.Response as HttpWebResponse;
            if (response != null && response.StatusCode == HttpStatusCode.ProxyAuthenticationRequired)
            {
                return true;
            }

            // Fallback: match on the message text when no HTTP response is attached.
            return webException.Message.Contains("(407)");
        }

        /// <summary>
        /// Extracts anchor targets from <paramref name="content"/> and keeps only
        /// those that do not look like pages, bare hosts, mail or script links.
        /// </summary>
        /// <param name="content">HTML text to scan; null or empty yields an empty list.</param>
        /// <returns>The href values that passed the filter, in document order.</returns>
        public List<string> GetAllUrls(string content)
        {
            List<string> urls = new List<string>();
            if (string.IsNullOrEmpty(content))
            {
                return urls;
            }

            foreach (Match match in AnchorRegex.Matches(content))
            {
                // The named group already excludes the surrounding quotes, so no
                // manual index arithmetic on the raw tag text is needed.
                string url = match.Groups["href"].Value.Trim();
                if (url.Length == 0 || IsSkippedUrl(url))
                {
                    continue;
                }

                urls.Add(url);
            }

            return urls;
        }

        /// <summary>
        /// Tells whether <paramref name="url"/> starts or ends with one of the
        /// filtered markers, ignoring case.
        /// </summary>
        private static bool IsSkippedUrl(string url)
        {
            foreach (string suffix in SkippedSuffixes)
            {
                if (url.EndsWith(suffix, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            foreach (string prefix in SkippedPrefixes)
            {
                if (url.StartsWith(prefix, StringComparison.OrdinalIgnoreCase))
                {
                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Requests <paramref name="url"/> and, when the response is
        /// "application/octet-stream", saves it to a file in the current directory.
        /// The file name is the response URI's path with '/' replaced by '.'.
        /// </summary>
        /// <param name="url">Address to download; "http://" is prepended when it does not start with "http".</param>
        /// <param name="message">
        /// Receives the file name when a download starts, "It is a page" for HTML
        /// responses, or the error text when the request fails. Left unchanged for
        /// other content types.
        /// </param>
        /// <returns>True when the file was written; otherwise false.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public bool DownLoad(string url, ref string message)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            if (!url.StartsWith("http", StringComparison.OrdinalIgnoreCase))
            {
                url = "http://" + url;
            }

            Uri siteUri = new Uri(url);
            HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(siteUri);
            webRequest.KeepAlive = true;
            webRequest.AuthenticationLevel = AuthenticationLevel.MutualAuthRequested;
            webRequest.Referer = url;

            try
            {
                // Dispose the response on every path, including the early returns.
                using (WebResponse webResponse = webRequest.GetResponse())
                {
                    string contentType = webResponse.ContentType ?? string.Empty;

                    if (!string.Equals(contentType, "application/octet-stream", StringComparison.OrdinalIgnoreCase))
                    {
                        if (contentType.StartsWith("text/html", StringComparison.OrdinalIgnoreCase))
                        {
                            message = "It is a page";
                        }

                        return false;
                    }

                    string filename = webResponse.ResponseUri.AbsolutePath.Substring(1).Replace("/", ".");
                    if (filename.Length == 0)
                    {
                        // A response URI with path "/" leaves nothing to name the file after.
                        message = "Could not derive a file name from the response URL";
                        return false;
                    }

                    message = filename;

                    using (Stream source = webResponse.GetResponseStream())
                    using (FileStream target = new FileStream(filename, FileMode.Create))
                    {
                        byte[] buffer = new byte[CopyBufferSize];
                        int bytesRead;
                        while ((bytesRead = source.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            target.Write(buffer, 0, bytesRead);
                        }
                    }

                    return true;
                }
            }
            catch (WebException webException)
            {
                message = webException.Message;
                return false;
            }
        }

        /// <summary>
        /// Returns the position just after the last occurrence of <paramref name="c"/>
        /// in <paramref name="s"/>, or the length of <paramref name="s"/> when
        /// <paramref name="c"/> does not occur.
        /// </summary>
        /// <param name="s">String to search.</param>
        /// <param name="c">Character to look for.</param>
        /// <returns>Last index of <paramref name="c"/> plus one, or <c>s.Length</c> if absent.</returns>
        public int ReverseIndexOf(string s, char c)
        {
            int last = s.LastIndexOf(c);
            return last == -1 ? s.Length : last + 1;
        }
    }
}