/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body as text.
/// </summary>
/// <remarks>
/// The character set is obtained from <see cref="DecodeData"/>, which issues its own
/// request, so the URL is requested twice. When no charset is reported, or the
/// reported name is not a known encoding, <see cref="Encoding.Default"/> is used.
/// </remarks>
/// <param name="url">Address of the resource; it is cast to an HTTP request.</param>
/// <returns>The decoded response body.</returns>
public string GetHtml(string url)
{
    string code = DecodeData(url);

    // Start from the fallback and only replace it when the detected name resolves.
    Encoding encoding = Encoding.Default;
    if (!string.IsNullOrEmpty(code))
    {
        try
        {
            // Encoding names are matched case-insensitively, so no case folding is needed.
            encoding = Encoding.GetEncoding(code.Trim());
        }
        catch (ArgumentException)
        {
            // Unknown or malformed charset name: keep the default encoding.
        }
        catch (NotSupportedException)
        {
            // Code page not available on this platform: keep the default encoding.
        }
    }

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Timeout = 30000;
    request.Headers.Set("Pragma", "no-cache");

    // The using blocks release the connection, stream and reader even when reading throws.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream streamReceive = response.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(streamReceive, encoding))
    {
        return streamReader.ReadToEnd();
    }
}
// Adapted, with minor changes, from http://blog.sunmast.com/natas/archive/2004/10/30/989.aspx
/// <summary>
/// Requests <paramref name="Url"/> and determines the character set of the response.
/// </summary>
/// <remarks>
/// The Content-Type response header is checked first. Only when it carries no
/// charset is the body downloaded and searched for the first "charset=" occurrence
/// (for example inside a meta tag).
/// </remarks>
/// <param name="Url">Address of the resource to inspect.</param>
/// <returns>The charset name, or an empty string when none was found. Never null.</returns>
private string DecodeData(string Url)
{
    WebRequest r = WebRequest.Create(Url);

    // Dispose the response so the underlying connection is always released.
    using (WebResponse w = r.GetResponse())
    {
        // First see whether the Content-Type header has a charset=value parameter.
        string charset = ExtractCharsetToken(w.Headers["content-type"]);
        if (charset.Length > 0)
        {
            // The header is authoritative; no need to download the body.
            return charset;
        }

        // The header had no charset: buffer the body and search it instead.
        using (MemoryStream rawdata = new MemoryStream())
        using (Stream rs = w.GetResponseStream())
        {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = rs.Read(buffer, 0, buffer.Length)) > 0)
            {
                rawdata.Write(buffer, 0, read);
            }

            // ASCII is enough here: only the charset declaration itself is inspected.
            string meta = Encoding.ASCII.GetString(rawdata.GetBuffer(), 0, (int)rawdata.Length);
            return ExtractCharsetToken(meta);
        }
    }
}

/// <summary>
/// Returns the value following the first "charset=" in <paramref name="text"/>,
/// without surrounding quotes or trailing parameters; empty when there is none.
/// </summary>
private static string ExtractCharsetToken(string text)
{
    const string marker = "charset=";

    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Header parameter and attribute names are case-insensitive ("Charset=", "CHARSET=").
    int markerIndex = text.IndexOf(marker, StringComparison.OrdinalIgnoreCase);
    if (markerIndex < 0)
    {
        return string.Empty;
    }

    // Skip an opening quote or padding, e.g. charset="utf-8" or charset='utf-8'.
    int start = markerIndex + marker.Length;
    while (start < text.Length
        && (text[start] == '"' || text[start] == '\'' || text[start] == ' '))
    {
        start++;
    }

    // The value ends at the closing quote, the next parameter, or the end of the tag.
    char[] delimiters = new char[] { '"', '\'', ';', ',', '>', '/', ' ', '\t', '\r', '\n' };
    int end = text.IndexOfAny(delimiters, start);
    if (end < 0)
    {
        end = text.Length;
    }

    return text.Substring(start, end - start);
}