C#的一个URL加载器,能处理编码、相对地址解析、GET/POST、HTML的include、页面重定向...

要让机器模拟上网,首先要解决的是HTTP请求与响应的处理。下面这个Url加载器功能比较完整:它处理了响应的字符编码、URL相对地址的解析(见RFC),支持用POST提交数据,能在客户端展开HTML里的<!--include-->指令,还能跟随<head>里声明的页面重定向,用起来很方便。

  1ExpandedBlockStart.gifContractedBlock.gif        /**//// <summary>
  2        /// 最基本的Url加载函数,其它重载函数均调用它
  3        /// </summary>
  4        /// <param name="url"></param>
  5        /// <param name="encoding"></param>
  6        /// <param name="postdata"></param>
  7        /// <param name="include">是否在客户端包含include文件</param>
  8        /// <param name="redirectioncounter">计算重定向的次数</param>
  9        /// <returns></returns>

 10        public static string LoadUrl(ref UrlOperation uo, string encoding, string postdata, bool include, int redirectioncounter)
 11ExpandedBlockStart.gifContractedBlock.gif        {
 12            string str;
 13
 14            string url=uo.Url;
 15            HttpWebRequest request;
 16            HttpWebResponse response;
 17
 18            //采用HTTP GET或者POST
 19            if (postdata == null)
 20                postdata = "";
 21            if (postdata.Length == 0)//HTTP GET
 22ExpandedSubBlockStart.gifContractedSubBlock.gif            {
 23                try
 24ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 25                    request = (HttpWebRequest)System.Net.HttpWebRequest.Create(url);
 26                }

 27                catch
 28ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 29                    return "";
 30                }

 31
 32                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98)";
 33
 34                //超时异常发生在这里
 35                try
 36ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 37                    response = (HttpWebResponse)request.GetResponse();
 38                    //uo.Url = response.ResponseUri.ToString();
 39                }

 40                catch
 41ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 42                    return "";
 43                }

 44                
 45                System.IO.Stream stream = response.GetResponseStream();
 46
 47                Encoding source;
 48                try
 49ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 50                    source = Encoding.GetEncoding(encoding);
 51                }

 52                catch
 53ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 54                    source = Encoding.UTF8;
 55                }

 56
 57                StreamReader sr = new StreamReader(stream, source);
 58                try
 59ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 60                    str = sr.ReadToEnd();
 61                }

 62                catch 
 63ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 64                    return "";
 65                }

 66                sr.Close();
 67                stream.Close();
 68            }

 69            else//HTTP POST
 70ExpandedSubBlockStart.gifContractedSubBlock.gif            {
 71                try
 72ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 73                    ASCIIEncoding asciiencoding = new ASCIIEncoding();
 74                    byte[] bytes = asciiencoding.GetBytes(postdata);
 75
 76                    request = (HttpWebRequest)System.Net.HttpWebRequest.Create(url);
 77                    request.Method = "POST";
 78                    request.ContentType = "application/x-www-form-urlencoded";
 79                    request.ContentLength = postdata.Length;
 80
 81                    Stream poststream = request.GetRequestStream();
 82                    poststream.Write(bytes, 0, bytes.Length);
 83                    poststream.Close();
 84
 85                    response = (HttpWebResponse)request.GetResponse();
 86
 87                    StreamReader sr = new StreamReader(response.GetResponseStream(), System.Text.Encoding.GetEncoding("GB2312"));
 88                    str = sr.ReadToEnd();
 89                    response.Close();
 90                }

 91                catch
 92ExpandedSubBlockStart.gifContractedSubBlock.gif                {
 93                    return "";
 94                }

 95            }

 96
 97            uo.Url = response.ResponseUri.ToString();
 98
 99            //在客户端包含include文件
100            if (include)
101ExpandedSubBlockStart.gifContractedSubBlock.gif            {
102                System.Text.RegularExpressions.Regex regex = new Regex(@"<!--\W*include.*?-->", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
103                MatchCollection mc = regex.Matches(str);
104                if (mc.Count > 0)
105ExpandedSubBlockStart.gifContractedSubBlock.gif                {
106                    System.Text.RegularExpressions.Regex urlregex = new Regex("(?<=\").*(?=\")", RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.Singleline);
107
108                    string[] segments = regex.Split(str);
109
110                    StringBuilder sb = new StringBuilder();
111                    sb.Append(segments[0]);
112                    for (int i = 1; i <= mc.Count; i++)
113ExpandedSubBlockStart.gifContractedSubBlock.gif                    {
114                        string s = mc[i - 1].Value;
115                        string newurl = urlregex.Match(s).Value;
116                        UrlOperation newuo = uo.Forward(newurl);
117                        string included = LoadUrl(ref newuo, encoding, ""true);
118                        sb.Append(included);
119                        sb.Append(segments[i]);
120                    }

121
122                    str = sb.ToString();
123                }

124            }

125
126            //页面重定向
127            string redirection=GetRedirection(str).Trim();
128            if (redirection.Length > 0&&redirectioncounter<5)
129ExpandedSubBlockStart.gifContractedSubBlock.gif            {
130                uo=uo.Forward(redirection);
131                return LoadUrl(ref uo, encoding, postdata, include, redirectioncounter + 1);
132            }

133            else
134                return str;
135        }

转载于:https://www.cnblogs.com/fery/archive/2009/11/20/1606867.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值