关闭

asp.net抓取页面内容+下载文件

846人阅读 评论(0) 收藏 举报
分类:
 #region 抓取页面的内容
    public string GetHTML(string url)
    {
        ASCIIEncoding encoding = new ASCIIEncoding();
        byte[] postdata = encoding.GetBytes(posts);
        HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
        myRequest.Method = "GET";
        myRequest.ContentType = "text/html;";
        myRequest.UserAgent = "Mozilla/5.0 (Windows NT 5.2; rv:14.0) Gecko/20100101 Firefox/14.0.1";
        myRequest.Host = "www.aizhan.com";
        //myRequest.Headers.Add("Host", "www.aizhan.com");
        myRequest.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        myRequest.KeepAlive = true;

        HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse();
        StreamReader reader = new StreamReader(myResponse.GetResponseStream(), Encoding.UTF8);
        string content = reader.ReadToEnd();
        return content;
    }
    public string GetHTML2(string url) {
        WebClient wc = new WebClient();
        wc.Credentials = CredentialCache.DefaultCredentials;
        byte[] btPageData = wc.DownloadData(url);
        string strTargetHtml = Encoding.UTF8.GetString(btPageData);
        wc.Dispose();
        return strTargetHtml;
    }
   #endregion
0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:98383次
    • 积分:1507
    • 等级:
    • 排名:千里之外
    • 原创:52篇
    • 转载:28篇
    • 译文:0篇
    • 评论:8条
    最新评论