C#下载网页源码的方法(示例一:使用 WebClient 下载文件)

 

 

 

 

using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Data; using System.Net; using System.IO; using System.Text.RegularExpressions;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console sample that downloads one file over HTTP and stores it in a
    /// fixed local directory.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Downloads the hard-coded URL to <c>D:\&lt;file name&gt;</c>. Any failure is
        /// written to the console instead of terminating the process.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string url = "http://files.cnblogs.com/scy251147/EDaemonSolution.zip";

            // Everything after the last '/' is the file name. It already carries
            // its extension, so nothing is appended to it.
            string fileName = url.Substring(url.LastIndexOf('/') + 1);

            // Plain concatenation on purpose: Path.Combine("D:", name) would give
            // the drive-relative path "D:name" rather than "D:\name".
            string directory = "D:";
            string targetPath = directory + "\\" + fileName;

            try
            {
                // DownloadFile streams the response straight to disk, so the file
                // size is not limited by an in-memory buffer, and an existing file
                // at the target path is replaced rather than partially overwritten.
                using (WebClient client = new WebClient())
                {
                    client.DownloadFile(url, targetPath);
                }
            }
            catch (Exception ex)
            {
                // Top-level handler of the sample: report and exit normally.
                Console.WriteLine(ex.ToString());
            }
        }
    }
}

 

 

 

 

 

 

C#下载网页源码的方法(示例二:使用 MSXML2.XMLHTTP,需要在项目中引用 Microsoft XML COM 组件)

using System.Text.RegularExpressions;

using MSXML2;

 

/// <summary>Matches from the opening <c>&lt;body</c> tag to the end of the document.</summary>
private static readonly Regex BodyToEndRegex =
    new Regex(@"<\s*body[\s\S]*", RegexOptions.IgnoreCase);

/// <summary>
/// Matches a script element (up to the first "/script&gt;") or a single line feed.
/// NOTE(review): the character class <c>[.|\n]</c> matches only a literal '.', '|'
/// or line feed, not "any character"; it is kept as-is so the set of removed
/// text does not change. Confirm whether <c>\s</c> was intended.
/// </summary>
private static readonly Regex ScriptOrNewlineRegex =
    new Regex(@"<[.|\n]*?script[\s\S]*?/[.|\n]*?script\n*>|\n", RegexOptions.IgnoreCase);

/// <summary>
/// Downloads the page source of <paramref name="link"/> with a synchronous
/// MSXML2 XMLHTTP GET request and returns a reduced version of it.
/// </summary>
/// <param name="link">URL of the page to download.</param>
/// <returns>
/// The response decoded as GB2312 and trimmed, cut down to the part starting at
/// the <c>&lt;body</c> tag (the whole text when no such tag exists), with script
/// elements and line feeds removed and every backslash replaced by a forward
/// slash. An empty string when the response has no body.
/// </returns>
private string gethtm(string link)
{
    MSXML2.XMLHTTP xmlhttp = new MSXML2.XMLHTTP();
    Byte[] body;
    try
    {
        // Third argument false = synchronous: send() returns only after the
        // response has arrived, so no waiting between the calls is needed.
        xmlhttp.open("GET", link, false, null, null);
        xmlhttp.send("");
        body = (Byte[])xmlhttp.responseBody;
    }
    finally
    {
        // Release the underlying COM object deterministically; assigning null
        // to the local would only drop the managed reference.
        System.Runtime.InteropServices.Marshal.ReleaseComObject(xmlhttp);
    }

    if (body == null || body.Length == 0)
    {
        return string.Empty;
    }

    // The encoding is hard-coded: pages served in another charset will be
    // decoded incorrectly.
    string html = Encoding.GetEncoding("GB2312").GetString(body).Trim();

    // Keep only the part from <body onwards; a document without a body tag is
    // processed as a whole instead of failing on a missing match.
    Match bodyMatch = BodyToEndRegex.Match(html);
    if (bodyMatch.Success)
    {
        html = bodyMatch.Value;
    }

    html = ScriptOrNewlineRegex.Replace(html, "");
    return html.Replace("\\", "/");
}

转载于:https://www.cnblogs.com/vmyspace/archive/2013/05/16/3082562.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值