自己写的采集

using  System;
using  System.Data;
using  System.Configuration;
using  System.Collections;
using  System.Web;
using  System.Web.Security;
using  System.Web.UI;
using  System.Web.UI.WebControls;
using  System.Web.UI.WebControls.WebParts;
using  System.Web.UI.HtmlControls;

using  System.IO;
using  System.Net;
using  System.Text;
using  Dang.Utils;

namespace  MyTest.CaiJi
{
    
public partial class caiji01 : System.Web.UI.Page
    
{
        
// Scraper working state. NOTE(review): these are static fields, so every request served by
// this page class reads and writes the same values - confirm that sharing is intended.
// reAllListHtml: HTML of the list page (assigned in Button1_Click).
// reAllHtml: HTML of the page currently being processed (assigned in getflash).
// htmlAddress: list-page URL (assigned in Button1_Click).
// flashAddress: most recently extracted file address (assigned in getflash).
public static string reAllListHtml,reAllHtml,htmlAddress,flashAddress;
        
// RelativeWay / RootWay: URL prefixes computed in Button1_Click and consumed by Tohttp.
// htmlListLu is not referenced anywhere in the visible code.
public static string RelativeWay, RootWay, htmlListLu;
        
// Addresses extracted from the list page (assigned in Button3_Click).
public static ArrayList alist;// address list
        // Reset to 0 in Page_Load and incremented once per getflash call.
        public static int getSavedNum;
        
/// <summary>
        /// Runs on every request to the page and zeroes the processed-page counter.
        /// </summary>
        protected void Page_Load(object sender, EventArgs e)
        {
            // getflash increments this counter once per processed page.
            getSavedNum = 0;
        }


        
/// <summary>
        /// Step 1: reads the list-page URL from TextBox1, derives the prefixes used to
        /// resolve relative links (RelativeWay, RootWay), downloads the list page into
        /// reAllListHtml and reveals Panel1 for the next step.
        /// </summary>
        /// <param name="sender">The button that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Button1_Click(object sender, EventArgs e)
        {
            // Parse with Uri instead of slicing at fixed offsets: searching for "/" from
            // index 7 assumed an "http://" prefix followed by a path, so "https://..."
            // produced a bogus root and "http://host" threw from Substring.
            Uri pageUri;
            if (!Uri.TryCreate(TextBox1.Text.Trim(), UriKind.Absolute, out pageUri)
                || (pageUri.Scheme != Uri.UriSchemeHttp && pageUri.Scheme != Uri.UriSchemeHttps))
            {
                Label1.Text += "列表页面地址无效!<br />";
                return;
            }

            // AbsoluteUri normalizes only scheme and host to lower case; the path keeps
            // its case, which matters on case-sensitive servers.
            htmlAddress = pageUri.AbsoluteUri;

            // Drop query/fragment before looking for the last "/" so a slash inside the
            // query string cannot end up in the relative base.
            string pathPart = pageUri.GetLeftPart(UriPartial.Path);
            RelativeWay = pathPart.Substring(0, pathPart.LastIndexOf('/') + 1); // relative base, ends with "/"
            RootWay = pageUri.GetLeftPart(UriPartial.Authority) + "/";          // site root, ends with "/"

            reAllListHtml = GetPageHTML(htmlAddress);
            Panel1.Visible = true;
        }

        

        
/// <summary>
        /// Step 2: extracts every address between the loop start/end markers typed into
        /// TextBox2 and TextBox3 from the downloaded list page, stores them in alist,
        /// then swaps Panel1 for the StringDo panel.
        /// </summary>
        protected void Button3_Click(object sender, EventArgs e)
        {
            alist = Get_fileAddresss(TextBox2.Text.Trim(), TextBox3.Text.Trim(), reAllListHtml);

            StringDo.Visible = true;
            Panel1.Visible = false;
        }


        
/// <summary>
        /// Step 3: runs getflash for every address collected in alist.
        /// </summary>
        /// <param name="sender">The button that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Button2_Click(object sender, EventArgs e)
        {
            // alist is a static field: it is null until Button3_Click has run in this
            // application instance, so guard instead of failing with a NullReferenceException.
            // Work on a local reference so a concurrent reassignment cannot change the list mid-loop.
            ArrayList pages = alist;
            if (pages == null)
            {
                Label1.Text += "请先获取地址列表!<br />";
                return;
            }

            for (int i = 0; i < pages.Count; i++)
            {
                getflash(pages[i].ToString());
            }
        }


        
/// <summary>
        /// Click handler of the "Go" link button. It performs exactly the same work as
        /// Button2_Click (getflash for every entry of alist), so it forwards to it.
        /// </summary>
        protected void Go_Click(object sender, EventArgs e)
        {
            Button2_Click(sender, e);
        }



        
/// <summary>
        /// Downloads one detail page, extracts the file address found between the markers
        /// typed into StartUrlString / EndUrlString, saves that file and appends a
        /// success/failure line for the page to Label1.
        /// </summary>
        /// <param name="url">Address of the detail page to process.</param>
        public void getflash(string url)
        {
            reAllHtml = GetPageHTML(url);

            string starstr = StartUrlString.Text.Trim();
            string endstr = EndUrlString.Text.Trim();
            flashAddress = Get_fileAddress(starstr, endstr, reAllHtml);

            bool isSave = false;
            if (flashAddress != null)
            {
                flashAddress = Tohttp(flashAddress);
                isSave = SaveFileFromUrl(flashAddress);
            }

            // url was cut out of a remote page, so it is untrusted text: encode it before
            // it becomes part of the label's HTML.
            string shownUrl = HttpUtility.HtmlEncode(url);
            if (isSave)
            {
                Label1.Text += shownUrl + "成功!<br />";
            }
            else
            {
                Label1.Text += shownUrl + "失败!<br />";
            }

            getSavedNum += 1;
        }



        
/// <summary>
        /// Turns an address extracted from a page into an absolute URL using the
        /// RootWay / RelativeWay prefixes.
        /// </summary>
        /// <param name="str">Absolute, root-relative ("/x") or page-relative ("x") address.</param>
        /// <returns>
        /// <paramref name="str"/> unchanged when it is already an http/https URL (or null);
        /// otherwise the address prefixed with the site root or the page directory.
        /// </returns>
        public string Tohttp(string str)
        {
            if (str == null)
            {
                return null;
            }

            // Absolute already. The scheme is case-insensitive and may be https; the old
            // case-sensitive "http://" test glued RelativeWay in front of such links.
            if (str.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || str.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            {
                return str;
            }

            // Root-relative: RootWay already ends with "/", so trim it to avoid
            // producing "http://host//path".
            if (str.StartsWith("/", StringComparison.Ordinal))
            {
                return (RootWay ?? string.Empty).TrimEnd('/') + str;
            }

            // Page-relative: append to the directory of the list page.
            return RelativeWay + str;
        }

        
/// <summary>
        /// Returns the text located between the first occurrence of
        /// <paramref name="startstr"/> and the next occurrence of <paramref name="endstr"/>
        /// that follows it.
        /// </summary>
        /// <param name="startstr">Marker that precedes the wanted text.</param>
        /// <param name="endstr">Marker that follows the wanted text.</param>
        /// <param name="strResult">Text (page HTML) to search.</param>
        /// <returns>The enclosed text, or null when either marker is missing or an argument is null.</returns>
        public string Get_fileAddress(string startstr,string endstr,string strResult)
        {
            if (startstr == null || endstr == null || strResult == null)
            {
                return null;
            }

            int start = strResult.IndexOf(startstr, StringComparison.Ordinal);
            if (start == -1)
            {
                return null;
            }

            // Look for the end marker only AFTER the start marker. Searching from index 0
            // found earlier occurrences (e.g. the quote inside a src=" start marker) and
            // made Substring throw on a negative length.
            int contentStart = start + startstr.Length;
            int stop = strResult.IndexOf(endstr, contentStart, StringComparison.Ordinal);
            if (stop == -1)
            {
                return null;
            }

            return strResult.Substring(contentStart, stop - contentStart);
        }


        
/// <summary>
        /// Collects every piece of text enclosed by <paramref name="startstr"/> and the
        /// following <paramref name="endstr"/>, converting each one to an absolute URL
        /// with Tohttp.
        /// </summary>
        /// <param name="startstr">Marker that precedes each address; must not be empty.</param>
        /// <param name="endstr">Marker that follows each address.</param>
        /// <param name="strResult">Text (list-page HTML) to scan.</param>
        /// <returns>The addresses in document order; an empty list when nothing matches or the input is unusable.</returns>
        public ArrayList Get_fileAddresss(string startstr, string endstr, string strResult)
        {
            ArrayList list = new ArrayList();

            // An empty start marker matches at every position without consuming input,
            // which made the original loop run forever; null input threw.
            if (string.IsNullOrEmpty(strResult) || string.IsNullOrEmpty(startstr) || endstr == null)
            {
                return list;
            }

            // Walk the text with an index instead of re-copying the remainder on each hit.
            int position = 0;
            while (true)
            {
                int start = strResult.IndexOf(startstr, position, StringComparison.Ordinal);
                if (start == -1)
                {
                    break;
                }

                int contentStart = start + startstr.Length;
                int stop = strResult.IndexOf(endstr, contentStart, StringComparison.Ordinal);
                if (stop == -1)
                {
                    break;
                }

                list.Add(Tohttp(strResult.Substring(contentStart, stop - contentStart)));

                // Resume AT the end marker (not after it), as before, so an end marker
                // that doubles as the beginning of the next start marker still matches.
                position = stop;
            }

            return list;
        }



        
/// <summary>
        /// Downloads the file at the given address and saves it to local disk.
        /// </summary>
        /// <param name="Url">Address of the file to download.</param>
        /// <returns>
        /// true when the server answered with a non-text content type and the body was
        /// written by SaveBinaryFile; false in every other case (no usable extension,
        /// text response, request or write failure).
        /// </returns>
        public  bool SaveFileFromUrl(string Url)
        {
            if (string.IsNullOrEmpty(Url) || Url.IndexOf(".") == -1)
            {
                return false;
            }

            // Take the extension from the path only: a query string or fragment may
            // contain dots of its own ("x.swf?v=1.2").
            string pathOnly = Url;
            int cut = pathOnly.IndexOfAny(new char[] { '?', '#' });
            if (cut >= 0)
            {
                pathOnly = pathOnly.Substring(0, cut);
            }

            int lastDot = pathOnly.LastIndexOf(".");
            if (lastDot == -1 || lastDot == pathOnly.Length - 1)
            {
                return false;
            }

            string fileExt = pathOnly.Substring(lastDot + 1);

            // The extension becomes part of a local file name, so only accept plain
            // letters/digits (rejects things like "com/file" from "http://a.com/file").
            foreach (char c in fileExt)
            {
                if (!char.IsLetterOrDigit(c))
                {
                    return false;
                }
            }

            // NOTE(review): the file is written below the web root under /flashsrc/ with a
            // remote-controlled extension. Consider rejecting server-executable extensions
            // (aspx, ashx, config, ...) or using an allow-list.

            WebResponse response = null;
            try
            {
                HttpWebRequest request = WebRequest.Create(Url) as HttpWebRequest;
                if (request == null)
                {
                    // Not an http/https address.
                    return false;
                }

                response = request.GetResponse();

                string contentType = response.ContentType ?? string.Empty;
                if (contentType.StartsWith("text/", StringComparison.OrdinalIgnoreCase))
                {
                    // Text responses (typically HTML error pages) are not saved.
                    return false;
                }

                return SaveBinaryFile("flashsrc", fileExt, response);
            }
            catch (Exception err)
            {
                // Best effort: one failed download must not abort the whole batch,
                // but leave a trace instead of discarding the error.
                System.Diagnostics.Trace.WriteLine("SaveFileFromUrl failed for " + Url + ": " + err);
                return false;
            }
            finally
            {
                // Always release the connection; the response was never closed before.
                if (response != null)
                {
                    response.Close();
                }
            }
        }

       
        
/// <summary>
        /// Writes the body of a web response to a new, uniquely named file on disk.
        /// </summary>
        /// <param name="fileDirectory">Target directory name below the site root (e.g. "flashsrc"); created when missing.</param>
        /// <param name="fileNameExt">Extension (without the dot) for the saved file.</param>
        /// <param name="response">Response whose stream is copied to the file.</param>
        /// <returns>true when the whole body was written; false when any step failed.</returns>
        private  bool SaveBinaryFile(string fileDirectory,string fileNameExt,WebResponse response)
        {
            try
            {
                string dirpath = Server.MapPath("/" + fileDirectory + "/");
                if (!Directory.Exists(dirpath))
                {
                    Directory.CreateDirectory(dirpath);
                }

                // File.Create truncates an existing file, so no Exists/Delete is needed first.
                string fileName = Path.Combine(dirpath, GetUniquelyString() + "." + fileNameExt);

                // using guarantees both streams are closed even when Read/Write throws.
                using (Stream inStream = response.GetResponseStream())
                using (Stream outStream = File.Create(fileName))
                {
                    byte[] buffer = new byte[1024];
                    int read;
                    while ((read = inStream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        outStream.Write(buffer, 0, read);
                    }
                }

                return true;
            }
            catch (Exception err)
            {
                // Failure is reported through the return value, as before; keep a trace for diagnosis.
                System.Diagnostics.Trace.WriteLine("SaveBinaryFile failed: " + err);
                return false;
            }
        }


        
/// <summary>
        /// Builds a file name that is unlikely to repeat: five random letters, the current
        /// day/month/hour/minute/second (single-digit values get a letter prefix), the
        /// millisecond and a random number below 1000, with every "1" replaced by "q".
        /// </summary>
        /// <returns>The generated name, without extension.</returns>
        public static string GetUniquelyString()
        {
            const int RANDOM_MAX_VALUE = 1000;

            DateTime dt = DateTime.Now;

            int rndNumber;
            lock (uniqueNameRandom)
            {
                // One shared generator: a new time-seeded Random per call returns the
                // same values for calls made within the same clock tick.
                rndNumber = uniqueNameRandom.Next(RANDOM_MAX_VALUE);
            }

            string strTemp = YieldRandNum(5)
                + PrefixSingleDigit(dt.Day, "a")
                + PrefixSingleDigit(dt.Month, "i")
                + PrefixSingleDigit(dt.Hour, "n")
                + PrefixSingleDigit(dt.Minute, "j")
                + PrefixSingleDigit(dt.Second, "n")
                + dt.Millisecond.ToString()
                + rndNumber.ToString();

            return strTemp.Replace("1", "q");
        }

        // Shared random source for GetUniquelyString; access is serialized with lock.
        private static readonly Random uniqueNameRandom = new Random();

        // Returns the number as text, prefixed with the given letter when it has a single digit.
        private static string PrefixSingleDigit(int value, string prefix)
        {
            return (value > 9) ? value.ToString() : prefix + value.ToString();
        }


        
/// <summary>
        /// Produces a string of random letters.
        /// </summary>
        /// <param name="d">Number of letters to generate; zero or negative yields an empty string.</param>
        /// <returns>A string of <paramref name="d"/> letters drawn from a fixed letter set.</returns>
        public static string YieldRandNum(int d)
        {
            if (d <= 0)
            {
                return string.Empty;
            }

            // Size the builder for the requested length instead of a fixed 4.
            StringBuilder sb = new StringBuilder(d);
            lock (randomLetterSource)
            {
                // One shared generator: a new time-seeded Random per call repeats its
                // output for calls made within the same clock tick.
                for (int i = 0; i < d; i++)
                {
                    sb.Append(RandomLetterSeed[randomLetterSource.Next(0, RandomLetterSeed.Length)]);
                }
            }

            return sb.ToString();
        }

        // Letters YieldRandNum draws from (same set and order as the original seed array).
        private static readonly char[] RandomLetterSeed =
        {
            'A', 'b', 'B', 'c', 'C', 'd', 'D', 'e', 'E', 'f', 'F', 'G', 'h', 'H', 'i',
            'j', 'J', 'k', 'K', 'L', 'm', 'M', 'n', 'N', 'p', 'P', 'q', 'Q', 'R', 's',
            'S', 't', 'T', 'u', 'U', 'v', 'V', 'w', 'W', 'x', 'X', 'y', 'Y', 'z', 'Z'
        };

        // Shared random source for YieldRandNum; access is serialized with lock.
        private static readonly Random randomLetterSource = new Random();


        
/// <summary>
        /// Downloads the page at the given URL and returns its HTML.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>
        /// The response body decoded with Encoding.Default, or an empty string when the
        /// request fails, times out (2 seconds) or is not answered over HTTP.
        /// </returns>
        public static string GetPageHTML(string url)
        {
            WebResponse rawResponse = null;

            try
            {
                WebRequest request = WebRequest.Create(url);
                // NOTE(review): this sends the process's default credentials to whatever
                // host the URL names - confirm that is wanted for external sites.
                request.Credentials = CredentialCache.DefaultCredentials;
                request.Timeout = 2000;

                rawResponse = request.GetResponse();
                HttpWebResponse response = rawResponse as HttpWebResponse;
                if (response == null)
                {
                    // Not an HTTP response (e.g. file://): same empty result as before,
                    // without relying on a swallowed NullReferenceException.
                    return string.Empty;
                }

                using (Stream stream = response.GetResponseStream())
                using (StreamReader sr = new StreamReader(stream, Encoding.Default))
                {
                    return sr.ReadToEnd();
                }
            }
            catch (Exception err)
            {
                // Callers treat an empty string as "page unavailable"; keep a trace of why.
                System.Diagnostics.Trace.WriteLine("GetPageHTML failed for " + url + ": " + err);
                return string.Empty;
            }
            finally
            {
                // The response itself was never closed before, leaking its connection.
                if (rawResponse != null)
                {
                    rawResponse.Close();
                }
            }
        }


       
        

       

    }


}



<%@ Page Language="C#" AutoEventWireup="true" CodeBehind="caiji01.aspx.cs" Inherits="MyTest.CaiJi.caiji01" ValidateRequest="false" %>

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

< html  xmlns ="http://www.w3.org/1999/xhtml"   >
< head  runat ="server" >
    
< title > 无标题页 </ title >
    
< script  type ="text/javascript" >
        
// Request object shared by createXMLHttpRequest, pollServer and pollCallback.
var xmlHttp;
        
// Not referenced anywhere in the visible script.
var key;
        
// Creates the request object and stores it in the global xmlHttp.
// The ActiveX implementation is tried first, then the native XMLHttpRequest;
// when neither exists xmlHttp is left untouched.
function createXMLHttpRequest() {
            if (window.ActiveXObject) {
                xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");
                return;
            }

            if (window.XMLHttpRequest) {
                xmlHttp = new XMLHttpRequest();
            }
        }

        
        
// Asks Loading.aspx for the current progress with an asynchronous POST;
// the answer is handled by pollCallback.
function pollServer() {
            createXMLHttpRequest();

            // Declared with var so it no longer leaks as an implicit global.
            var data = "load=" + "";
            var url = "Loading.aspx";

            xmlHttp.open("POST", url, true);
            xmlHttp.setRequestHeader("Content-Type", "application/x-www-form-urlencoded");
            xmlHttp.onreadystatechange = pollCallback;
            xmlHttp.send(data);
        }


        
// readystatechange handler for the progress request: once the response has
// arrived successfully, updates the bar width and percentage text, then either
// schedules the next poll in 2 seconds or shows the completion message.
function pollCallback() {
            // Ignore intermediate states and failed requests.
            if (xmlHttp.readyState != 4 || xmlHttp.status != 200) {
                return;
            }

            var percent_complete = xmlHttp.responseText;
            var progress = document.getElementById("progress");
            var progressPersent = document.getElementById("progressPersent");

            progress.style.width = percent_complete + "%";
            progressPersent.innerHTML = percent_complete + "%";

            // responseText is a string; convert explicitly for the numeric comparison.
            if (Number(percent_complete) < 100) {
                // Pass the function itself rather than a string that has to be evaluated.
                setTimeout(pollServer, 2000);
            } else {
                document.getElementById("complete").innerHTML = "已生成完成!";
            }
        }
 
        
           
 
// Resets the progress display: blanks the percentage cell and shows the
// "start generating" text in the completion cell.
function clearBar() {
   // Looked up but not used: the line that made the bar visible is disabled below.
   var progress_bar = document.getElementById("progressBar");
   //progress_bar.style.visibility = "visible"

   document.getElementById("progressPersent").innerHTML = " ";
   document.getElementById("complete").innerHTML = "开始生成!";
 }

 
// Click handler of the "next step" link: starts progress polling, then triggers
// the server-side postback of the 'Go' control. Returns false.
// NOTE(review): a full postback normally reloads the page, which would interrupt
// the polling started here - confirm this works as intended.
function next() {
    pollServer();
    __doPostBack('Go', '');
    return false;
 }

    
</ script >


</ head >
< body >
    
< form  id ="form1"  runat ="server" >
    
< div > 结果: < asp:Label
            
ID ="Label1"  runat ="server"  Text ="" ></ asp:Label >< br  />
            
        获取列表页面:
< asp:TextBox  ID ="TextBox1"  runat ="server" ></ asp:TextBox >
        
< asp:Button  ID ="Button1"  runat ="server"  Text ="下一步"  OnClick ="Button1_Click"   />
        
< asp:Panel  runat ="server"  ID ="Panel1"  Visible ="false" >
        开始循环标记:
< asp:TextBox  Text =""  TextMode ="MultiLine"  runat ="server"  ID ="TextBox2" ></ asp:TextBox >< br  />
        结束循环结束:
< asp:TextBox  Text =""  TextMode ="MultiLine"  runat ="server"  ID ="TextBox3" ></ asp:TextBox >< br  />
        
< asp:Button  ID ="Button3"  runat ="server"  Text ="下一步"  OnClick ="Button3_Click"    />
        
</ asp:Panel >
        
< asp:Panel  runat ="server"  ID ="StringDo"  Visible ="false" >
        开始标记:
< asp:TextBox  Text =""  TextMode ="MultiLine"  runat ="server"  ID ="StartUrlString" ></ asp:TextBox >< br  />
        结束标记:
< asp:TextBox  Text =""  TextMode ="MultiLine"  runat ="server"  ID ="EndUrlString" ></ asp:TextBox >< br  />
        
<a href="javascript:next();">下一步</a><asp:Button ID="Button2" runat="server" Text="下一步" OnClick="Button2_Click" />
        
< asp:LinkButton  ID ="Go"  runat ="server"  Text ="生成"  OnClick ="Go_Click" ></ asp:LinkButton >
        
</ asp:Panel >
    
</ div >
    
< div  id ="progressBar"  style ="padding:0px;border:solid black 0px;visibility:hidden" >
< table  width ="300"  border ="0"  cellspacing ="0"  cellpadding ="0"   align ="center"   >
  
< tr >
    
< td  align ="center"  id ="progressPersent"   > 0% </ td >
  
</ tr >
  
< tr  >
    
< td >
 
< table  width ="100%"  border ="1"  cellspacing ="0"  cellpadding ="0"  bordercolor ="#000000" >
  
< tr >
    
< td >
 
< table  width ="1%"  border ="0"  cellspacing ="0"  cellpadding ="0"  bgcolor ="#FF0000"  id ="progress"  height ="20" >
              
< tr >
                
< td >   </ td >
              
</ tr >
            
</ table ></ td >
  
</ tr >
</ table >
</ td >
  
</ tr >
  
< tr >
    
< td  align ="center"  id ="complete" ></ td >
  
</ tr >
</ table >
</ div >

    
</ form >
    
< script  type ="text/javascript" >
    clearBar();
    
</ script >
</ body >
</ html >

 
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值