关于使用HtmlAgilityPack

请直接看代码:

 

         ///   <summary>
        
///  根据输入的地址获取其文档节点对象
        
///   </summary>
        
///   <param name="url"> 地址 </param>
        
///   <returns></returns>
         public  static HtmlAgilityPack.HtmlNode GetHtmlNodeFromLink( string url)
        {
             try{
                Uri uri =  new Uri(url);

                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(uri);
                WebResponse response = request.GetResponse();

                Stream stream = response.GetResponseStream();
                StreamReader read =  new StreamReader(stream, Encoding.GetEncoding( " gb2312 "));
                 string str = read.ReadToEnd();

                HtmlAgilityPack.HtmlDocument html =  new HtmlAgilityPack.HtmlDocument();
                html.LoadHtml(str);
                 return html.DocumentNode;
            }
             catch{ return  null;}
        }

         ///   <summary>
        
///  根据输入的URL地址输出指定XPATH下的节点集合
        
///   </summary>
        
///   <param name="url"> 地址 </param>
        
///   <param name="xPath"> 过滤地址 </param>
        
///   <param name="imgs"> 过滤地址 </param>
        
///   <param name="links"> 过滤地址 </param>
        
///   <param name="title"> 标题 </param>
        
///   <returns></returns>
         public  static  bool GetGalleryInfo(HtmlAgilityPack.HtmlNode htmlNode, string xPath, ref  string[] imgs,  ref  string[] links, ref  string[] title)
        {
             try
            {
                HtmlNodeCollection hnc = htmlNode.SelectNodes(xPath); // " // div[@class='slideBannerA homeSlideAD1']"
                 if (hnc.Count <  1)
                     return  false;
                links =  new  string[hnc.Count];
                title =  new  string[hnc.Count];
                imgs =  new  string[hnc.Count];
                 int i =  0;
                 string cateDataRegex =  @" background-image:url\((?<image>.+)\) ";
                Regex re =  new Regex(cateDataRegex, RegexOptions.IgnoreCase | RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
                 foreach (HtmlNode node  in hnc)
                {
                    HtmlAttributeCollection hac = node.Attributes;
                    links[i] = hac[ " href "].Value;
                    imgs[i] = hac[ " style "] ==  null ? hac[ " src2 "].Value : re.Match(hac[ " style "].Value).Groups[ " image "].Value;
                    title[i++] =  string.IsNullOrEmpty(hac[ " title "].Value) ? hac[ " alt "].Value : hac[ " title "].Value;
                }
                 return  true;
            }
             catch {  return  false; }
        }
        
         // 调用 
        
             string[] strLink;
             string[] strLinAlt;
             string[] strImg;
             string urls =  " http://www.newegg.com.cn ";
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink(urls);
            GetGalleryInfo(nodes,  " //div[@class='slideBannerA homeSlideAD1']/div[1]/div[1]/a "out strImg,  out strLink, out strLinAlt);

 

淘宝今日活动:

/// <summary>
        /// Collects the link and image address of every node selected by <paramref name="xPath"/>
        /// (written for the "today's promotions" list on the Taobao home page).
        /// </summary>
        /// <param name="htmlNode">Node to search from; <c>null</c> makes the call return <c>false</c>.</param>
        /// <param name="xPath">XPath expression selecting the list items.</param>
        /// <param name="imgs">
        /// Receives the <c>src</c> attribute of the first child of each item's second child node.
        /// Empty when nothing was selected.
        /// </param>
        /// <param name="links">
        /// Receives the <c>href</c> attribute of each item's second child node.
        /// Empty when nothing was selected.
        /// </param>
        /// <returns>
        /// <c>true</c> when at least one item was selected and every item had the expected shape;
        /// otherwise <c>false</c>. When an item has an unexpected shape the arrays hold the entries
        /// gathered before that item and <c>null</c> for the rest.
        /// </returns>
        /// <example>
        /// <code>
        /// string[] strLink;
        /// string[] strImg;
        /// HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink("http://www.taobao.com");
        /// GetTaobaoGalleryInfo(nodes, "//div[@class='sub-promotion-content']/div[@class='ks-switchable-content zoom']/ul/li", out strImg, out strLink);
        /// </code>
        /// </example>
        public static bool GetTaobaoGalleryInfo(HtmlAgilityPack.HtmlNode htmlNode, string xPath, out string[] imgs, out string[] links)
        {
            imgs = new string[0];
            links = new string[0];

            if (htmlNode == null)
            {
                return false;
            }

            // SelectNodes yields null, not an empty collection, when nothing matches.
            HtmlNodeCollection hnc = htmlNode.SelectNodes(xPath);
            if (hnc == null || hnc.Count < 1)
            {
                return false;
            }

            links = new string[hnc.Count];
            imgs = new string[hnc.Count];

            int i = 0;
            foreach (HtmlNode node in hnc)
            {
                // The anchor is the item's second child node and the image the anchor's first child.
                HtmlNodeCollection itemChildren = node.ChildNodes;
                HtmlNode anchor = (itemChildren != null && itemChildren.Count > 1) ? itemChildren[1] : null;
                HtmlNodeCollection anchorChildren = anchor == null ? null : anchor.ChildNodes;
                HtmlNode image = (anchorChildren != null && anchorChildren.Count > 0) ? anchorChildren[0] : null;

                string href = anchor == null ? null : anchor.GetAttributeValue("href", null);
                string src = image == null ? null : image.GetAttributeValue("src", null);
                if (href == null || src == null)
                {
                    // Unexpected markup: report failure and keep what was gathered so far.
                    return false;
                }

                links[i] = href;
                imgs[i] = src;
                i++;
            }

            return true;
        }

 

 

  // 今日炸弹
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.newegg.com.cn/ ");

            HtmlAgilityPack.HtmlNode node = nodes.SelectSingleNode( " //div[@class='colSub']/div[@class='picBanner shellShocker ']/a "); // " // div[@class='slideBannerA homeSlideAD1']"
           
             string strImg = node.Attributes[ " href "].Value;
             string strSrc= node.ChildNodes[ 0].Attributes[ " src "].Value;

 

 

             // 淘宝类别活动
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.taobao.com ");
            HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //span[@class='category-pop']/a "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strText =  new  string[node.Count];

             try
            {
                 int i =  0;
                 foreach (HtmlNode htmlNode  in node)
                {
                    strLink[i] = htmlNode.Attributes[ " href "].Value;
                    strText[i++] = htmlNode.InnerText;
                }
            }
             catch { }

 

 

// Taobao apparel channel, new arrivals: collect link, caption markup and image of every list item.
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink("http://fushi.taobao.com");

            // SelectNodes yields null when nothing matches; nodes is null when the download failed.
            HtmlAgilityPack.HtmlNodeCollection node = nodes == null ? null : nodes.SelectNodes("//div[@class='new-product-image-list']/ul[@class='image-list']/li");
            int count = node == null ? 0 : node.Count;

            string[] strLink = new string[count];
            string[] strImg = new string[count];
            string[] strAlt = new string[count];

            if (node != null)
            {
                int i = 0;
                foreach (HtmlNode htmlNode in node)
                {
                    // Expected shape: the item's first child is the anchor, whose first child is the
                    // image and whose second child carries the caption.
                    HtmlNodeCollection itemChildren = htmlNode.ChildNodes;
                    HtmlNode anchor = (itemChildren != null && itemChildren.Count > 0) ? itemChildren[0] : null;
                    HtmlNodeCollection anchorChildren = anchor == null ? null : anchor.ChildNodes;
                    if (anchorChildren == null || anchorChildren.Count < 2)
                    {
                        continue;
                    }

                    string href = anchor.GetAttributeValue("href", null);
                    string src = anchorChildren[0].GetAttributeValue("src", null);
                    if (href == null || src == null)
                    {
                        // Items with unexpected markup are skipped; unused trailing slots stay null.
                        continue;
                    }

                    strLink[i] = href;
                    strAlt[i] = anchorChildren[1].InnerHtml;
                    strImg[i++] = src;
                }
            }

 

 

// Knitwear recommendations: collect link, name, image, list price and sale price of every product.
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink("http://rihan.vancl.com/", "UTF-8");

            // SelectNodes yields null when nothing matches; nodes is null when the download failed.
            HtmlAgilityPack.HtmlNodeCollection node = nodes == null ? null : nodes.SelectNodes("//div[@class='prod_area']/ul/li");
            int count = node == null ? 0 : node.Count;

            string[] strLink = new string[count];
            string[] strImg = new string[count];
            string[] strAlt = new string[count];
            string[] strPrice = new string[count];
            string[] strCurrentPrice = new string[count];

            if (node != null)
            {
                int i = 0;
                foreach (HtmlNode htmlNode in node)
                {
                    // Positional layout of one product item: child 0 is the anchor, child 4 the
                    // name box and child 6 the price box.
                    HtmlNodeCollection cells = htmlNode.ChildNodes;
                    if (cells == null || cells.Count < 7)
                    {
                        continue;
                    }

                    HtmlNodeCollection anchorParts = cells[0].ChildNodes;
                    HtmlNodeCollection nameParts = cells[4].ChildNodes;
                    HtmlNodeCollection priceParts = cells[6].ChildNodes;
                    if (anchorParts == null || anchorParts.Count < 2
                        || nameParts == null || nameParts.Count < 2
                        || priceParts == null || priceParts.Count < 3)
                    {
                        continue;
                    }

                    HtmlNodeCollection listPriceParts = priceParts[1].ChildNodes;
                    if (listPriceParts == null || listPriceParts.Count < 2)
                    {
                        continue;
                    }

                    string href = cells[0].GetAttributeValue("href", null);
                    string src = anchorParts[1].GetAttributeValue("src", null);
                    if (href == null || src == null)
                    {
                        // Items with unexpected markup are skipped; unused trailing slots stay null.
                        continue;
                    }

                    strLink[i] = href;
                    strAlt[i] = nameParts[1].InnerHtml.Trim();
                    strImg[i] = src;
                    // NOTE(review): the currency marker removed here was garbled in the listing this
                    // snippet came from; "¥" is a reconstruction - confirm against the live markup.
                    strPrice[i] = listPriceParts[1].InnerHtml.Trim().Replace("¥", "");
                    strCurrentPrice[i] = priceParts[2].InnerHtml.Trim().Replace("售价¥", "");
                    i++;
                }
            }

 

 

         private  void button8_Click( object sender, EventArgs e)
        {
             // http://www.masamaso.com   商品列表
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.masamaso.com/ "" UTF-8 ");
            HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //ul/li/div[@class='goods_case'] "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strImg =  new  string[node.Count];
             string[] strAlt =  new  string[node.Count];
             string[] strPrice =  new  string[node.Count];
             string[] strCurrentPrice =  new  string[node.Count];

             int i =  0;
             foreach (HtmlNode htmlNode  in node)
            {
                 try
                {
                    strLink[i] =  " http://www.masamaso.com/ " + htmlNode.ChildNodes[ 1].ChildNodes[ 0].Attributes[ " href "].Value;
                    strAlt[i] = htmlNode.ChildNodes[ 1].ChildNodes[ 0].Attributes[ " title "].Value;
                    strImg[i] = htmlNode.ChildNodes[ 1].ChildNodes[ 0].ChildNodes[ 0].Attributes[ " src "].Value;
                     // strPrice[i] = htmlNode.ChildNodes[6].ChildNodes[1].ChildNodes[1].InnerHtml.Trim().Replace("¥", "");
                    strCurrentPrice[i++] = htmlNode.ChildNodes[ 3].ChildNodes[ 1].ChildNodes[ 1].ChildNodes[ 0].InnerHtml.Trim().Replace( " &yen; """);
                }
                 catch 
                { }
            }
        }

         private  void button9_Click( object sender, EventArgs e)
        {
             // http://www.masamaso.com/   弹出广告
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.masamaso.com/ "" UTF-8 ");
            HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //div[@class='foot_img tabContainer']/div[@class='tabBox']/div[@class='hd_tp'] "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strImg =  new  string[node.Count];
             string[] strAlt =  new  string[node.Count];
             string[] strPrice =  new  string[node.Count];
             string[] strCurrentPrice =  new  string[node.Count];

             int i =  0;
             foreach (HtmlNode htmlNode  in node)
            {
                 try
                {
                    strLink[i] = htmlNode.ChildNodes[ 0].Attributes[ " href "].Value;
                     // strAlt[i] = htmlNode.ChildNodes[1].ChildNodes[0].Attributes["title"].Value;
                    strImg[i++] = htmlNode.ChildNodes[ 0].ChildNodes[ 0].Attributes[ " src "].Value;
                     // strPrice[i] = htmlNode.ChildNodes[6].ChildNodes[1].ChildNodes[1].InnerHtml.Trim().Replace("¥", "");
                    
// strCurrentPrice[i++] = htmlNode.ChildNodes[3].ChildNodes[1].ChildNodes[1].ChildNodes[0].InnerHtml.Trim().Replace("&yen;", "");
                }
                 catch 
                { }
            }

            Func();
        }

         private  void Func()
        {
             // http://www.vivian.cn/   弹出广告
            HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.vivian.cn/ "" UTF-8 ");
            HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //div[@class='foot_img tabContainer']/div[@class='tabBox']/div[@class='hd_tp'] "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strImg =  new  string[node.Count];
             string[] strAlt =  new  string[node.Count];
             string[] strPrice =  new  string[node.Count];
             string[] strCurrentPrice =  new  string[node.Count];

             int i =  0;
             foreach (HtmlNode htmlNode  in node)
            {
                 try
                {
                    strLink[i] = htmlNode.ChildNodes[ 0].Attributes[ " href "].Value;
                     // strAlt[i] = htmlNode.ChildNodes[1].ChildNodes[0].Attributes["title"].Value;
                    strImg[i++] = htmlNode.ChildNodes[ 0].ChildNodes[ 0].Attributes[ " src "].Value;
                     // strPrice[i] = htmlNode.ChildNodes[6].ChildNodes[1].ChildNodes[1].InnerHtml.Trim().Replace("¥", "");
                    
// strCurrentPrice[i++] = htmlNode.ChildNodes[3].ChildNodes[1].ChildNodes[1].ChildNodes[0].InnerHtml.Trim().Replace("&yen;", "");
                }
                 catch 
                { }
            }

        }

         private  void button10_Click( object sender, EventArgs e)
        {
             // http://www.vivian.cn/ "   产品列表
             HtmlAgilityPack.HtmlNode nodes = GetHtmlNodeFromLink( " http://www.vivian.cn/ "" UTF-8 ");
             HtmlAgilityPack.HtmlNodeCollection node = nodes.SelectNodes( " //div[@class='goods_list']/ul/li "); // " // div[@class='slideBannerA homeSlideAD1']"

             string[] strLink =  new  string[node.Count];
             string[] strImg =  new  string[node.Count];
             string[] strAlt =  new  string[node.Count];
             string[] strPrice =  new  string[node.Count];
             string[] strCurrentPrice =  new  string[node.Count];

             int i =  0;
             foreach (HtmlNode htmlNode  in node)
            {
                 try
                {
                    strLink[i] =  " http://www.vivian.cn/ " + htmlNode.ChildNodes[ 1].ChildNodes[ 1].ChildNodes[ 0].Attributes[ " href "].Value;
                    strAlt[i] = htmlNode.ChildNodes[ 1].ChildNodes[ 1].ChildNodes[ 0].Attributes[ " title "].Value;
                    strImg[i] = htmlNode.ChildNodes[ 1].ChildNodes[ 1].ChildNodes[ 0].ChildNodes[ 0].Attributes[ " src "].Value;
                     // strPrice[i] = htmlNode.ChildNodes[6].ChildNodes[1].ChildNodes[1].InnerHtml.Trim().Replace("¥", "");
                    strCurrentPrice[i++] = htmlNode.ChildNodes[ 1].ChildNodes[ 3].ChildNodes[ 3].ChildNodes[ 0].ChildNodes[ 1].InnerHtml.Trim().Replace( " &yen; """);
                }
                 catch 
                { }
            }
        }

 

 

 

 

 

 

转载于:https://www.cnblogs.com/bober/archive/2011/10/27/2226794.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值