获取网页数据并分析

最近写了一个抓取网页并提取页面信息的小程序(针对需要登录的网站)

  登录的标识: UID和COOKIE

  获取后把UID和COOKIE写入ini以备使用

  获取网页数据的一些基本方法

  (1) (CHttpConnection* )m_pConnection=(CInternet *Session) m_Sessions.GetHttpConnection(_T(m_HostAdderss),m_Port);

  (2)m_pConnection->OpenRequest(m_Method,m_GetPath+m_FileName,m_Referer,1,NULL,m_HttpVersion,
                                                               INTERNET_FLAG_EXISTING_CONNECT );
  (3)pot = (CHttpFile* )m_pFile->SendRequest(
  _T(Headers),(LPVOID)(LPCTSTR)_T(this->m_SendMeg),m_SendMeg.GetLength());

  (4)m_pFile->ReadString(tempString)

       分析页面数据的一个片段(命名很混乱 ^_^)

  // Fragment that slices the downloaded page text in `filedata` into leagues,
  // table rows and cells, appending the extracted cell text to `Newdata` as
  // single-quoted, comma-separated fields and collecting game ids in `m_game`.
  // NOTE(review): every variable used here is declared outside this excerpt; the
  // calls (Find/Mid/Left/Replace/GetLength) look like MFC CString -- confirm.
  // NOTE(review): the result of this Find is discarded, yet `pot` is used on the
  // next line without being assigned in this excerpt -- presumably
  // `pot = filedata.Find(BZ);` was intended; verify.
  filedata.Find(BZ);// BZ: the keyword
      filedata = filedata.Mid(pot);
      // BZ is replaced with itself, so the text is unchanged; only the return
      // value is kept (presumably the number of occurrences of BZ -- confirm).
      count = filedata.Replace(BZ,BZ);
      // Skip the first 18 characters -- presumably the length of BZ; TODO confirm.
      filedata = filedata.Mid(18);
      // NOTE(review): the closing brace of this for loop is not part of the excerpt.
      for(int i=0;i<count;i++)
   {
      pot = filedata.Find(BZ);// split into individual leagues
      if( pot == -1)
    {
      // No further keyword: the whole remaining text is the current league.
      pot = filedata.GetLength();
      lsdata_ls = filedata.Left(pot);
    }
    else
    {
      // Text before the next keyword is the current league; the rest (past the
      // 18 skipped characters) is kept for the following iterations.
      lsdata_ls = filedata.Left(pot);
                    filedata = filedata.Mid(pot+18);
    }
    
    // split each league further
    hj =0;hjj =0;
    // Append the text between the first '>' and the following '<' as a quoted
    // field (presumably the league name -- confirm against the page markup).
    Newdata += "'";
    pot = lsdata_ls.Find(">");
    lsdata_ls = lsdata_ls.Mid(pot);
    pot2 = lsdata_ls.Find("<");
    Newdata += lsdata_ls.Mid(1,pot2-1);
    Newdata += "',";
    // Skip 20 characters past that '<'; the constant depends on the page markup.
    lsdata_ls = lsdata_ls.Mid(pot2+20); 
    
     // Scan a copy of the league text for game ids.
     kk = lsdata_ls;
     while(kk.GetLength()>10)
     {
      // This initial value is overwritten below before it is ever read.
      gameid = "'";// extract the gameid
      pot = kk.Find("javascript:DoVote");
      if(pot==-1)break;
      kk =kk.Mid(pot);
      // 7 characters at offset 23 from the match, followed by "',".
      gameid = kk.Mid(23,7)+"',";

      // Self-replace again used as an occurrence check: append the id to m_game
      // only when Replace reports 0.
      pot = m_game.Replace(gameid,gameid);
      if(pot==0)
       m_game +=gameid;
                     kk = kk.Mid(30);
     }
    

      // Consume the league text one "</TR>"-terminated chunk at a time.
      // NOTE(review): a failed Find (-1) is not handled here -- verify the loop
      // still terminates on malformed input.
      while(lsdata_ls.GetLength()>10) 
    {
     pot= lsdata_ls.Find("</TR>");
     lsdata_cc = lsdata_ls.Left(pot);
     lsdata_ls = lsdata_ls.Mid(pot+6);
     
     while(lsdata_cc.GetLength()>10)
     {   
         // Chunks shorter than 300 characters take this branch.
         if(lsdata_cc.GetLength()<300)// draw
      {
       hj++;
       // Emit every non-empty text run between '>' and '<' as a quoted field.
       while(1)
       {
         pot = lsdata_cc.Find(">");      
         lsdata_cc = lsdata_cc.Mid(pot);
         pot2 = lsdata_cc.Find("<");
         // '<' directly after '>' means there is no text between the tags: skip.
         if(pot2 == 1)
         {
          lsdata_cc = lsdata_cc.Mid(pot2);
          continue;
         } 
         Newdata += "'";
         Newdata += lsdata_cc.Mid(1,pot2-1);
         lsdata_cc = lsdata_cc.Mid(pot2+1);
         // Fewer than 6 characters left: end the record and leave the loop.
         // NOTE(review): "/n" is a slash followed by 'n', not the newline
         // escape "\n" -- confirm this is the intended separator.
         if(lsdata_cc.GetLength()<6){
         Newdata+="/n";break;}
         Newdata += "',";
        
       }
      }
      else
      {  hjj++;
         // From the second long chunk on, and only when no short chunk has been
         // seen yet, emit the "/n" separator first.
         if( (hjj>1)&&(hj==0) )
         Newdata +="/n";
         pot = lsdata_cc.Find("<TD");// time
         lsdata_cc = lsdata_cc.Mid(pot+3);
         pot = lsdata_cc.Find(">");
         pot2 = lsdata_cc.Find("</TD>");
         Newdata +="'";
         // NOTE(review): if Mid's second argument is a character count (as in MFC
         // CString), this takes pot2-1 characters starting at pot+1 rather than
         // stopping at "</TD>" -- verify.
         Newdata += lsdata_cc.Mid(pot+1,pot2-1);
         Newdata +="',";
         lsdata_cc = lsdata_cc.Mid(pot2+4);
        
         // Two passes, one quoted field each. This `i` shadows the outer loop's `i`.
         for(int i=0;i<2;i++)
         {
          Newdata +="'";
          pot = lsdata_cc.Find("<tr>");// home and away teams
          lsdata_cc = lsdata_cc.Mid(pot+4);       
          pot2 = lsdata_cc.Find("</tr>");
          lsdata_zd = lsdata_cc.Mid(1,pot2);
          lsdata_cc = lsdata_cc.Mid(pot2+1);
          // Concatenate every non-empty text run between tags into this field.
          while(lsdata_zd.GetLength()>3)
          {
           pot = lsdata_zd.Find(">");
           lsdata_zd = lsdata_zd.Mid(pot);
           pot2 = lsdata_zd.Find("<");
           if(pot2 == 1)
           {
            lsdata_zd = lsdata_zd.Mid(pot2+1);
            continue;
           }
           Newdata += lsdata_zd.Mid(1,pot2-1);
           lsdata_zd = lsdata_zd.Mid(pot2+1);
          }
          Newdata +="',";
         }
         // Remaining cells: one quoted field per non-empty text run.
         while(lsdata_cc.GetLength()>20)// others
         {
          pot = lsdata_cc.Find(">");
          lsdata_cc = lsdata_cc.Mid(pot);
          pot2 = lsdata_cc.Find("<");  
          if(pot2 == 1)
          {
                                        lsdata_cc = lsdata_cc.Mid(pot2+1);
          continue;
          }
                                     Newdata +="'";
          Newdata += lsdata_cc.Mid(1,pot2-1);
          // Leaving here skips the closing "'," for the field just appended.
          if(lsdata_cc.GetLength()<28)break;
          Newdata +="',";
          lsdata_cc = lsdata_cc.Mid(pot2+1);
         }
      // Stop processing this chunk once fewer than 20 characters remain.
      }if(lsdata_cc.GetLength()<20)break;
                       
     }
       
       // Drop 6 more characters before looking for the next "</TR>".
       lsdata_ls = lsdata_ls.Mid(6);      
    // End of the per-league loop: terminate the league's output with "/n".
    }  Newdata += "/n"; 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值