xuyaogaijin

最新推荐文章于 2024-10-15 10:41:38 发布

hnney

最新推荐文章于 2024-10-15 10:41:38 发布

阅读量643

点赞数

文章标签： null url javascript

本文链接：https://blog.csdn.net/moonhnney/article/details/5294529

版权

// api
// Parses an absolute URL; the input must start with "http://".
//
// Fills m_strHost, m_nPort (only when an explicit port is present),
// m_strFile (the path, or "/" when the URL has none) and m_dep (the number
// of '/' characters in the path after its leading one, e.g. 2 for
// "/a/b/c.gif").
//
// line: NUL-terminated URL text. The 7-character scheme prefix is skipped
//       without being compared; callers are expected to have checked it.
// Returns false when line is NULL, shorter than the scheme prefix, has an
// empty host, carries a port above 65535, or the host buffer cannot be
// allocated; true otherwise.
bool CUrl::parseStartHttp(const char* line)
{
    if (line == NULL)
        return false;

    // Length of the "http://" prefix that is skipped.
    const int prefixLen = 7;

    // Never step over the terminator of an input shorter than the prefix.
    for (int i = 0; i < prefixLen; ++i) {
        if (line[i] == '\0')
            return false;
    }

    // The host runs up to the first '/', ':' or the end of the string.
    const char* pHost = line + prefixLen;
    int nfind = 0;
    while (*pHost != '\0' && *pHost != '/' && *pHost != ':') {
        pHost++;
        nfind++;
    }
    if (nfind == 0)
        return false;

    m_strHost = new(nothrow) char[nfind + 1];
    if (m_strHost == NULL)
        return false;
    memcpy(m_strHost, line + prefixLen, nfind);
    m_strHost[nfind] = '\0';

    // Optional ":port". Digits are converted from ASCII; an empty port
    // ("host:/x") leaves m_nPort untouched.
    if (*pHost == ':') {
        pHost++;
        int port = 0;
        bool hasDigits = false;
        while (*pHost >= '0' && *pHost <= '9') {
            port = 10 * port + (*pHost - '0');
            if (port > 65535)
                return false;   // not a valid TCP port; also prevents int overflow
            hasDigits = true;
            pHost++;
        }
        if (hasDigits)
            m_nPort = port;
    }

    // Whatever follows is the path; default to the root.
    if (*pHost == '/') {
        m_strFile = newstring(pHost);
    } else {
        m_strFile = newstring("/");
    }

    // Depth: count '/' characters after the first remaining character.
    int dep = 0;
    if (*pHost != '\0') {
        for (const char* p = pHost + 1; *p != '\0'; ++p) {
            if (*p == '/')
                dep++;
        }
    }
    m_dep = dep;
    return true;
}

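/*
Relative link forms handled by parseWithBase
(base: the base URL's path, with host and port stripped):

/a/b/c.d    path from the site root
a/b.d       file inside a sub-directory of the current directory
a.d         file in the current directory
../a.d      file in the parent directory
../../a.d   file two directories up

Only the forms above are handled; anything else is discarded.
*/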

// Resolves a relative link against a base URL and stores the result in this
// object (host and port are copied from the base, the path is rebuilt).
//
// Supported forms of line:
//   "/x/y"       - taken as the complete path
//   "name..."    - (first character alphabetic) appended to the base directory
//   "../name"    - appended one directory above the base directory; only when
//                  the base depth (GetDep) is greater than 0
//   "../../name" - appended two directories above; only when the base depth
//                  is greater than 1
// Example: base "http://a/b/c.gif" + line "image.gif" -> path "/b/image.gif".
//
// line: the link text as found in the page.
// base: absolute URL of the page containing the link.
// Returns false when an argument is NULL, the base does not parse to a valid
// URL, the link has an unsupported form, or the path buffer cannot be
// allocated; true otherwise.
bool CUrl::parseWithBase(const char* line,const char* base)
{
    if (NULL == line || NULL == base)
        return false;

    CUrl url(base, NULL);
    if (!url.IsValid())
        return false;

    m_strHost = newstring(url.GetHost());
    m_nPort = url.GetPort();

    // Root-relative link: the link itself is the whole path.
    if (*line == '/') {
        m_strFile = newstring(line);
        return true;
    }

    // nflag = how many '/' to find, scanning the base path from its end, to
    // reach the directory the link is relative to.
    int nflag = -1;
    if (startWitch(line, "../../") && url.GetDep() > 1) {
        nflag = 3;      // two levels up
    } else if (startWitch(line, "../") && url.GetDep() > 0) {
        nflag = 2;      // one level up
    } else if (isalpha(static_cast<unsigned char>(*line))) {
        nflag = 1;      // current directory
    }
    if (nflag == -1)
        return false;

    // e.g. base path "/a/b/c.gif"
    const char* p = url.GetFile();
    if (p == NULL || *p == '\0')
        return false;

    // Walk backwards until the nflag-th '/' is met or the first character is
    // reached. A base path of just "/" ends immediately with a one-character
    // prefix.
    const char* pTmp = p + strlen(p) - 1;
    int nfind = 0;
    while (pTmp != p) {
        if (*pTmp == '/') {
            nfind++;
        }
        if (nfind == nflag)
            break;
        pTmp--;
    }
    const int len = static_cast<int>(pTmp - p) + 1;     // kept prefix, including its trailing '/'

    // Characters of the link consumed by leading "../" segments: 0, 3 or 6.
    const int skip = (nflag - 1) * 3;

    m_strFile = new(nothrow) char[strlen(line) - skip + len + 1];
    if (m_strFile == NULL)
        return false;
    memcpy(m_strFile, p, len);
    strcpy(m_strFile + len, line + skip);
    return true;
}
// int nfind = 0;
// if(*line == '/'){//表示根目录下
//   m_strFile = newstring(line);
// }else if(startWitch(line,"../") && url.GetDep() > 0){ //上一级目录
//   const char* p = url.GetFile();
//   const char* pTmp =p + strlen(p) - 1;
//   while( pTmp != p ){
//    if(*pTmp == '/'){
//     nfind++;
//    }
//    if(nfind == 2)
//     break;
//   }
//   int len = pTmp - p ;
//   nfind = strlen(line + 2);
//   m_strFile = new(nothrow) char[nfind + len + 1];
//   if(m_strFile != NULL){
//    memcpy(m_strFile,p,len);
//    strcpy(m_strFile+len,line);
//   }
// }else if(startWitch(line,"../../") && url.GetDep() > 1){ //two directories up
//   const char* p = url.GetFile();
//   const char* pTmp =p + strlen(p) - 1;
//   while( pTmp != p ){
//    if(*pTmp == '/'){
//     nfind++;
//    }
//    if(nfind == 3)
//     break;
//   }
//   int len = pTmp - p ;
//   nfind = strlen(line + 5);
//   m_strFile = new(nothrow) char[nfind + len + 1];
//   if(m_strFile != NULL){
//    memcpy(m_strFile,p,len);
//    strcpy(m_strFile+len,line);
//   }
// }else if( (*line >= 'A' && *line <= 'Z') || (*line >='a' && *line <='z')){
//   const char* p = url.GetFile();
//   const char* pTmp =p + strlen(p) - 1;
//   while( pTmp != p ){
//    if(*pTmp == '/'){
//     nfind++;
//    }
//    if(nfind == 1)
//     break;
//   }
//   int len = pTmp - p ;
//   m_strFile = new(nothrow) char[strlen(line) + len + 1];
//   if(m_strFile != NULL){
//    if(nfind == 1){
//     memcpy(m_strFile + 1,p,len);
//     strcpy(m_strFile+len + 1,line);
//    }else{
//     memcpy(m_strFile,p,len);
//     strcpy(m_strFile+len,line);
//    }
//
//   }
// }
// else
//   return false;
//return true;
//}
// Builds a URL from a link.
//
// line: the link text. Links starting with "http://" are parsed as absolute
//       URLs; links starting with "javascript" or "mailto:" (compared
//       case-insensitively) are ignored; everything else is resolved against
//       base.
// base: absolute URL of the containing page; only used for relative links.
//
// The members are always initialised (no host, no file, port 80, depth 0).
// When parsing fails or the link is ignored, host and/or file stay NULL.
CUrl::CUrl(const char *line,const char* base)
{
    m_strHost = NULL;
    m_strFile = NULL;
    m_nPort = 80;
    m_dep = 0;

    // A NULL link must not reach the string comparisons below.
    if (line == NULL)
        return;

    if (startWitch(line, "http://")) {
        parseStartHttp(line);
        return;
    }

    // strncasecmp returns 0 on a match, so both must be non-zero to proceed.
    const bool isScript = (strncasecmp(line, "javascript", 10) == 0);
    const bool isMail = (strncasecmp(line, "mailto:", 7) == 0);
    if (!isScript && !isMail) {
        parseWithBase(line, base);
    }
}
// Destructor: delegates all cleanup to Release().
CUrl::~CUrl()
{
    this->Release();
}

// Hands the host and file buffers to the RELEASE macro.
// NOTE(review): RELEASE is defined outside this chunk; presumably it frees
// the buffer and resets the pointer - confirm against its definition.
void CUrl::Release()
{
    RELEASE(m_strHost);
    RELEASE(m_strFile);
}

// Writes the URL to stderr followed by a newline.
// Prints "url is null" when the host or the file is missing. The port is
// omitted from the output when it is 80.
void CUrl::Print()
{
    if (m_strHost == NULL || m_strFile == NULL) {
        fprintf(stderr, "url is null\n");
        return;
    }

    if (m_nPort == 80) {
        fprintf(stderr, "http://%s%s\n", m_strHost, m_strFile);
    } else {
        fprintf(stderr, "http://%s:%d%s\n", m_strHost, m_nPort, m_strFile);
    }
}

// Formats this URL as text and passes it to CContainUrl::PushUrl together
// with the given depth.
//
// dep: depth value forwarded unchanged to PushUrl.
// Returns 0 once the URL has been handed to PushUrl (its result is not
// propagated), or -1 when the URL has no host/file or does not fit the
// 1024-byte buffer - in those cases nothing is pushed.
int CUrl::SaveUrl(int dep)
{
    // Passing NULL to "%s" is undefined behaviour.
    if (m_strHost == NULL || m_strFile == NULL)
        return -1;

    char u[1024] = {0};
    int written = 0;
    if (m_nPort == 80) {
        written = snprintf(u, sizeof(u), "http://%s%s", m_strHost, m_strFile);
    } else {
        written = snprintf(u, sizeof(u), "http://%s:%d%s", m_strHost, m_nPort, m_strFile);
    }

    // A truncated URL points somewhere else; do not store it.
    if (written < 0 || written >= static_cast<int>(sizeof(u)))
        return -1;

    CContainUrl::PushUrl(u, dep);
    return 0;
}

// Writes this URL as text into a caller-supplied buffer. The port is omitted
// when it is 80.
//
// url:  destination buffer.
// nlen: size of the destination buffer in bytes.
// Returns 1 on success; -1 when url is NULL, nlen is not positive, the URL
// has no host/file, or the text does not fit into nlen bytes.
int CUrl::GiveUrl(char* url,int nlen)
{
    if (url == NULL || nlen <= 0)
        return -1;

    // Passing NULL to "%s" is undefined behaviour.
    if (m_strHost == NULL || m_strFile == NULL)
        return -1;

    int written = 0;
    if (m_nPort == 80) {
        written = snprintf(url, nlen, "http://%s%s", m_strHost, m_strFile);
    } else {
        written = snprintf(url, nlen, "http://%s:%d%s", m_strHost, m_nPort, m_strFile);
    }

    // snprintf reports the length it needed; anything >= nlen was cut off.
    if (written < 0 || written >= nlen)
        return -1;
    return 1;
}

// Stores the response text and the base URL for later parsing.
// szData:  raw response text; the pointer is stored, not copied.
// strBase: base URL of the page; assigned to m_strBaseurl and also stored as
//          a non-const pointer in m_strbase.
CHtml::CHtml(const char* szData,const char* strBase)
{
    m_strHtmlData = szData;
    m_strBaseurl = strBase;
    m_strbase = const_cast<char*>(strBase);
}

// Destructor: releases nothing. The RELEASE of m_strbase is left disabled;
// the constructor stores the caller's pointer in m_strbase without copying.
CHtml::~CHtml()
{
    // RELEASE(m_strbase);
}

int CHtml::parsecontentEx(int dep)
{
int nRet = -1;
int nDep = dep -1;
if(m_strHtmlData == NULL)
return nRet;
const char* pData = m_strHtmlData;

//HTTP/1.1 200 Ok/r/n
if( strncasecmp(pData,"HTTP/",5))
  return nRet;
pData += 9;
m_nAnswerCode = atoi(pData);
if(*pData == '2'){
  if(nDep < 0)
   return nRet;
  for ( ; *pData != '/0'; ++pData){
   if ( *pData == '<'){
    pData++;
    if(*pData == '!' && *(pData + 1) == '-' && *

(pData + 1) == '-')//注释
continue;
//找注释的结尾

//meta 例如:baidu.com URL里存在 META 中处理

meta的跳转页面
if( (*pData != 0) && ( *pData == 'M' || *pData

== 'm')
     && (*(pData + 3) != '/0')){
      //strncasecmp(pData,"meta",4);
      if( strncasecmp

(pData,"meta",4))
       continue;
      pData += 4;
      while(*pData != '/0' && *pData

!= '>'){
pData++;
if( (*pData != '/0') &&

(*pData == 'h' || *pData == 'H')){ //http://
if(strncasecmp

(pData,"http:",5) )

continue;
        //找到http:
        if (

findMetaTag(pData,nDep) == NULL )

return

nRet;
        nRet++;
        pData += 5;
       }

}
}else if( *pData != 0 && ( *pData == 'A' ||

*pData == 'a') ){

     while(*pData != '/0' && *pData != '>'){
      pData++;
      if( (*pData != '/0') && (*pData

== 'h' || *pData == 'H')){ //http://
//strncasecmp

(pData,"href",4);
if( strncasecmp

(pData,"href",4))
continue;

if(findhrefTag

(pData,nDep) == NULL)
        return nRet;
       nRet++;
       pData += 4;
      }
     }
    }
   }
  }
  //HTTP/1.1 304 Ok/r/n
}else if(*pData == '3'){
  pData += 6;
  for ( ; *pData != '/0' ; ++pData ){

if( (*pData != '/0') && *pData == '/r'&& *(pData + 1)

== '/n'){//找到回车
    pData += 2;
    if( (*pData != '/0') && (*pData == 'l' ||

*pData == 'L')){
if( !strncasecmp(pData,"location:",9)){

pData += 9;
while(*pData != '/0' && *pData

== ' ')pData++;
if(*pData == '/0')
return nRet;

      const char* end = pData;
      int poslen = 0;
      while (*end != '/0' && *end !=

'/r' && *end != '/n' && *end != ' '){
       poslen++;
       end++;
      }
      if( poslen > 0){
       SaveUrl

(pData,poslen,dep);
       nRet++;
      }
      if(*end == '/0')
       return nRet;
      pData = end + 1;
     }
    }
   }
  }
}
return nRet;
}