ParseURL, UrlLike, URL解析函数与匹配函数

// Url.h: declarations for the URL parsing and matching functions.
//
//

#if !defined(_URL_H__)
#define _URL_H__

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

// Fall back to printf when no TRACE macro has been defined before this point.
#ifndef TRACE
    #define TRACE printf
#endif

// Trace aliases used by the URL code; both end up expanding to TRACE.
#define URL_TRACE  TRACE
#define LIKE_TRACE URL_TRACE

// A length-delimited string slice. ParseURL stores pointers into the string
// it was given, so a slice is not NUL-terminated at `len` and is only valid
// while that string is alive.
struct STR_FIELD
{
    LPCSTR  str;    // first character of the slice (0 when the field is absent)
    UINT    len;    // number of characters in the slice
};

// Result of ParseURL: one slice per URL component plus two counters.
struct ST_URL
{
    STR_FIELD Scheme;       // scheme (protocol), text before "://"
    STR_FIELD UserName;     // user name
    STR_FIELD PassWord;     // user password
    STR_FIELD Host;         // host
    STR_FIELD Port;         // port
    STR_FIELD URI;          // URI relative to the host (path, file, query and anchor)
    STR_FIELD Path;         // directory path
    STR_FIELD FileName;     // file name
    STR_FIELD ExtraName;    // file extension (starts at the last '.')
    STR_FIELD QueryParam;   // query string (text after '?')
    STR_FIELD Anchor;       // anchor (text after '#')
    UINT      PathDepth;    // path depth
    UINT      ParamCount;   // number of query parameters
};

// One query-string parameter as produced by ParseQueryParam.
struct ST_PARAM
{
    STR_FIELD Name;     // text before the first '=' (or the whole item when there is no '=')
    STR_FIELD Value;    // text after the first '='; str is 0 and len is 0 when empty or absent
};

// Splits the URL text pstr into the fields of url. The fields point into pstr.
void ParseURL(LPCSTR pstr, ST_URL &url);
// Splits url.QueryParam into name/value pairs; writes url.ParamCount entries to pField.
void ParseQueryParam(ST_URL &url, ST_PARAM *pField);
// Tests whether url2 (the URL) matches url1 (the rule); bCmpParamValue also compares parameter values.
BOOL UrlLike(ST_URL &url1, ST_URL &url2, BOOL bCmpParamValue);
// Parses each line of urls as a rule and returns TRUE as soon as one matches url2 via UrlLike.
BOOL WINAPI FindURL(const char *urls, ST_URL &url2);

// Debug helpers: print a field, a name/value pair, or a whole parsed URL through URL_TRACE.
void DebugField(STR_FIELD &Field, LPCTSTR Name);
void DebugParam(STR_FIELD &NameField, STR_FIELD &ValueField );
void DebugUrl(ST_URL &url);


#endif // !defined(_URL_H__)
// Url.cpp: implementation of the URL parsing and matching functions.
//
//

#include "stdafx.h"
#include "Url.h"
#include "shlwapi.h"
#pragma comment(lib, "shlwapi.lib")

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

//
// URL parsing
//

void ParseURL(LPCSTR pstr, ST_URL &url)
{
    memset(&url,NULL,sizeof(ST_URL));

    const char *p0;
    const char *p1;
    const char *p2;
    const char *p3;


    char szSchemeFlag[]={':','/','/',0};

L1:
    if (*pstr==' ')
    {
        pstr++;
        goto L1;

    }else if (*pstr==0){

        return ;
    }

//----------------------------------

    
    p0=strstr(pstr,szSchemeFlag);
    
    p3=strchr(pstr,'?');


	if (p0)
	{
        if (p3)
        {
            if (p0<p3)
            {
                url.Scheme.str=pstr;
                url.Scheme.len=p0-pstr;

                pstr=p0+3;
            }
        }else{

            url.Scheme.str=pstr;
            url.Scheme.len=p0-pstr;

            pstr=p0+3;
        }

	}

//----------------------------------

    p0=strchr(pstr,'@');
    p1=strchr(pstr,'/');

    if (p0 && p0<p1 && (p3?p0<p3:1))
	{
        p2=strchr(pstr,':');

        if (p2 && p2<p0)
        {
            url.UserName.str=pstr;
            url.UserName.len=p2-pstr;

            url.PassWord.str=p2+1;
            url.PassWord.len=p0-p2-1;
        }else{
            url.UserName.str=pstr;
            url.UserName.len=p0-pstr;
        }

        pstr=p0+1;
	}

//----------------------------------
    p0=strchr(pstr,'/');

    if (p0 && p0>pstr && (p3?p0<p3:1))
    {
        p1=strchr(pstr,':');

        if (p1 && p1<p0)
        {
            url.Host.str=pstr;
            url.Host.len=p1-pstr;

            url.Port.str=p1+1;
            url.Port.len=p0-p1-1;

        }else{

            url.Host.str=pstr;
            url.Host.len=p0-pstr;
        }

        pstr=p0;
    }

//----------------------------------

    url.URI.str=pstr;
    url.URI.len=strlen(pstr);

//----------------------------------
    
    if (*pstr=='/')
    {
        p1=pstr;
        p0=pstr+1;
L2:
        if (*p0)
        {
            if (*p0=='/')
            {
                p1=p0;           //记录最后一个'/'字符开始指针
                url.PathDepth++;
            }else
            if (*p0=='?' || *p0=='#')
            {
                goto L4;
            }

            p0++;
            goto L2;
        }

    }else{

        p1=pstr;
        p0=pstr;
L3:
        if (*p0)
        {
            if (*p0=='/')
            {
                p1=p0;           //记录最后一个'/'字符开始指针
                url.PathDepth++;
            }else
            if (*p0=='?' || *p0=='#')
            {
                goto L4;
            }

            p0++;
            goto L3;
        }

    }
L4:

    url.Path.str=pstr;
    url.Path.len=p1-pstr;

    pstr=p1;


//----------------------------------
    if (*pstr=='/') pstr++;

    p0=pstr;

    p1=0; //记录'.'指针

L5:
    if (*p0)
    {
        if (*p0=='.')
        {
            p1=p0;               //记录最后一个'.'出现的指针
        }else
        if (*p0=='?' || *p0=='#')
        {
            goto L6;
        }

        p0++;
        goto L5;
    }

L6:
    p2=p0;

    url.FileName.str=pstr;
    url.FileName.len=p2-pstr;

    if (p1)
    {
        url.ExtraName.str=p1;
        url.ExtraName.len=p2-p1;
    }

    pstr=p2;

//----------------------------------
    if (*pstr=='?')
    {
        pstr++;
        p0=pstr;

        if (*p0!='#' && *p0!=0) url.ParamCount++;
L7:

        if (*p0=='&')
        {
            url.ParamCount++;
        }else
        if (*p0=='#' || *p0==0)
        {
            goto L8;
        }

        p0++;
        goto L7;


    L8:
        p2=p0;

        url.QueryParam.str=pstr;
        url.QueryParam.len=p2-pstr;

        pstr=p2;
    }
//----------------------------------
    if (*pstr=='#')
    {
        pstr++;

        url.Anchor.str=pstr;
        url.Anchor.len=strlen(pstr);
    }

}

// Splits url.QueryParam into name/value pairs.
//
// url    : a parsed URL; QueryParam and ParamCount are read.
// pField : receives url.ParamCount entries; the caller must provide at least
//          that many. Entries point into the query text (nothing is copied).
//
// Items are separated by '&'. Within an item the first '=' separates name and
// value. An item without '=' or with nothing after '=' gets Value.str == 0 and
// Value.len == 0.
//
// Positions are compared as pointers rather than through DWORD casts, which
// would truncate addresses on 64-bit targets.
void ParseQueryParam(ST_URL &url, ST_PARAM *pField)
{
    if (pField == NULL || url.QueryParam.str == NULL)
    {
        return;
    }

    const char *pEnd   = url.QueryParam.str + url.QueryParam.len;
    const char *pItem  = url.QueryParam.str;   // start of the current item
    const char *pEqual = NULL;                 // first '=' of the current item
    const char *pCur   = url.QueryParam.str;

    UINT i = 0;

    // The pCur <= pEnd bound keeps the scan inside the query even when
    // ParamCount does not agree with the query text.
    while (i < url.ParamCount && pCur <= pEnd)
    {
        const BOOL bAtEnd = (pCur == pEnd);

        if (!bAtEnd && pEqual == NULL && *pCur == '=')
        {
            pEqual = pCur;
        }

        if (bAtEnd || *pCur == '&')
        {
            // The name ends at the '=' when there is one, else at the separator.
            const char *pNameEnd = pEqual ? pEqual : pCur;

            pField[i].Name.str = pItem;
            pField[i].Name.len = (UINT)(pNameEnd - pItem);

            if (pEqual && pEqual + 1 < pCur)
            {
                pField[i].Value.str = pEqual + 1;
                pField[i].Value.len = (UINT)(pCur - pEqual - 1);
            }
            else
            {
                pField[i].Value.str = 0;
                pField[i].Value.len = 0;
            }

            pItem  = pCur + 1;
            pEqual = NULL;

            i++;
        }

        pCur++;
    }

    // Only reached with i < ParamCount when the count and the text disagree:
    // leave the remaining entries empty instead of uninitialised.
    for (; i < url.ParamCount; i++)
    {
        pField[i].Name.str  = 0;
        pField[i].Name.len  = 0;
        pField[i].Value.str = 0;
        pField[i].Value.len = 0;
    }
}


//url1 规则
//url2 URL

BOOL UrlLike(ST_URL &url1, ST_URL &url2, BOOL bCmpParamValue)
{
    char Name[2][256];
    char Value[2][256];


    BOOL bHost=TRUE;
    BOOL bPath=TRUE;
    BOOL bFile=TRUE;
    BOOL bParam=TRUE;

    UINT i,j, Len;

    if (url1.Host.len && url2.Host.len)
    {

        Len=url1.Host.len;
        if (Len>255) Len=255;

        strncpy(Name[0], url1.Host.str,  Len);
        Name[0][Len]=0;

        Len=url2.Host.len;
        if (Len>255) Len=255;

        strncpy(Name[1], url2.Host.str,  Len);
        Name[1][Len]=0;

        LIKE_TRACE("Host %s %s\n", Name[1], Name[0]);

        bHost = StrStrIA(Name[1], Name[0])!=NULL;

    }

    if (url1.Path.len==0 && url1.FileName.len==0 && url1.ParamCount==0)
    {
        goto END;
    }

    if (url1.Path.len && url2.Path.len)
    {

        Len=url1.Path.len;
        if (Len>255) Len=255;

        strncpy(Name[0], url1.Path.str, Len);
        Name[0][Len]=0;

        Len=url2.Path.len;
        if (Len>255) Len=255;

        strncpy(Name[1], url2.Path.str, Len);
        Name[1][Len]=0;

        LIKE_TRACE("Path %s\n%s\n", Name[1], Name[0]);

        bPath = stricmp(Name[1],Name[0])==0;

    }else{

        bPath = (url1.Path.len==url2.Path.len);
    }

    if (url1.FileName.len==0 && url1.ParamCount==0)
    {
        goto END;
    }

    if (url1.FileName.len && url2.FileName.len)
    {
        Len=url1.FileName.len;
        if (Len>255) Len=255;

        strncpy(Name[0], url1.FileName.str, Len);
        Name[0][Len]=0;

        Len=url2.FileName.len;
        if (Len>255) Len=255;

        strncpy(Name[1], url2.FileName.str, Len);
        Name[1][Len]=0;

        LIKE_TRACE("FileName %s\n%s\n", Name[1], Name[0]);

        bFile = StrStrIA(Name[1],Name[0])!=NULL;

    }else{

        bFile = (url1.FileName.len==url2.FileName.len);
    }


    if (url1.ParamCount==0)
    {
        goto END;
    }


    if (url2.ParamCount==0){

        bParam = FALSE;

    }else if (url2.ParamCount){


        ST_PARAM *pParams1=new ST_PARAM[url1.ParamCount];
        ST_PARAM *pParams2=new ST_PARAM[url2.ParamCount];


        if (pParams1 && pParams2)
        {
            ParseQueryParam(url1,pParams1);
            ParseQueryParam(url2,pParams2);

            BOOL At=TRUE;

            for (i=0;i<url1.ParamCount;i++)
            {

                Len=pParams1[i].Name.len;
                if (Len>255) UINT Len=255;

                strncpy(Name[0], pParams1[i].Name.str,  Len);
                Name[0][Len]=0;

                LIKE_TRACE("Name2:%s\n", Name[0]);

                BOOL A=FALSE;

                for (j=0;j<url2.ParamCount;j++)
                {

                    Len=pParams2[j].Name.len;
                    if (Len>255) UINT Len=255;

                    strncpy(Name[1], pParams2[j].Name.str, Len);
                    Name[1][Len]=0;

                    LIKE_TRACE("Name1:%s\n", Name[1]);

                    //如果在2中找到keyName,退出查找下一个keyName
                    if ( StrStrIA(Name[1],Name[0]) )
                    {
                        if (bCmpParamValue && pParams1[i].Value.len && pParams2[j].Value.len)
                        {

                            Len=pParams1[i].Value.len;
                            if (Len>255) UINT Len=255;

                            strncpy(Value[0], pParams1[i].Value.str,  Len);
                            Value[0][Len]=0;


                            Len=pParams2[j].Value.len;
                            if (Len>255) UINT Len=255;

                            strncpy(Value[1], pParams2[j].Value.str, Len);
                            Value[1][Len]=0;

                            A= stricmp(Value[0],Value[1])==0;

                        }else{
                            A=TRUE;
                        }

                        break;

                    }else
                    if (url1.ParamCount==1 && url2.ParamCount==1 && pParams1[i].Value.len==0 && pParams2[j].Value.len==0 )
                    {
                        if (bCmpParamValue)
                        {

                            A = StrStrIA(Name[1],Name[0])!=0;

                        }else{
                        
                            A = TRUE;
                        }

                        URL_TRACE("OK\n");
                    }

        
                }

                At&=A; //统计是否全部存在于2中

            }

            bParam &= At;
        }

        if (pParams1) delete pParams1;
        if (pParams2) delete pParams2;
    }


END:

    return (bHost && bPath && bFile && bParam);
}


// Searches a list of rule URLs for one that matches url2.
//
// urls : NUL-terminated text with one rule per line. Lines are separated by
//        '\r' and/or '\n'; spaces and empty lines before a rule are skipped.
//        NULL is treated as an empty list.
// url2 : the parsed URL to test.
//
// Each line is copied, parsed with ParseURL and compared with
// UrlLike(rule, url2, TRUE). Returns TRUE at the first match, FALSE when no
// line matches or when memory for a line cannot be allocated.
//
// A line ends at the first '\r' or '\n', whichever comes first, and a final
// line without a terminator is tested too.
BOOL WINAPI FindURL(const char *urls, ST_URL &url2)
{
    if (urls == NULL)
    {
        return FALSE;
    }

    const char *pstr=urls;

    for (;;)
    {
        // Skip blanks and line breaks in front of the next rule.
        while ( *pstr == ' ' || *pstr == '\r' || *pstr == '\n')
        {
            pstr++;
        }

        if (*pstr == 0)
        {
            return FALSE;
        }

        // Find the end of the current line.
        const char *pEnd=pstr;

        while (*pEnd != 0 && *pEnd != '\r' && *pEnd != '\n')
        {
            pEnd++;
        }

        size_t nLen=(size_t)(pEnd-pstr);

        // One extra byte for the terminator.
        char *purl=(char *)malloc(nLen+1);

        if (purl == NULL)
        {
            return FALSE;
        }

        memcpy(purl,pstr,nLen);
        purl[nLen]=0;

        // The fields of url point into purl, so the comparison must finish
        // before purl is released.
        ST_URL url;
        ParseURL(purl,url);

        BOOL bMatch=UrlLike(url,url2,TRUE);

        free(purl);

        if (bMatch)
        {
            return TRUE;
        }

        pstr=pEnd;
    }
}




// Prints one URL field through URL_TRACE as "<Name>: <text>".
//
// Field : slice to print; an absent field (str == 0) prints as empty text.
// Name  : label printed in front of the text.
//
// At most 1023 characters of the field are printed; longer fields are
// truncated so the local buffer cannot overflow.
void DebugField(STR_FIELD &Field, LPCTSTR Name)
{
    char str[1024];
    UINT Len = Field.len;

    if (Field.str == NULL)
    {
        Len = 0;
    }
    else if (Len > sizeof(str) - 1)
    {
        Len = (UINT)(sizeof(str) - 1);
    }

    if (Len)
    {
        strncpy(str, Field.str, Len);
    }

    str[Len]=0;
    URL_TRACE("%-12s: %s\n", Name, str);
}

// Prints one query parameter through URL_TRACE as "<name>: <value>".
//
// NameField  : parameter name slice.
// ValueField : parameter value slice; may be absent (str == 0, len == 0).
//
// Each part is limited to 1023 characters; longer text is truncated so the
// local buffers cannot overflow.
void DebugParam(STR_FIELD &NameField, STR_FIELD &ValueField )
{
    char str[2][1024];
    const STR_FIELD *Fields[2] = { &NameField, &ValueField };

    for (int k = 0; k < 2; k++)
    {
        UINT Len = Fields[k]->len;

        if (Fields[k]->str == NULL)
        {
            Len = 0;
        }
        else if (Len > sizeof(str[k]) - 1)
        {
            Len = (UINT)(sizeof(str[k]) - 1);
        }

        if (Len)
        {
            strncpy(str[k], Fields[k]->str, Len);
        }

        str[k][Len]=0;
    }

    URL_TRACE("%-12s: %s\n", str[0], str[1]);
}


// Prints every field of a parsed URL through URL_TRACE, followed by the two
// counters and the individual query parameters.
//
// url : a URL previously filled in by ParseURL.
void DebugUrl(ST_URL &url)
{
    URL_TRACE("--------------------------------------------------\n");

    DebugField(url.Scheme,      "Scheme    ");
    DebugField(url.UserName,    "UserName  ");
    DebugField(url.PassWord,    "PassWord  ");
    DebugField(url.Host,        "Host      ");
    DebugField(url.Port,        "Port      ");
    DebugField(url.URI,         "URI       ");
    DebugField(url.Path,        "Path      ");
    DebugField(url.FileName,    "FileName  ");
    DebugField(url.ExtraName,   "ExtraName ");
    DebugField(url.QueryParam,  "QueryParam");
    DebugField(url.Anchor,      "Anchor    ");

    // Both counters are UINT, hence %u.
    URL_TRACE("%-12s: %u\n", "PathDepth",  url.PathDepth);
    URL_TRACE("%-12s: %u\n", "ParamCount", url.ParamCount);

    URL_TRACE("-------------------------\n");

    ST_PARAM *pParams=new ST_PARAM[url.ParamCount];

    if (pParams)
    {
        ParseQueryParam(url,pParams);

        for (UINT j=0;j<url.ParamCount;j++)
        {
            DebugParam(pParams[j].Name, pParams[j].Value);
        }

        // Allocated with new[], so release with delete[].
        delete[] pParams;
    }
}



 
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值