/* 一些字符集转换的代码 (miscellaneous character-set conversion code) */

#include "main.h"
#include<errno.h>
/*
   %40  @
   &lt; <
   &gt; >
   &amp;        &
   &apos;       '
   &quot;       "
   nbsp;        ' '
 */


// Charset names commonly seen on Simplified Chinese content.
// fuzzy_guess_encode_type() searches for these names and returns the index
// of the matching entry in this array.
char *encode_type[] =
{
        "gbk",
        "utf-8",
        "utf8",
        "unicode",
        "gb2312",
        "gb-2312",
        "gb18030",
        "gb-18030"
};

//------------------------------------------------------------------------------------
// Added by Wang Liang, 2008-1-22
// Byte-range bounds for the encoding checks below. In IsGB2312/IsGBK the
// *_LOW_* bounds are applied to the first byte of an adjacent byte pair and
// the *_HIGH_* bounds to the second byte.

#define GB2312_LOW_LEFT         0xB0 // GB2312 "low" byte: range start
#define GB2312_LOW_RIGHT        0xF7 // GB2312 "low" byte: range end
#define GB2312_HIGH_LEFT        0xA0 // GB2312 "high" byte: range start
#define GB2312_HIGH_RIGHT       0xFE // GB2312 "high" byte: range end
#define GBK_LOW_LEFT            0x81 // GBK "low" byte: range start
#define GBK_LOW_RIGHT           0xFE // GBK "low" byte: range end
#define GBK_HIGH_LEFT           0x40 // GBK "high" byte: range start
#define GBK_HIGH_RIGHT          0xFE // GBK "high" byte: range end
#define BIG5_LOW_LEFT           0x81 // Big5 "low" byte: range start
#define BIG5_LOW_RIGHT          0xFE // Big5 "low" byte: range end
#define BIG5_HIGH1_LEFT         0x40 // Big5 "high" byte, first range: start
#define BIG5_HIGH1_RIGHT        0x7E // Big5 "high" byte, first range: end
#define BIG5_HIGH2_LEFT         0xA1 // Big5 "high" byte, second range: start
#define BIG5_HIGH2_RIGHT        0xFE // Big5 "high" byte, second range: end
#define MAX_LENGTH                      2 // inputs of this many bytes or fewer are rejected by the Is* checks

/*
 * IsGB2312 - heuristic test for GB2312-encoded text.
 *
 * Scans src for an adjacent byte pair whose first byte lies in
 * [GB2312_LOW_LEFT, GB2312_LOW_RIGHT] and whose second byte lies in
 * [GB2312_HIGH_LEFT, GB2312_HIGH_RIGHT].
 *
 * src: bytes to inspect (need not be NUL-terminated).
 * len: number of valid bytes in src.
 *
 * Returns 0 when such a pair exists ("yes"), 1 otherwise ("no").
 * A NULL src or an input of MAX_LENGTH bytes or fewer always yields 1.
 */
int IsGB2312(char *src,int len)
{
        const unsigned char *bytes = (const unsigned char *)src;
        int i;

        if(src == NULL || len <= MAX_LENGTH)
                return 1;

        /* Every probe reads bytes[i] and bytes[i + 1], so stop one byte
         * before the end instead of reading past the buffer. */
        for(i = 0; i + 1 < len; ++i)
        {
                if(bytes[i] >= GB2312_LOW_LEFT &&
                        bytes[i] <= GB2312_LOW_RIGHT &&
                        bytes[i + 1] >= GB2312_HIGH_LEFT &&
                        bytes[i + 1] <= GB2312_HIGH_RIGHT)
                        return 0;
        }

        return 1;
}

/*
 * IsGBK - heuristic test for GBK-encoded text.
 *
 * Scans src for an adjacent byte pair whose first byte lies in
 * [GBK_LOW_LEFT, GBK_LOW_RIGHT] and whose second byte lies in
 * [GBK_HIGH_LEFT, GBK_HIGH_RIGHT].
 *
 * src: bytes to inspect (need not be NUL-terminated).
 * len: number of valid bytes in src.
 *
 * Returns 0 when such a pair exists ("yes"), 1 otherwise ("no").
 * A NULL src or an input of MAX_LENGTH bytes or fewer always yields 1.
 */
int IsGBK(char *src,int len)
{
        const unsigned char *bytes = (const unsigned char *)src;
        int i;

        if(src == NULL || len <= MAX_LENGTH)
                return 1;

        /* Every probe reads bytes[i] and bytes[i + 1], so stop one byte
         * before the end instead of reading past the buffer. */
        for(i = 0; i + 1 < len; ++i)
        {
                if(bytes[i] >= GBK_LOW_LEFT &&
                        bytes[i] <= GBK_LOW_RIGHT &&
                        bytes[i + 1] >= GBK_HIGH_LEFT &&
                        bytes[i + 1] <= GBK_HIGH_RIGHT)
                        return 0;
        }

        return 1;
}

int IsBig5(char *src,int len)
{
        //УÑé×Ö·ûÊDz»ÊÇbig5µÄ±àÂë
        //Big5 0x81-0xFE£¨129-255£©     0x40-0x7E£¨64-126£©
        //0xA1£­0xFE£¨161-254£©

        if(len <= MAX_LENGTH)
                return 1;

        int i;
        for(i = 0;i < len; ++i)
        {
                if((unsigned char)*src >= 0x81 && (unsigned char)*src <= 0xFE)
                {
                        //Åжϵڶþλ
                        unsigned char str2 = *(src + 1);
                        if((str2 >= 0xA1 && str2 <= 0xFE) || ( 0x40 >= 161 && str2 <= 0x7E))
                                return 0;
                }
        }

        return 1;
}
//------------------------------------------------------------------------------------
// Case-insensitive substring search; both strings must be ASCII.
// Returns a pointer to the first position in haystack where needle matches,
// or NULL when there is no match or either argument is NULL.
// An empty haystack never matches (the scan loop is not entered).
char *strcasestr(const char *haystack,const char *needle)
{
        const char *start;

        if(needle == NULL || haystack == NULL)
                return NULL;

        for(start = haystack; *start != 0; ++start)
        {
                const char *h = start;
                const char *n = needle;

                while(*n != 0 && *h != 0)
                {
                        int same = (*n == *h);

                        /* upper-case needle letter vs. lower-case haystack letter */
                        if(!same && *n >= 'A' && *n <= 'Z')
                                same = (*n == (*h - 32));
                        /* lower-case needle letter vs. upper-case haystack letter */
                        if(!same && *n >= 'a' && *n <= 'z')
                                same = (*n == (*h + 32));

                        if(!same)
                                break;
                        ++n;
                        ++h;
                }

                /* the whole needle was consumed: match found at start */
                if(*n == 0)
                        return (char *)start;
        }
        return NULL;
}

/*
 * url_decode2 - decode "=XX" hex escapes and '+' from src into dst.
 *
 * Same scheme as url_decode() but with '=' as the escape character:
 *   "=XX" (two hex digits, either case) -> the byte 0xXX
 *   '+'                                 -> ' '
 *   anything else                       -> copied unchanged
 * An '=' not followed by two hex digits is copied verbatim together with the
 * (up to) two bytes after it; nothing beyond src_len is ever read or copied.
 *
 * dst:     output buffer, at least src_len bytes. NOT NUL-terminated here.
 * src:     input bytes.
 * src_len: number of bytes of src to process.
 *
 * Returns the number of bytes written to dst, or -1 if dst or src is NULL.
 */
int url_decode2(char *dst, const char *src, int src_len)
{
        unsigned char *out = (unsigned char *)dst;
        const unsigned char *in = (const unsigned char *)src;
        int i = 0;
        int j = 0;

        if ( ( dst == NULL ) || ( src == NULL )  )
                return -1;

        while ( i < src_len )
        {
                if ( in[i] == '=' )
                {
                        /* unsigned char values keep the <ctype.h> calls well defined */
                        if ( i + 2 < src_len && isxdigit( in[i+1] ) && isxdigit( in[i+2] ) )
                        {
                                int hi = tolower( in[i+1] );
                                int lo = tolower( in[i+2] );

                                hi = ( hi <= '9' ) ? hi - '0' : hi - 'a' + 10;
                                lo = ( lo <= '9' ) ? lo - '0' : lo - 'a' + 10;
                                out[j++] = (unsigned char)( 16 * hi + lo );
                                i += 3;
                        }
                        else
                        {
                                /* Not an escape: copy it and up to two following
                                 * bytes as-is, bounded by src_len. */
                                int k;
                                for ( k = 0; k < 3 && i < src_len; k++ )
                                        out[j++] = in[i++];
                        }
                }
                else if ( in[i] == '+' )
                {
                        out[j++] = ' ';
                        i++;
                }
                else
                {
                        out[j++] = in[i++];
                }
        }
        return j;
}

/*
 * code_convert - convert src_len bytes of src from from_charset to
 * to_charset with iconv, writing at most des_len bytes to des.
 *
 * The output is not NUL-terminated by this function.
 *
 * Returns 0 on success, -1 if an argument is NULL, the conversion
 * descriptor cannot be opened, or iconv() reports an error.
 */
int  code_convert(char *from_charset, char *to_charset, char *src, size_t src_len, char *des, size_t des_len ){
        iconv_t cd;
        size_t rc;

        if(!from_charset || !to_charset || !src || !des)
                return -1;

        cd = iconv_open(to_charset, from_charset);
        if(cd == (iconv_t)(-1))
                return -1;

        rc = iconv(cd, &src, &src_len, &des, &des_len);

        /* always release the descriptor, also when the conversion failed */
        iconv_close(cd);

        return (rc == (size_t)(-1)) ? -1 : 0;
}

/*
 * get_element_by_flag - copy the text found between start_flag and the next
 * end_flag in buf into to.
 *
 * Nothing is written when any argument is NULL or either marker is missing.
 * Exactly the bytes between the markers are copied; no NUL terminator is
 * appended, so the caller must supply a pre-zeroed or otherwise terminated
 * buffer.
 */
void get_element_by_flag(char *buf, char *to, char *start_flag, char *end_flag)
{
        char *value;
        char *value_end;

        if(buf == NULL || to == NULL || start_flag == NULL || end_flag == NULL)
                return;

        value = strstr(buf, start_flag);
        if(value == NULL)
                return;
        value += strlen(start_flag);    /* step over the opening marker */

        value_end = strstr(value, end_flag);
        if(value_end == NULL)
                return;

        strncpy(to, value, value_end - value);
}

/*
 * get_element_by_flag_end - copy the text that follows start_flag in buf,
 * up to (not including) the position pend, into to.
 *
 * Nothing is written when any argument is NULL, start_flag is not found, or
 * pend does not lie after the end of start_flag. No NUL terminator is
 * appended; the caller must supply a pre-zeroed or otherwise terminated
 * buffer.
 */
void get_element_by_flag_end(char *buf, char *to, char *start_flag, char *pend)
{
        char *value;

        if(buf == NULL || to == NULL || start_flag == NULL || pend == NULL)
                return;

        value = strstr(buf, start_flag);
        if(value == NULL)
                return;
        value += strlen(start_flag);    /* step over the opening marker */

        if(pend <= value)
                return;

        strncpy(to, value, pend - value);
}

/*
 * set_string_end_by_char - terminate a string at the first occurrence of
 * flag inside [start, end).
 *
 * The first byte equal to flag is overwritten with a NUL terminator; later
 * occurrences are left alone. Nothing happens when flag does not occur in
 * the range or when start or end is NULL.
 */
void set_string_end_by_char(char *start, char *end, char flag)
{
        char *cur;

        if(!start || !end)
                return;

        for(cur = start; cur < end; cur++){
                if(*cur == flag){
                        *cur = '\0';
                        return;
                }
        }
}

/*
 * delete_html_tag - copy buf to `to` with markup tags removed.
 *
 * html_flag            marker whose presence in buf means "contains markup"
 *                      (e.g. "</" or its URL-encoded form "%3C%2F").
 * left_angle_bracket   string that opens a tag  (e.g. "<" or "%3C").
 * right_angle_bracket  string that closes a tag (e.g. ">" or "%3E").
 *
 * For "123<p>456</p>789" the result is "123456789". When html_flag does not
 * occur in buf (or the marker arguments are NULL/empty) buf is copied
 * verbatim. The result is always NUL-terminated; `to` needs room for
 * strlen(buf) + 1 bytes. Nothing is written when buf or to is NULL.
 */
void delete_html_tag(char *buf, char *to, char *html_flag, char *left_angle_bracket, char *right_angle_bracket)
{
        char *pend = NULL, *pstart = NULL;
        size_t left_len, right_len;

        if(!buf || !to)
                return;

        left_len = left_angle_bracket ? strlen(left_angle_bracket) : 0;
        right_len = right_angle_bracket ? strlen(right_angle_bracket) : 0;

        /* No usable markers, or no markup at all: plain copy including NUL. */
        if(!html_flag || left_len == 0 || right_len == 0 || strstr(buf, html_flag) == NULL)
        {
                memmove(to, buf, strlen(buf) + 1);
                return;
        }

        /* Text in front of the first tag ("123"). */
        pend = strstr(buf, left_angle_bracket);
        if(pend)
        {
                memmove(to, buf, (size_t)(pend - buf));
                to += (pend - buf);
                buf = pend;
                /* look for a second tag opener after the first one */
                pend = strstr(buf + left_len, left_angle_bracket);
        }

        /* Text between consecutive tags ("456"). */
        while(pend)
        {
                pstart = strstr(buf, right_angle_bracket);
                if(pstart)
                {
                        pend = strstr(pstart, left_angle_bracket);
                        if(!pend)
                                break;  /* malformed input: no opener after this closer */
                        if((size_t)(pend - pstart) > right_len)
                        {
                                size_t text_len = (size_t)(pend - pstart) - right_len;
                                memmove(to, pstart + right_len, text_len);
                                to += text_len;
                        }
                }
                buf = pend + left_len;
                pend = strstr(buf, left_angle_bracket);
        }

        /* Text after the last tag ("789"). */
        pstart = strstr(buf, right_angle_bracket);
        if(pstart)
        {
                size_t tail_len;

                pstart += right_len;
                tail_len = strlen(pstart);
                memmove(to, pstart, tail_len);
                to += tail_len;
        }
        *to = '\0';
}

/*
 * reset_http_esc - replace every occurrence of pneedle in psrc with the
 * single character c_reset, in place.
 *
 * psrc:    NUL-terminated buffer to edit.
 * length:  number of bytes of psrc that hold text.
 * pneedle: escape sequence to replace (e.g. "&lt;").
 * c_reset: replacement character (e.g. '<').
 *
 * After each replacement the rest of the text is shifted left; the bytes
 * freed at the end of the original [psrc, psrc + length) range are set to
 * NUL. Replacement stops once the remaining text is 3 bytes or shorter.
 * Nothing happens when psrc or pneedle is NULL, length < 3, pneedle is
 * empty, or pneedle is the single character c_reset.
 */
void reset_http_esc(char *psrc, int length, char *pneedle, char c_reset)
{
        size_t needle_len;
        int live = length;      /* bytes of text still in use */

        if(!psrc || !pneedle || length<3)
                return;

        needle_len = strlen(pneedle);
        if(needle_len == 0)
                return;         /* an empty needle matches everywhere */
        if(needle_len == 1 && pneedle[0] == c_reset)
                return;         /* replacing a char by itself never ends */

        while(live > 3){
                char *match = strstr(psrc, pneedle);
                size_t offset;

                if(!match)
                        break;
                offset = (size_t)(match - psrc);
                /* ignore matches reaching into stale bytes beyond the live text */
                if(offset + needle_len > (size_t)live)
                        break;

                *match = c_reset;
                memmove(match + 1, match + needle_len, (size_t)live - offset - needle_len);
                live -= (int)(needle_len - 1);
        }

        /* wipe the bytes that the shifts left behind at the end */
        if(live < length)
                memset(psrc + live, '\0', (size_t)(length - live));
}

/*
 * decodeQuantum - decode one 4-character base64 group from src into the
 * 3 bytes dest[0..2].
 *
 * '=' contributes six zero bits. Any other character outside the base64
 * alphabet is skipped without shifting the accumulator.
 */
static void decodeQuantum(unsigned char *dest, char *src)
{
        unsigned int bits = 0;
        int k;

        for(k = 0; k < 4; ++k) {
                const char ch = src[k];
                unsigned int sextet;

                if(ch >= 'A' && ch <= 'Z')
                        sextet = (unsigned int)(ch - 'A' + 0);
                else if(ch >= 'a' && ch <= 'z')
                        sextet = (unsigned int)(ch - 'a' + 26);
                else if(ch >= '0' && ch <= '9')
                        sextet = (unsigned int)(ch - '0' + 52);
                else if(ch == '+')
                        sextet = 62;
                else if(ch == '/')
                        sextet = 63;
                else if(ch == '=')
                        sextet = 0;
                else
                        continue;       /* unknown character: ignored entirely */

                bits = (bits << 6) + sextet;
        }

        /* the low 24 bits hold the three output bytes, most significant first */
        dest[0] = (unsigned char)((bits >> 16) & 255);
        dest[1] = (unsigned char)((bits >> 8) & 255);
        dest[2] = (unsigned char)(bits & 255);
}

/*
 * base64Decode - decode the base64 string src into dest.
 *
 * Input is read up to the first '=' or NUL; the run of '=' that follows is
 * counted as padding. Only complete 4-character groups are decoded.
 *
 * dest:      output buffer, 3 bytes per complete input group at most.
 * src:       NUL-terminated base64 text.
 * rawLength: if not NULL, receives the number of decoded bytes.
 *
 * Input without a single complete group decodes to nothing
 * (*rawLength == 0, dest untouched).
 */
static void base64Decode(unsigned char *dest, char *src, int *rawLength)
{
        int length = 0;
        int equalsTerm = 0;
        int i;
        int numQuantums;
        int decoded;
        unsigned char lastQuantum[3];

        while((src[length] != '=') && src[length])
                length++;
        while(src[length+equalsTerm] == '=')
                equalsTerm++;
        numQuantums = (length + equalsTerm) / 4;

        /* No complete group: decoding one anyway would read past the input. */
        if(numQuantums == 0) {
                if(rawLength)
                        *rawLength = 0;
                return;
        }

        decoded = (numQuantums * 3) - equalsTerm;
        if(decoded < 0)
                decoded = 0;    /* more padding than data */
        if(rawLength)
                *rawLength = decoded;

        for(i = 0; i < numQuantums - 1; i++) {
                decodeQuantum(dest, src);
                dest += 3; src += 4;
        }

        /* The last group goes through a scratch buffer so that padding
         * bytes are never stored in dest. */
        decodeQuantum(lastQuantum, src);
        for(i = 0; i < 3 - equalsTerm; i++)
                dest[i] = lastQuantum[i];
}

/* The 64 characters of the standard base64 alphabet, in value order. */
static const char table64[]=
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * find64 - report whether c belongs to the base64 alphabet.
 *
 * Returns 1 for one of the 64 alphabet characters, 0 otherwise. The padding
 * character '=' and the NUL terminator are not part of the alphabet.
 */
int find64(char c)
{
        size_t i;

        /* sizeof includes the string terminator; leave it out of the search */
        for(i = 0; i < sizeof(table64) - 1; i++)
                if(c == table64[i])
                        return 1;
        return 0;
}

/*
size_t base64_encode(const char *inp, size_t insize, char **outptr)
{
        unsigned char ibuf[3];
        unsigned char obuf[4];
        int i;
        int inputparts;
        char *output;
        char *base64data;
        char *indata = (char *)inp;
        *outptr = NULL;
        if(0 == insize)
                insize = strlen(indata);
        base64data = output = (char*)malloc(insize*4/3+4);
        if(NULL == output)
                return 0;
        while(insize > 0) {
                for (i = inputparts = 0; i < 3; i++) {
                        if(insize > 0) {
                                inputparts++;
                                ibuf[i] = *indata;
                                indata++;
                                insize--;
                        }
                        else
                                ibuf[i] = 0;
                }
                obuf [0] = (ibuf [0] & 0xFC) >> 2;
                obuf [1] = ((ibuf [0] & 0x03) << 4) | ((ibuf [1] & 0xF0) >> 4);
                obuf [2] = ((ibuf [1] & 0x0F) << 2) | ((ibuf [2] & 0xC0) >> 6);
                obuf [3] = ibuf [2] & 0x3F;
                switch(inputparts) {
                        case 1:
                                snprintf(output, 5, "%c%c==",
                                                table64[obuf[0]],
                                                table64[obuf[1]]);
                                break;
                        case 2:
                                snprintf(output, 5, "%c%c%c=",
                                                table64[obuf[0]],
                                                table64[obuf[1]],
                                                table64[obuf[2]]);
                                break;
                        default:
                                snprintf(output, 5, "%c%c%c%c",
                                                table64[obuf[0]],
                                                table64[obuf[1]],
                                                table64[obuf[2]],
                                                table64[obuf[3]] );
                                break;
                }
                output += 4;
        }
        *output=0;
        *outptr = base64data;
        return strlen(base64data);
}
*/
/*
 * base64_decode - decode the NUL-terminated base64 text str into data.
 *
 * Thin wrapper around base64Decode(). Returns the decoded length that
 * base64Decode() reports.
 */
int base64_decode(const char *str,void *data)
{
        int decoded_len;
        unsigned char *out = (unsigned char *)data;
        char *text = (char *)str;

        base64Decode(out, text, &decoded_len);
        return decoded_len;
}
/*_____________________________________________Lastest base64__________________________________________________________*/
//---------------------------------------------------------------------------
//  Low 4 bits of n -> upper-case hexadecimal digit '0'-'9', 'A'-'F'.
//  The upper 4 bits of n are ignored.
char Hex2Chr( unsigned char n )
{
        const unsigned char nibble = n & 0xF;

        return ( char )( nibble < 10 ? nibble + '0' : nibble - 10 + 'A' );
}
//---------------------------------------------------------------------------
//  Hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') -> its value 0-15.
//  Any other character yields (unsigned char)-1, i.e. 255.

unsigned char Chr2Hex( char c )
{
        if ( c >= '0' && c <= '9' )
                return ( unsigned char )( c - '0' );
        if ( c >= 'a' && c <= 'f' )
                return ( unsigned char )( c - 'a' + 10 );
        if ( c >= 'A' && c <= 'F' )
                return ( unsigned char )( c - 'A' + 10 );
        return ( unsigned char )-1;
}
//---------------------------------------------------------------------------
//  Base64 code table
//  values 0-63 map to: A-Z (0-25), a-z (26-51), 0-9 (52-61), + (62), / (63)
//  Only the low 6 bits of n are used.

char  Base2Chr( unsigned char n )
{
        static const char alphabet[] =
                "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                "abcdefghijklmnopqrstuvwxyz"
                "0123456789+/";

        return alphabet[n & 0x3F];
}
//---------------------------------------------------------------------------

//  Base64 alphabet character -> its 6-bit value (0-63).
//  Returns 64 for any character that is not in the alphabet.
unsigned char Chr2Base( char c )
{
        if ( c == '+' )
                return 62;
        if ( c == '/' )
                return 63;
        if ( c >= '0' && c <= '9' )
                return ( unsigned char )( c - '0' + 52 );
        if ( c >= 'a' && c <= 'z' )
                return ( unsigned char )( c - 'a' + 26 );
        if ( c >= 'A' && c <= 'Z' )
                return ( unsigned char )( c - 'A' );
        return 64;  //  invalid character
}
//---------------------------------------------------------------------------
//  Encodes every byte of aSrc as "=XX" (upper-case hex).
//  aLen is the size of aSrc; the buffer at aDest must hold at least
//  3 * aLen bytes plus one for the terminating zero.
//  Returns the length of the text written to aDest (terminator excluded).

int QPEncode( char * const aDest, const unsigned char * aSrc, int aLen )
{
        char * out = aDest;
        int    k;

        for ( k = 0; k < aLen; ++k )
        {
                const unsigned char byte = aSrc[k];

                out[0] = '=';
                out[1] = Hex2Chr( byte >> 4 );  //  high nibble
                out[2] = Hex2Chr( byte );       //  low nibble (Hex2Chr masks it)
                out += 3;
        }
        *out = 0;  //  aDest is an ASCIIZ string
        return ( out - aDest );  //  exclude the end of zero
}
//---------------------------------------------------------------------------
//  Decodes "=XX" hex escapes in the ASCIIZ string aSrc; every other
//  character, and any '=' not followed by two hex digits, is copied as-is.
//  The buffer at aDest must hold up to strlen( aSrc ) bytes (input without
//  escapes is copied unchanged). The output is NOT zero-terminated.
//  Returns the number of bytes written to aDest.

int QPDecode( unsigned char * const aDest, const char * aSrc )
{
        unsigned char * p = aDest;
        unsigned char   ch, cl;
        while ( *aSrc )  //  aSrc is an ASCIIZ string
        {
                //  Look at aSrc[2] only after aSrc[1] is known not to be the
                //  terminator, so a trailing '=' never reads past the string.
                if ( ( *aSrc == '=' ) && aSrc[1] && aSrc[2] )
                {
                        ch = Chr2Hex( aSrc[1] );
                        cl = Chr2Hex( aSrc[2] );
                        if ( ( ch == ( unsigned char )-1 ) || ( cl == ( unsigned char )-1 ) )
                                *p++ = *aSrc++;
                        else
                        {
                                *p++ = ( ch << 4 ) | cl;
                                aSrc += 3;
                        }
                }
                else
                        *p++ = *aSrc++;
        }
        return ( p - aDest );
}
//---------------------------------------------------------------------------
//  Base64-encodes aLen bytes of aSrc into aDest as an ASCIIZ string,
//  padded with '=' to a multiple of four characters.
//  aLen is the length of aSrc; the buffer at aDest must hold at least
//  1.33 * aLen bytes (rounded up to a multiple of 4) plus the terminator.
//  Returns the length of the text written to aDest (terminator excluded).

int Base64Encode( char * const aDest, const unsigned char * aSrc, int aLen )
{
        char        * out = aDest;
        unsigned char carry = 0;  //  bits left over from the previous byte
        int           idx;

        for ( idx = 0; idx < aLen; ++idx )
        {
                const unsigned char byte = aSrc[idx];
                const int phase = idx % 3;

                if ( phase == 0 )
                {
                        *out++ = Base2Chr( byte >> 2 );
                        carry = ( byte << 4 ) & 0x3F;
                }
                else if ( phase == 1 )
                {
                        *out++ = Base2Chr( carry | ( byte >> 4 ) );
                        carry = ( byte << 2 ) & 0x3F;
                }
                else
                {
                        *out++ = Base2Chr( carry | ( byte >> 6 ) );
                        *out++ = Base2Chr( byte );
                }
        }

        //  Flush the pending bits of an incomplete group and pad it.
        if ( aLen % 3 != 0 )
        {
                *out++ = Base2Chr( carry );
                if ( aLen % 3 == 1 )
                        *out++ = '=';
                *out++ = '=';
        }
        *out = 0;  //  aDest is an ASCIIZ string
        return ( out - aDest );  //  exclude the end of zero
}

//  Decodes the base64 ASCIIZ string aSrc into aDest.
//  Characters outside the base64 alphabet (line breaks etc.) are skipped;
//  decoding stops at the end of the string or at a '=' found at the start
//  of a step. The output is not zero-terminated.
//  Returns the number of bytes written to aDest.
int Base64Decode( unsigned char * const aDest, const char * aSrc )
{
        unsigned char * out = aDest;
        const int       total = strlen( aSrc );
        unsigned char   pending = 0;  //  bits carried over to the next output byte
        int             count;        //  number of valid characters consumed so far

        for ( count = 0; count < total; ++count )
        {
                unsigned char sextet = 64;

                if ( *aSrc == '=' )
                        break;

                //  fetch the next valid character, skipping invalid ones
                while ( sextet == 64 )
                {
                        if ( *aSrc == 0 )
                                sextet = 65;  //  end of string
                        else
                                sextet = Chr2Base( *aSrc++ );
                }
                if ( sextet == 65 )
                        break;

                switch ( count % 4 )
                {
                        case 0 :
                                pending = sextet << 2;
                                break;
                        case 1 :
                                *out++ = ( unsigned char )( pending | ( sextet >> 4 ) );
                                pending = ( unsigned char )( sextet << 4 );
                                break;
                        case 2 :
                                *out++ = ( unsigned char )( pending | ( sextet >> 2 ) );
                                pending = ( unsigned char )( sextet << 6 );
                                break;
                        case 3 :
                                *out++ = ( unsigned char )( pending | sextet );
                                break;
                }
        }
        return ( out - aDest );
}
//---------------------------------------------------------------------------
/*_____________________________________________________________________________________________________________________*/

/*
 * qp - print sour as a quoted-printable style escape "=XX" plus a newline.
 *
 * For sour > 127 the two upper-case hex digits of sour are printed. For
 * sour <= 127 the incoming first and second values are printed unchanged
 * after the '='. All parameters are passed by value; nothing is returned
 * to the caller.
 */
void qp(unsigned char sour,unsigned char first,unsigned char second)
{
        if(sour>127){
                first=sour>>4;
                second=sour&15;
                /* 0-9 -> '0'-'9' (add 48), 10-15 -> 'A'-'F' (add 55) */
                if(first>9)first+=55;
                else first+=48;
                if(second>9)second+=55;
                else second+=48;
        }
        printf("%c%c%c\n",'=',first,second);
}

/*
 * uqp - combine two hex digit characters (first = high, second = low)
 * into one byte.
 *
 * NOTE(review): the result is stored in the by-value parameter sour, so it
 * never reaches the caller; as written the function has no observable
 * effect. Confirm whether sour was meant to be an output pointer.
 */
void uqp(unsigned char sour,unsigned char first,unsigned char second)
{
        /* 'A'.. (>= 65) -> subtract 55, otherwise treat as '0'-'9' -> subtract 48 */
        first  -= (first  >= 65) ? 55 : 48;
        second -= (second >= 65) ? 55 : 48;

        sour = (unsigned char)(first << 4);
        sour |= second;
}

/*
 * EncodeQuoted - quoted-printable encode nSrcLen bytes of pSrc into pDst.
 *
 * Bytes '!'..'~' other than '=' are copied; every other byte becomes "=XX"
 * (upper-case hex). Once the current output line has reached
 * nMaxLineLen - 3 characters a soft line break "=\r\n" is inserted.
 *
 * pDst must be large enough for the encoded text plus the terminator
 * (worst case 3 bytes per input byte plus the soft line breaks).
 *
 * Returns the number of characters written, excluding the terminating NUL.
 */
int EncodeQuoted(const unsigned char* pSrc, char* pDst, int nSrcLen, int nMaxLineLen)
{
        static const char hex_digits[] = "0123456789ABCDEF";
        int nDstLen=0;
        int nLineLen=0;
        int i=0;
        for (i = 0; i < nSrcLen; i++, pSrc++){
                if ((*pSrc >= '!') && (*pSrc <= '~') && (*pSrc != '=')){
                        *pDst++ = (char)*pSrc;
                        nDstLen++;
                        nLineLen++;
                }else{
                        *pDst++ = '=';
                        *pDst++ = hex_digits[*pSrc >> 4];
                        *pDst++ = hex_digits[*pSrc & 0x0F];
                        nDstLen += 3;
                        nLineLen += 3;
                }
                if (nLineLen >= nMaxLineLen - 3){
                        /* soft line break */
                        *pDst++ = '=';
                        *pDst++ = '\r';
                        *pDst++ = '\n';
                        nDstLen += 3;
                        nLineLen = 0;
                }
        }
        *pDst = '\0';
        return nDstLen;
}

/* Value of one hexadecimal digit, or -1 if c is not a hex digit. */
static int quoted_hex_value(unsigned char c)
{
        if (c >= '0' && c <= '9')
                return c - '0';
        if (c >= 'A' && c <= 'F')
                return c - 'A' + 10;
        if (c >= 'a' && c <= 'f')
                return c - 'a' + 10;
        return -1;
}

/*
 * DecodeQuoted - decode nSrcLen bytes of quoted-printable text from pSrc
 * into pDst.
 *
 * "=\r\n" (soft line break) is dropped, "=XX" becomes the byte 0xXX, and
 * everything else - including an '=' that starts neither form - is copied
 * unchanged. Input beyond nSrcLen is never examined.
 *
 * pDst needs room for nSrcLen + 1 bytes; the output is NUL-terminated.
 * Returns the number of decoded bytes, excluding the terminating NUL.
 */
int DecodeQuoted(const char* pSrc, unsigned char *pDst, int nSrcLen)
{
        int nDstLen=0;
        int i=0;
        while (i < nSrcLen){
                if (*pSrc == '=' && i + 2 < nSrcLen){
                        int hi, lo;

                        if (pSrc[1] == '\r' && pSrc[2] == '\n'){
                                /* soft line break: produces no output */
                                pSrc += 3;
                                i += 3;
                                continue;
                        }
                        hi = quoted_hex_value((unsigned char)pSrc[1]);
                        lo = quoted_hex_value((unsigned char)pSrc[2]);
                        if (hi >= 0 && lo >= 0){
                                /* store exactly one byte */
                                *pDst++ = (unsigned char)((hi << 4) | lo);
                                pSrc += 3;
                                i += 3;
                                nDstLen++;
                                continue;
                        }
                }
                *pDst++ = (unsigned char)*pSrc++;
                i++;
                nDstLen++;
        }
        *pDst = '\0';
        return nDstLen;
}

/*
 * is_utf8 - guess whether URL-encoded text carries UTF-8.
 *
 * Looks only at the first '%' in data and tests for the shape "%Ex%xx%xx":
 * 'E' right after the first '%', and further '%' characters 3 and 6
 * positions later.
 *
 * Returns  1 when the pattern is present,
 *          0 when a '%' exists but the pattern is not matched,
 *         -1 when data is NULL or contains no '%'.
 */
int is_utf8(const char *data)
{
        const char *start;

        if ( data == NULL )
                return -1;
        start = strchr( data, '%' );
        if ( start == NULL )
                return -1;

        /* the pattern test reads start[1], start[3] and start[6] */
        if ( strlen( start ) < 7 )
                return 0;

        if ( ( start[1] == 'E' ) && ( start[3] == '%' ) && ( start[6] == '%' ) )
                return 1;
        return 0;
}

/*
 * url_decode - decode URL (percent) encoded text from src into dst.
 *
 *   "%XX" (two hex digits, either case) -> the byte 0xXX
 *   '+'                                 -> ' '
 *   anything else                       -> copied unchanged
 * A '%' not followed by two hex digits is copied verbatim together with the
 * (up to) two bytes after it; nothing beyond src_len is ever read.
 *
 * dst:     output buffer, at least src_len + 1 bytes; always NUL-terminated.
 * src:     input bytes.
 * src_len: number of bytes of src to decode.
 *
 * Returns the number of decoded bytes (terminator excluded), or -1 if dst
 * or src is NULL or src_len is 0.
 */
int url_decode( char *dst, const char *src, int src_len )
{
        int i = 0;
        int j = 0;

        if ( ( dst == NULL ) || ( src == NULL )  || (src_len == 0))
                return -1;

        while ( i < src_len )
        {
                if ( src[i] == '%' )
                {
                        /* casts keep the <ctype.h> calls defined for bytes >= 0x80 */
                        if ( i + 2 < src_len
                                && isxdigit( (unsigned char)src[i+1] )
                                && isxdigit( (unsigned char)src[i+2] ) )
                        {
                                int hi = tolower( (unsigned char)src[i+1] );
                                int lo = tolower( (unsigned char)src[i+2] );

                                hi = ( hi <= '9' ) ? hi - '0' : hi - 'a' + 10;
                                lo = ( lo <= '9' ) ? lo - '0' : lo - 'a' + 10;
                                dst[j++] = (char)( 16 * hi + lo );
                                i += 3;
                        }
                        else
                        {
                                /* Not urlencoded data: copy it and up to two
                                 * following bytes as-is, bounded by src_len. */
                                int k;
                                for ( k = 0; k < 3 && i < src_len; k++ )
                                        dst[j++] = src[i++];
                        }
                }
                else if ( src[i] == '+' )
                {
                        dst[j++] = ' ';
                        i++;
                }
                else
                {
                        dst[j++] = src[i++];
                }
        }
        dst[j] = '\0';
        return j;
}

/*
 * convert_gbk_to_utf8 - convert inlen bytes of GBK text in inbuf to UTF-8.
 *
 * outbuf is zero-filled over outlen bytes before the conversion, which then
 * writes at most outlen bytes into it.
 *
 * Returns 0 on success, -1 when the converter cannot be opened or iconv()
 * reports an error.
 */
int convert_gbk_to_utf8(char *inbuf, int inlen, char *outbuf, int outlen)
{
        char *in_cursor = inbuf;
        char *out_cursor = outbuf;
        size_t in_left = inlen;
        size_t out_left = outlen;
        int status = 0;
        iconv_t conv = iconv_open("UTF-8", "GBK");

        if(conv == (iconv_t)(-1))
                return -1;

        memset(outbuf, 0, outlen);
        if(iconv(conv, &in_cursor, &in_left, &out_cursor, &out_left) == (size_t)(-1))
                status = -1;

        iconv_close(conv);
        return status;
}
/*
 * convert_big5_to_utf8 - convert inlen bytes of Big5 text in inbuf to UTF-8.
 *
 * outbuf is zero-filled over outlen bytes before the conversion, which then
 * writes at most outlen bytes into it. An iconv() failure is reported with
 * perror().
 *
 * Returns 0 on success, -1 when the converter cannot be opened or iconv()
 * reports an error.
 */
int convert_big5_to_utf8(char *inbuf,int inlen,char *outbuf,int outlen)
{
        char *in_cursor = inbuf;
        char *out_cursor = outbuf;
        size_t in_left = inlen;
        size_t out_left = outlen;
        int status = 0;
        iconv_t conv = iconv_open("UTF-8","big5");

        if(conv == (iconv_t)(-1))
                return -1;

        memset(outbuf,0,outlen);
        if(iconv(conv,&in_cursor,&in_left,&out_cursor,&out_left) == (size_t)(-1))
        {
                perror("iconv big5->utf8:");
                status = -1;
        }

        iconv_close(conv);
        return status;
}

/*
 * convert_gb2312_to_utf8 - convert inlen bytes of GB2312 text in inbuf to
 * UTF-8.
 *
 * outbuf is zero-filled over outlen bytes before the conversion, which then
 * writes at most outlen bytes into it. An iconv() failure is reported with
 * perror().
 *
 * Returns 0 on success, -1 when the converter cannot be opened or iconv()
 * reports an error.
 */
int convert_gb2312_to_utf8(char *inbuf,int inlen,char *outbuf,int outlen)
{
        char *in_cursor = inbuf;
        char *out_cursor = outbuf;
        size_t in_left = inlen;
        size_t out_left = outlen;
        int status = 0;
        iconv_t conv = iconv_open("UTF-8","GB2312");

        if(conv == (iconv_t)(-1))
                return -1;

        memset(outbuf,0,outlen);
        if(iconv(conv,&in_cursor,&in_left,&out_cursor,&out_left) == (size_t)(-1))
        {
                perror("iconv gb2312->utf8:");
                status = -1;
        }

        iconv_close(conv);
        return status;
}

/*
 * convert_gb18030_to_utf8 - convert inlen bytes of GB18030 text in inbuf to
 * UTF-8.
 *
 * outbuf is zero-filled over outlen bytes before the conversion, which then
 * writes at most outlen bytes into it. An iconv() failure is reported with
 * perror().
 *
 * Returns 0 on success, -1 when the converter cannot be opened or iconv()
 * reports an error.
 */
int convert_gb18030_to_utf8(char *inbuf,int inlen,char *outbuf,int outlen)
{
        iconv_t cd;
        char **pin = &inbuf;
        char **pout = &outbuf;
        /* Sizes come from the caller, as in the other convert_* functions;
         * the output buffer holds no meaningful string before conversion. */
        size_t inbufsize = inlen;
        size_t outbufsize = outlen;
        size_t *insize = &inbufsize;
        size_t *outsize = &outbufsize;
        cd = iconv_open("UTF-8","GB18030");
        if(cd == (iconv_t)(-1)) return -1;

        memset(outbuf,0,outlen);
        if(iconv(cd,pin,insize,pout,outsize) == (size_t)(-1))
        {
                perror("iconv gb18030->utf8:");
                iconv_close(cd);
                return -1;
        }

        iconv_close(cd);
        return 0;
}

/*
 * convert_url_to_utf8 - URL-decode data in place and make the result UTF-8.
 *
 * data: buffer of at least len + 1 bytes holding URL-encoded text; it is
 *       zero-filled over len + 1 bytes and then overwritten with the result.
 * len:  number of bytes of data to process.
 *
 * is_utf8() decides the path:
 *    1 -> the text is only URL-decoded,
 *    0 -> the text is URL-decoded and then converted from GBK to UTF-8,
 *   -1 -> the text is copied back unchanged.
 * Two scratch arrays of about len bytes each are placed on the stack.
 * Nothing happens when data is NULL or len is negative.
 */
void convert_url_to_utf8(char *data, int len)
{
        if(NULL==data || len < 0)
                return ;
        int flag;
        int decoded;
        int in_len;
        char inbuf[len+1];
        /* a few spare bytes in case the decoder copies a trailing partial
         * escape verbatim */
        char midbuf[len+4];

        strncpy(inbuf, data, len);
        inbuf[len] = '\0';      /* strncpy does not terminate a full-length copy */
        in_len = (int)strlen(inbuf);
        memset(data, 0, len+1);

        flag = is_utf8(inbuf);
        if (flag == 1){
                decoded = url_decode(data, inbuf, in_len);
                if (decoded >= 0 && decoded <= len)
                        data[decoded] = '\0';
        }else if (flag == 0){
                decoded = url_decode(midbuf, inbuf, in_len);
                if (decoded > 0){
                        midbuf[decoded] = '\0';
                        convert_gbk_to_utf8(midbuf, strlen(midbuf), data, len);
                }
        }
        else
                strncpy(data, inbuf, len);
        return;
}




/*
 * Return the index in encode_type[] of the first encoding name that occurs
 * in text (case-insensitive), or -1 when none does.
 * The matched name is copied to type when type is not NULL.
 */
static int fuzzy_match_encode_type(const char *text,char *type)
{
        size_t j;

        for(j=0;j<sizeof(encode_type)/sizeof(encode_type[0]);j++)
        {
                if(strcasestr(text,encode_type[j]) != NULL)
                {
                        if(type != NULL)
                                strcpy(type,encode_type[j]);
                        return (int)j;
                }
        }

        return -1;
}

/*
 * Guess the character encoding named in a piece of text.
 *
 * Every occurrence of the keywords "charset" and "encode" is located
 * (case-insensitive); up to FUZZY_OFFSET-1 bytes after the keyword, then up
 * to FUZZY_OFFSET-1 bytes before it, are searched for one of the names in
 * encode_type[].
 *
 * data : NUL terminated text to inspect
 * type : when not NULL, receives a copy of the matched name; the caller
 *        must supply room for the longest entry of encode_type[] plus NUL
 *
 * return >=0 : index into encode_type[] of the name that was found
 * return -1  : data is NULL or no encoding name was found
 */
int fuzzy_guess_encode_type(char *data,char *type)
{
        char *code_key[] = {"charset","encode"};//keyword of encode type
        char tmp[FUZZY_OFFSET] = {0};
        const size_t window_max = sizeof(tmp) - 1;//keep one byte for the NUL
        char *search = NULL;
        char *p_key = NULL;
        size_t i = 0;
        size_t key_len = 0;
        size_t data_len = 0;
        size_t avail = 0;
        size_t n = 0;
        int found = -1;

        if(data==NULL ) return -1;
        data_len = strlen(data);

        for(i=0;i<sizeof(code_key)/sizeof(code_key[0]);i++)
        {
                key_len = strlen(code_key[i]);
                search = data;

                while((p_key=strcasestr(search,code_key[i])) != NULL)
                {
                        //right side of keyword
                        avail = data_len - (size_t)(p_key - data) - key_len;
                        n = avail < window_max ? avail : window_max;
                        memcpy(tmp,p_key+key_len,n);
                        tmp[n] = 0;

                        found = fuzzy_match_encode_type(tmp,type);
                        if(found >= 0)
                                return found;

                        //left side of keyword: the bytes that precede it
                        avail = (size_t)(p_key - data);
                        n = avail < window_max ? avail : window_max;
                        memcpy(tmp,p_key-n,n);
                        tmp[n] = 0;

                        found = fuzzy_match_encode_type(tmp,type);
                        if(found >= 0)
                                return found;

                        //continue after this occurrence, otherwise the same
                        //keyword would be found again forever
                        search = p_key + key_len;
                }
        }

        return -1;//encoding type not found

}


/*
 * Classify the body of an HTTP POST request by its first bytes.
 *
 * data : complete request text, HTTP header included
 *
 * return values:   0 type unknown   1 xml   2 mime   3 soap
 *                 -1 data is NULL  -2 no blank line ending the header
 */
int guess_post_data_packge_type(char *data)
{
        static const char header_end[] = "\r\n\r\n";
        char *p = NULL;

        if(data == NULL)
                return -1;

        p = strstr(data,header_end);//skip http protocol header
        if(p == NULL)
                return -2;

        p += sizeof(header_end) - 1;

        if(!strncmp(p,"<?xml",strlen("<?xml")))//xml enveloped
                return 1;
        if(!strncmp(p,"-----",strlen("-----")))//multipart boundary
                return 2;
        if(!strncmp(p,"<soap",strlen("<soap")))
                return 3;

        return 0;
}

/*
 * Copy src into dst, never writing more than room bytes and always leaving
 * dst NUL terminated (nothing is written when room is 0).
 * Truncation is byte based and may cut a multi-byte UTF-8 sequence.
 */
static void store_converted_post_data(char *dst,size_t room,const char *src)
{
        size_t n;

        if(room == 0)
                return;

        n = strlen(src);
        if(n > room - 1)
                n = room - 1;

        memcpy(dst,src,n);
        dst[n] = 0;
}

/*
 * Convert the body of an HTTP POST request to UTF-8 in place.
 *
 * data : complete request text, HTTP header included.
 *        NOTE(review): the space left for the body is computed as
 *        LENGTH_INFO minus the header length, i.e. data is assumed to be a
 *        LENGTH_INFO byte buffer - confirm against the callers.
 *
 * return values:
 *   1  body declares gbk               (conversion attempted)
 *   2  body declares utf-8/utf8/unicode (left unchanged)
 *   4  body declares gb2312/gb-2312    (conversion attempted)
 *   6  body declares gb18030/gb-18030  (conversion attempted)
 *   7  data is NULL, header end not found, or no known encoding declared
 *   8  body is not xml/mime/soap; it was url-decoded and converted from
 *      gbk when is_utf8() returned 0 for it
 * A failed conversion leaves the body untouched; the return value does not
 * report it.
 */
int convert_post_data_to_utf8(char *data)
{
        static const char header_end[] = "\r\n\r\n";
        char *p_post_data = NULL;
        char declared_type[16]={0};
        char data_converted[LENGTH_INFO*2] = {0};
        size_t offset = 0;
        size_t room = 0;

        if(data == NULL)
                return 7;

        //skip http header
        p_post_data = strstr(data,header_end);
        if(p_post_data == NULL)
                return 7;
        p_post_data += sizeof(header_end) - 1;

        //bytes available for the body, computed from the pointer difference
        offset = (size_t)(p_post_data - data);
        if(offset < (size_t)LENGTH_INFO)
                room = (size_t)LENGTH_INFO - offset;

        if(guess_post_data_packge_type(data) > 0)
        {//xml, mime or soap

                switch(fuzzy_guess_encode_type(p_post_data,declared_type))
                {
                        case 0://gbk convert to utf-8
                                if(!convert_gbk_to_utf8(p_post_data,strlen(p_post_data),
                                        data_converted,LENGTH_INFO*2))
                                {
                                        store_converted_post_data(p_post_data,room,data_converted);
                                }
                                return 1;
                        case 1://utf-8
                        case 2:
                        case 3://dnt need convert anymore
                                return 2;
                        case 4://gb2312
                        case 5:
                                if(!convert_gb2312_to_utf8(p_post_data,strlen(p_post_data),
                                        data_converted,LENGTH_INFO*2))
                                {
                                        store_converted_post_data(p_post_data,room,data_converted);
                                }
                                return 4;
                        case 6://gb18030
                        case 7:
                                if(!convert_gb18030_to_utf8(p_post_data,strlen(p_post_data),
                                        data_converted,LENGTH_INFO*2))
                                {
                                        store_converted_post_data(p_post_data,room,data_converted);
                                }
                                return 6;
                        default:
                                return 7;
                }
        }
        else
        {
                if(!is_utf8(p_post_data) && room > 0){
                        url_decode( data_converted,p_post_data,strlen(p_post_data));
                        convert_gbk_to_utf8(data_converted, strlen(data_converted), p_post_data,(int)room);
                }
                return 8;
        }

}

/* Value of one hexadecimal digit, or -1 when c is not a hex digit. */
static int url_decode3_hex_value(char c)
{
    if( c >= '0' && c <= '9' )
        return c - '0';
    if( c >= 'a' && c <= 'f' )
        return c - 'a' + 10;
    if( c >= 'A' && c <= 'F' )
        return c - 'A' + 10;
    return -1;
}

/*
 * Decode %XX escapes of a URL-encoded string.
 *
 * encd : NUL terminated input
 * decd : output buffer; the result is never longer than the input, so
 *        strlen(encd)+1 bytes are sufficient. decd may be the same buffer
 *        as encd.
 *
 * A '%' that is not followed by two hex digits is copied literally.
 * '+' is not translated to a space.
 *
 * return decd, or NULL when encd or decd is NULL
 */
char *url_decode3(char *encd, char decd[])
{
    if( encd == NULL || decd == NULL )
        return (char* )0;

    size_t i = 0;
    size_t j = 0;

    while( encd[i] != '\0' )
    {
        int hi = -1;
        int lo = -1;

        if( encd[i] == '%' )
        {
            /* the second digit is only read when the first one is valid,
               so a '%' at the very end never reads past the terminator */
            hi = url_decode3_hex_value( encd[i + 1] );
            if( hi >= 0 )
                lo = url_decode3_hex_value( encd[i + 2] );
        }

        if( hi >= 0 && lo >= 0 )
        {
            decd[j++] = (char)( (hi << 4) | lo );
            i += 3;
        }
        else
        {
            decd[j++] = encd[i++];
        }
    }
    decd[j] = '\0';

    return decd;
}

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值