#include "main.h"

#include <errno.h>
#include <iconv.h>
#include <stdio.h>
#include <string.h>

/*
 * Escape / entity cheat sheet. The original comment was garbled; this list
 * is a reconstruction and should be verified:
 *   %40 -> @    &lt; -> <    &gt; -> >    &amp; -> &
 *   &apos; -> '    &quot; -> "    &nbsp; -> ' '
 */

/*
 * Names of the common Simplified-Chinese related encodings.
 * The array index is part of the contract: fuzzy_guess_encode_type() returns
 * it and convert_post_data_to_utf8() switches on it, so do not reorder.
 */
char *encode_type[] = {
    "gbk",
    "utf-8",
    "utf8",
    "unicode",
    "gb2312",
    "gb-2312",
    "gb18030",
    "gb-18030"
};

/*------------------------------------------------------------------------*/
/* Added by Wang Liang, 2008-01-22.
 * Byte ranges used by the encoding heuristics below. In this file "LOW"
 * names the range of the first byte of a pair and "HIGH" the range of the
 * second byte. */
#define GB2312_LOW_LEFT   0xB0 /* gb2312 first byte, range start  */
#define GB2312_LOW_RIGHT  0xF7 /* gb2312 first byte, range end    */
#define GB2312_HIGH_LEFT  0xA0 /* gb2312 second byte, range start */
#define GB2312_HIGH_RIGHT 0xFE /* gb2312 second byte, range end   */
#define GBK_LOW_LEFT      0x81 /* gbk first byte, range start     */
#define GBK_LOW_RIGHT     0xFE /* gbk first byte, range end       */
#define GBK_HIGH_LEFT     0x40 /* gbk second byte, range start    */
#define GBK_HIGH_RIGHT    0xFE /* gbk second byte, range end      */
#define BIG5_LOW_LEFT     0x81 /* big5 first byte, range start    */
#define BIG5_LOW_RIGHT    0xFE /* big5 first byte, range end      */
#define BIG5_HIGH1_LEFT   0x40 /* big5 second byte, range 1 start */
#define BIG5_HIGH1_RIGHT  0x7E /* big5 second byte, range 1 end   */
#define BIG5_HIGH2_LEFT   0xA1 /* big5 second byte, range 2 start */
#define BIG5_HIGH2_RIGHT  0xFE /* big5 second byte, range 2 end   */
#define MAX_LENGTH 2

/* Returns 1 when some adjacent byte pair (src[i], src[i+1]) inside the first
 * len bytes has its first byte in [lead_lo, lead_hi] and its second byte in
 * [trail_lo, trail_hi]; returns 0 otherwise. Never reads src[len]. */
static int has_dbcs_pair(const char *src, int len,
                         unsigned char lead_lo, unsigned char lead_hi,
                         unsigned char trail_lo, unsigned char trail_hi)
{
    const unsigned char *bytes = (const unsigned char *)src;
    int i;

    for (i = 0; i + 1 < len; ++i) {
        if (bytes[i] >= lead_lo && bytes[i] <= lead_hi &&
            bytes[i + 1] >= trail_lo && bytes[i + 1] <= trail_hi)
            return 1;
    }
    return 0;
}

/*
 * IsGB2312 - heuristic check for GB2312 encoded text.
 *   src: bytes to inspect, len: number of bytes.
 * Returns 0 when at least one byte pair falls in the GB2312 ranges above,
 * 1 otherwise (including src == NULL or len <= MAX_LENGTH, as before).
 */
int IsGB2312(char *src, int len)
{
    if (src == NULL || len <= MAX_LENGTH)
        return 1;
    return has_dbcs_pair(src, len,
                         GB2312_LOW_LEFT, GB2312_LOW_RIGHT,
                         GB2312_HIGH_LEFT, GB2312_HIGH_RIGHT) ? 0 : 1;
}

/*
 * IsGBK - heuristic check for GBK encoded text.
 *   src: bytes to inspect, len: number of bytes.
 * Returns 0 when at least one byte pair falls in the GBK ranges above,
 * 1 otherwise (including src == NULL or len <= MAX_LENGTH).
 */
int IsGBK(char *src, int len)
{
    if (src == NULL || len <= MAX_LENGTH)
        return 1;
    return has_dbcs_pair(src, len,
                         GBK_LOW_LEFT, GBK_LOW_RIGHT,
                         GBK_HIGH_LEFT, GBK_HIGH_RIGHT) ? 0 : 1;
}

/*
 * IsBig5 - heuristic check for Big5 encoded text.
 * Big5: first byte 0x81-0xFE, second byte 0x40-0x7E or 0xA1-0xFE.
 * Returns 0 when such a pair exists in the first len bytes, 1 otherwise.
 *
 * The previous version never advanced through the buffer and its test for
 * the 0x40-0x7E range was the constant-false "0x40 >= 161"; both are fixed.
 */
int IsBig5(char *src, int len)
{
    if (src == NULL || len <= MAX_LENGTH)
        return 1;
    if (has_dbcs_pair(src, len, BIG5_LOW_LEFT, BIG5_LOW_RIGHT,
                      BIG5_HIGH2_LEFT, BIG5_HIGH2_RIGHT) ||
        has_dbcs_pair(src, len, BIG5_LOW_LEFT, BIG5_LOW_RIGHT,
                      BIG5_HIGH1_LEFT, BIG5_HIGH1_RIGHT))
        return 0;
    return 1;
}

/*------------------------------------------------------------------------*/

/* Folds ASCII 'A'..'Z' to lower case and leaves every other value alone. */
static int ascii_fold_lower(int c)
{
    return (c >= 'A' && c <= 'Z') ? c + 32 : c;
}

/*
 * Case-insensitive search for needle in haystack (ASCII letters only).
 * Returns a pointer to the first match inside haystack, or NULL when there
 * is no match or either argument is NULL. As before, an empty haystack
 * yields NULL even for an empty needle.
 */
char *strcasestr(const char *haystack, const char *needle)
{
    const char *start;

    if (haystack == NULL || needle == NULL)
        return NULL;

    for (start = haystack; *start != '\0'; start++) {
        const char *h = start;
        const char *n = needle;

        while (*n != '\0' && *h != '\0' &&
               ascii_fold_lower(*n) == ascii_fold_lower(*h)) {
            n++;
            h++;
        }
        if (*n == '\0') /* reached the end of needle: full match */
            return (char *)start;
    }
    return NULL;
}

/* Value of one hexadecimal digit, or -1 when c is not a hex digit. */
static int hex_nibble_value(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}

/*
 * url_decode2 - decode "=XX" escapes (and '+' as a space) from src into dst.
 *   dst:     output buffer, at least src_len bytes; NOT NUL-terminated here.
 *   src:     input bytes, src_len: number of input bytes.
 * Returns the number of bytes written, or -1 when dst or src is NULL.
 *
 * A '=' that is not followed by two hex digits is copied through together
 * with the next two bytes (as before), but never past src_len: the old code
 * read src[i+1] and src[i+2] without checking the input length.
 */
int url_decode2(char *dst, const char *src, int src_len)
{
    unsigned char *out = (unsigned char *)dst;
    const unsigned char *in = (const unsigned char *)src;
    int i = 0;
    int j = 0;

    if (dst == NULL || src == NULL)
        return -1;

    while (i < src_len) {
        if (in[i] == '=') {
            int hi = (i + 2 < src_len) ? hex_nibble_value(in[i + 1]) : -1;
            int lo = (i + 2 < src_len) ? hex_nibble_value(in[i + 2]) : -1;

            if (hi >= 0 && lo >= 0) {
                out[j++] = (unsigned char)(16 * hi + lo);
                i += 3;
            } else {
                /* not an escape: copy up to three raw bytes */
                int n = src_len - i;

                if (n > 3)
                    n = 3;
                while (n-- > 0)
                    out[j++] = in[i++];
            }
        } else if (in[i] == '+') {
            out[j++] = ' ';
            i++;
        } else {
            out[j++] = in[i++];
        }
    }
    return j;
}

/*
 * code_convert - one-shot iconv conversion of src (src_len bytes) from
 * from_charset to to_charset into des (capacity des_len bytes).
 * Returns 0 on success and -1 on failure. The descriptor is now validated
 * after iconv_open() and always closed, including when iconv() fails.
 */
int code_convert(char *from_charset, char *to_charset,
                 char *src, size_t src_len, char *des, size_t des_len)
{
    iconv_t cd;
    int rc = 0;

    if (from_charset == NULL || to_charset == NULL || src == NULL || des == NULL)
        return -1;

    cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1)
        return -1;

    if (iconv(cd, &src, &src_len, &des, &des_len) == (size_t)-1)
        rc = -1;

    iconv_close(cd);
    return rc;
}

/*
 * Copies the text found between the first start_flag in buf and the next
 * end_flag after it into to. Nothing is written when either flag is missing
 * or any argument is NULL.
 * NOTE: exactly (end - start) bytes are copied and no NUL terminator is
 * appended; callers must hand in a zero-filled buffer.
 */
void get_element_by_flag(char *buf, char *to, char *start_flag, char *end_flag)
{
    char *first;
    char *last;

    if (!buf || !to || !start_flag || !end_flag)
        return;

    first = strstr(buf, start_flag);
    if (first == NULL)
        return;
    first += strlen(start_flag);

    last = strstr(first, end_flag);
    if (last == NULL)
        return;

    strncpy(to, first, (size_t)(last - first));
}

/*
 * Like get_element_by_flag(), but the end of the element is given directly
 * by the pointer pend. Copies only when pend lies after the end of the first
 * start_flag. No NUL terminator is appended unless the copy runs past the
 * end of the source string (strncpy semantics).
 */
void get_element_by_flag_end(char *buf, char *to, char *start_flag, char *pend)
{
    char *first;

    if (!buf || !to || !start_flag || !pend)
        return;

    first = strstr(buf, start_flag);
    if (first == NULL)
        return;
    first += strlen(start_flag);

    if (pend > first)
        strncpy(to, first, (size_t)(pend - first));
}

/*
 * Truncates the string at the first occurrence of flag inside [start, end)
 * by overwriting that byte with a NUL. (The old code stored the
 * multi-character constant '/0' instead of '\0'.)
 */
void set_string_end_by_char(char *start, char *end, char flag)
{
    char *p;

    if (!start || !end)
        return;

    for (p = start; p < end; p++) {
        if (*p == flag) {
            *p = '\0';
            return;
        }
    }
}

/*
 * delete_html_tag - copy buf into to with the markup removed.
 *   html_flag:           if this marker does not occur in buf, buf is copied
 *                        unchanged.
 *   left_angle_bracket:  text that opens a tag  (e.g. "<" or "%3C").
 *   right_angle_bracket: text that closes a tag (e.g. ">" or "%3E").
 * Example: "123<p>456</p>789" becomes "123456789".
 * The output is NUL-terminated. memmove is used for every copy, so to may
 * overlap buf.
 *
 * Fixes: '\0' instead of the multi-character constant '/0' (the plain-copy
 * loop never terminated), no NULL dereference when a closing delimiter is
 * not followed by another opening delimiter, no read past a short buffer.
 */
void delete_html_tag(char *buf, char *to, char *html_flag,
                     char *left_angle_bracket, char *right_angle_bracket)
{
    char *open_pos;
    char *close_pos;
    size_t left_len;
    size_t right_len;

    if (!buf || !to || !html_flag || !left_angle_bracket || !right_angle_bracket)
        return;

    if (strstr(buf, html_flag) == NULL) {
        /* no markup at all: plain copy */
        while (*buf != '\0')
            *to++ = *buf++;
        *to = '\0';
        return;
    }

    left_len = strlen(left_angle_bracket);
    right_len = strlen(right_angle_bracket);

    /* text in front of the first tag ("123") */
    open_pos = strstr(buf, left_angle_bracket);
    if (open_pos != NULL) {
        memmove(to, buf, (size_t)(open_pos - buf));
        to += open_pos - buf;
        buf = open_pos;
        /* is there another opening delimiter after this one? */
        open_pos = strstr(buf + left_len, left_angle_bracket);
    }

    /* text between tags ("456") */
    while (open_pos != NULL) {
        close_pos = strstr(buf, right_angle_bracket);
        if (close_pos != NULL) {
            size_t gap;

            open_pos = strstr(close_pos, left_angle_bracket);
            if (open_pos == NULL)
                break; /* last tag reached; the tail is handled below */

            gap = (size_t)(open_pos - close_pos);
            if (gap > left_len && gap >= right_len) {
                memmove(to, close_pos + right_len, gap - right_len);
                to += gap - right_len;
            }
        }
        buf = open_pos + left_len;
        open_pos = strstr(buf, left_angle_bracket);
    }

    /* text behind the last tag ("789") */
    close_pos = strstr(buf, right_angle_bracket);
    if (close_pos != NULL) {
        size_t tail_len;

        close_pos += right_len;
        tail_len = strlen(close_pos);
        memmove(to, close_pos, tail_len);
        to += tail_len;
    }
    *to = '\0';
}

/*
 * reset_http_esc - replace every occurrence of pneedle inside psrc by the
 * single character c_reset, in place.
 *   length: number of meaningful bytes in psrc; nothing happens if < 3.
 * The tail is shifted left after each replacement, and afterwards
 * (replacements * (strlen(pneedle) - 1)) bytes at the end of the original
 * region are cleared to NUL. As before, the search restarts at psrc after
 * each hit and stops once the remaining length is <= 3.
 *
 * Fixes: real '\0' instead of '/0'; no pointer arithmetic on NULL; no
 * negative memmove size when a match lies beyond length; no endless loop
 * for an empty needle or a one-character needle equal to c_reset.
 */
void reset_http_esc(char *psrc, int length, char *pneedle, char c_reset)
{
    char *clear_pos;
    int needle_len;
    int replaced = 0;
    int k;

    if (!psrc || !pneedle || length < 3)
        return;

    needle_len = (int)strlen(pneedle);
    if (needle_len == 0)
        return;
    if (needle_len == 1 && pneedle[0] == c_reset)
        return; /* replacing a character with itself changes nothing */

    clear_pos = psrc + length - 1;

    while (length > 3) {
        char *hit = strstr(psrc, pneedle);
        int offset;

        if (hit == NULL)
            break;
        offset = (int)(hit - psrc);
        if (offset + needle_len > length)
            break; /* match is outside the region we own */

        *hit = c_reset;
        memmove(hit + 1, hit + needle_len, (size_t)(length - offset - needle_len));
        length -= needle_len - 1;
        replaced++;
    }

    /* wipe the bytes that became stale at the end of the region */
    for (k = 0; k < replaced * (needle_len - 1); k++)
        *clear_pos-- = '\0';
}

/*
 * Decodes one base64 quantum: four input characters into three output bytes.
 * '=' contributes six zero bits; any character outside the base64 alphabet
 * is ignored without contributing bits (unchanged behaviour).
 */
static void decodeQuantum(unsigned char *dest, char *src)
{
    unsigned int bits = 0;
    int i;

    for (i = 0; i < 4; i++) {
        const char c = src[i];

        if (c >= 'A' && c <= 'Z')
            bits = (bits << 6) + (unsigned int)(c - 'A');
        else if (c >= 'a' && c <= 'z')
            bits = (bits << 6) + (unsigned int)(c - 'a' + 26);
        else if (c >= '0' && c <= '9')
            bits = (bits << 6) + (unsigned int)(c - '0' + 52);
        else if (c == '+')
            bits = (bits << 6) + 62;
        else if (c == '/')
            bits = (bits << 6) + 63;
        else if (c == '=')
            bits <<= 6;
    }

    dest[0] = (unsigned char)((bits >> 16) & 255);
    dest[1] = (unsigned char)((bits >> 8) & 255);
    dest[2] = (unsigned char)(bits & 255);
}

/*
 * Decodes the NUL-terminated base64 string src into dest.
 *   rawLength: if not NULL, receives the number of decoded bytes.
 * The encoded text is taken to end at the first '='; the run of '=' after
 * it is the padding.
 *
 * Fixes: input shorter than one quantum now decodes to zero bytes (the old
 * code read past the terminator and wrote three bytes while reporting 0 or
 * a negative length); more than three '=' no longer yields a negative
 * length.
 */
static void base64Decode(unsigned char *dest, char *src, int *rawLength)
{
    int length = 0;
    int equalsTerm = 0;
    int numQuantums;
    int i;
    unsigned char lastQuantum[3];

    while (src[length] != '=' && src[length] != '\0')
        length++;
    while (src[length + equalsTerm] == '=')
        equalsTerm++;

    numQuantums = (length + equalsTerm) / 4;
    if (numQuantums == 0) {
        if (rawLength)
            *rawLength = 0;
        return;
    }
    if (equalsTerm > 3)
        equalsTerm = 3; /* the last quantum holds at most three bytes */

    if (rawLength)
        *rawLength = (numQuantums * 3) - equalsTerm;

    for (i = 0; i < numQuantums - 1; i++) {
        decodeQuantum(dest, src);
        dest += 3;
        src += 4;
    }

    /* the last quantum may be padded: copy only the real bytes */
    decodeQuantum(lastQuantum, src);
    for (i = 0; i < 3 - equalsTerm; i++)
        dest[i] = lastQuantum[i];
}

/* The base64 alphabet. */
static const char table64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Returns 1 when c belongs to the base64 alphabet, 0 otherwise.
 * The old loop ran over sizeof(table64), which includes the terminating
 * NUL, so find64('\0') wrongly returned 1.
 */
int find64(char c)
{
    return (c != '\0' && strchr(table64, c) != NULL) ? 1 : 0;
}

/*
 * base64_decode - decode the NUL-terminated base64 string str into data.
 * Returns the number of decoded bytes (0 when str or data is NULL).
 * data must have room for 3 bytes per 4 input characters.
 */
int base64_decode(const char *str, void *data)
{
    int ret = 0;

    if (str == NULL || data == NULL)
        return 0;

    base64Decode((unsigned char *)data, (char *)str, &ret);
    return ret;
}

/*______________________________ Latest base64 ______________________________*/

/*------------------------------------------------------------------------*/
/* Low 4 bits of n to the hex character '0'-'9' / 'A'-'F'. */
char Hex2Chr( unsigned char n )
{
    n &= 0xF;
    return ( char )( ( n < 10 ) ? ( n + '0' ) : ( n - 10 + 'A' ) );
}
//--------------------------------------------------------------------------- // char 0-F to 4bit binary unsigned char Chr2Hex( char c ) { if ( c >= 'a' && c <= 'z' ) // it's toupper c = c - 'a' + 'A'; if ( c >= '0' && c <= '9' ) return ( int )( c - '0' ); else if ( c >= 'A' && c <= 'F' ) return ( int )( c - 'A' + 10 ); else return -1; } //--------------------------------------------------------------------------- // Base64 code table // 0-63 : A-Z(25) a-z(51), 0-9(61), +(62), /(63) char Base2Chr( unsigned char n ) { n &= 0x3F; if ( n < 26 ) return ( char )( n + 'A' ); else if ( n < 52 ) return ( char )( n - 26 + 'a' ); else if ( n < 62 ) return ( char )( n - 52 + '0' ); else if ( n == 62 ) return '+'; else return '/'; } //--------------------------------------------------------------------------- unsigned char Chr2Base( char c ) { if ( c >= 'A' && c <= 'Z' ) return ( unsigned char )( c - 'A' ); else if ( c >= 'a' && c <= 'z' ) return ( unsigned char )( c - 'a' + 26 ); else if ( c >= '0' && c <= '9' ) return ( unsigned char )( c - '0' + 52 ); else if ( c == '+' ) return 62; else if ( c == '/' ) return 63; else return 64; // ÎÞЧ×Ö·û } //--------------------------------------------------------------------------- // aLen Ϊ aSrc µÄ´óС£¬ aDest ËùÖ¸µÄ»º³åÇø±ØÐëÖÁÉÙΪ aLen µÄ 3 ±¶£¡£¡£¡ // ·µ»Ø aDest µÄ³¤¶È int QPEncode( char * const aDest, const unsigned char * aSrc, int aLen ) { char * p = aDest; int i = 0; while ( i++ < aLen ) { *p++ = '='; *p++ = Hex2Chr( *aSrc >> 4 ); *p++ = Hex2Chr( *aSrc++ ); } *p = 0; // aDest is an ASCIIZ string return ( p - aDest ); // exclude the end of zero } //--------------------------------------------------------------------------- // aDest ËùÖ¸µÄ»º³åÇø±ØÐëÖÁÉÙΪ aSrc ³¤¶ÈµÄ 1/3 £¡£¡£¡ // ·µ»Ø aDest µÄ³¤¶È int QPDecode( unsigned char * const aDest, const char * aSrc ) { unsigned char * p = aDest; int n = strlen( aSrc ); unsigned char ch, cl; while ( *aSrc ) // aSrc is an ASCIIZ string { if ( ( *aSrc == '=' ) && ( n - 2 > 0 ) ) { ch = 
Chr2Hex( aSrc[1] ); cl = Chr2Hex( aSrc[2] ); if ( ( ch == ( unsigned char )-1 ) || ( cl == ( unsigned char )-1 ) ) *p++ = *aSrc++; else { *p++ = ( ch << 4 ) | cl; aSrc += 3; } } else *p++ = *aSrc++; } return ( p - aDest ); } //--------------------------------------------------------------------------- // aLen Ϊ aSrc µÄ³¤¶È£¬ aDest ËùÖ¸µÄ»º³åÇø±ØÐëÖÁÉÙΪ aLen µÄ 1.33 ±¶£¡£¡£¡ // ·µ»Ø aDest µÄ³¤¶Èa int Base64Encode( char * const aDest, const unsigned char * aSrc, int aLen ) { char * p = aDest; int i; unsigned char t=0; for ( i = 0; i < aLen; i++ ) { switch ( i % 3 ) { case 0 : *p++ = Base2Chr( *aSrc >> 2 ); t = ( *aSrc++ << 4 ) & 0x3F; break; case 1 : *p++ = Base2Chr( t | ( *aSrc >> 4 ) ); t = ( *aSrc++ << 2 ) & 0x3F; break; case 2 : *p++ = Base2Chr( t | ( *aSrc >> 6 ) ); *p++ = Base2Chr( *aSrc++ ); break; } } if ( aLen % 3 != 0 ) { *p++ = Base2Chr( t ); if ( aLen % 3 == 1 ) *p++ = '='; *p++ = '='; } *p = 0; // aDest is an ASCIIZ string return ( p - aDest ); // exclude the end of zero } int Base64Decode( unsigned char * const aDest, const char * aSrc ) { unsigned char * p = aDest; int i; int n = strlen( aSrc ); unsigned char c; unsigned char t=0; for ( i = 0; i < n; i++ ) { if ( *aSrc == '=' ) break; do { if ( *aSrc ) c = Chr2Base( *aSrc++ ); else c = 65; // ×Ö·û´®½áÊø } while ( c == 64 ); // Ìø¹ýÎÞЧ×Ö·û£¬Èç»Ø³µµÈ if ( c == 65 ) break; switch ( i % 4 ) { case 0 : t = c << 2; break; case 1 : *p++ = ( unsigned char )( t | ( c >> 4 ) ); t = ( unsigned char )( c << 4 ); break; case 2 : *p++ = ( unsigned char )( t | ( c >> 2 ) ); t = ( unsigned char )( c << 6 ); break; case 3 : *p++ = ( unsigned char )( t | c ); break; } } return ( p - aDest ); } //--------------------------------------------------------------------------- /*_____________________________________________________________________________________________________________________*/ void qp(unsigned char sour,unsigned char first,unsigned char second) { if(sour>127){ first=sour>>4; second=sour&15; 
if(first>9)first+=55; else first+=48; if(second>9)second+=55; else second+=48; } printf("%c%c%c/n",'=',first,second); } void uqp(unsigned char sour,unsigned char first,unsigned char second) { if(first>=65)first-=55; else first-=48; if(second>=65)second-=55; else second-=48; sour=0; sour=first<<4; sour|=second; } int EncodeQuoted(const unsigned char* pSrc, char* pDst, int nSrcLen, int nMaxLineLen) { int nDstLen=0; int nLineLen=0; int i=0; for (i = 0; i < nSrcLen; i++, pSrc++){ if ((*pSrc >= '!') && (*pSrc <= '~') && (*pSrc != '=')){ *pDst++ = (char)*pSrc; nDstLen++; nLineLen++; }else{ sprintf(pDst, "=%02X", *pSrc); pDst += 3; nDstLen += 3; nLineLen += 3; } if (nLineLen >= nMaxLineLen - 3){ sprintf(pDst, "=/r/n"); pDst += 3; nDstLen += 3; nLineLen = 0; } } *pDst = '/0'; return nDstLen; } int DecodeQuoted(const char* pSrc, unsigned char *pDst, int nSrcLen) { int nDstLen=0; int i=0; while (i < nSrcLen){ if (strncmp(pSrc, "=/r/n", 3) == 0){ pSrc += 3; i += 3; }else{ if (*pSrc == '='){ sscanf(pSrc, "=%02X", (unsigned int *)pDst); pDst = (unsigned char *)pDst; pDst++; pSrc += 3; i += 3; }else{ *pDst++ = (unsigned char)*pSrc++; i++; } nDstLen++; } } *pDst = '/0'; return nDstLen; } int is_utf8(const char *data) { char *start; if ( data == NULL ) return -1; start = strchr( data, '%' ); if ( start == NULL ) return -1; if ( ( *(start+1) == 'E' ) && ( *(start+3) == '%' ) && ( *(start+6) == '%' ) ) return 1; else return 0; return -1; } /*urldecode*/ int url_decode( char *dst, const char *src, int src_len ) { char c1,c2; int i = 0; int j = 0; char *p_dst = dst; // char *ph_dst = dst; const char *p_src = src; if ( ( dst == NULL ) || ( src == NULL ) || (src_len == 0)) return -1; for ( i = 0, j = 0; i < src_len; ) { if ( *(p_src+i) == '%' ) { c1 = tolower( *(p_src+i+1) ); c2 = tolower( *(p_src+i+2) ); if ( !isxdigit(c1) || !isxdigit(c2) ) { /*If not urlencoded data then copy it*/ *(p_dst+j) = *(p_src+i); *(p_dst+j+1) = *(p_src+i+1); *(p_dst+j+2) = *(p_src+i+2); j += 3; i += 3; } 
else { if ( c1 <= '9' ) c1 = c1 - '0'; else c1 = c1 - 'a' + 10; if ( c2 <= '9' ) c2 = c2 - '0'; else c2 = c2 - 'a' + 10; *(p_dst+j) = 16 * c1 + c2; j++; i += 3; } } else if ( *(p_src+i) == '+' ) { *(p_dst+j) = ' '; j++; i++; } else { *(p_dst+j) = *(p_src+i); j++; i++; } } *(p_dst+j) = '/0'; return j; } int convert_gbk_to_utf8(char *inbuf, int inlen, char *outbuf, int outlen) { iconv_t cd; char **pin = &inbuf; char **pout = &outbuf; size_t inbufsize = inlen; size_t outbufsize = outlen; size_t *insize = &inbufsize; size_t *outsize = &outbufsize; cd = iconv_open("UTF-8", "GBK"); if(cd == (iconv_t)(-1)) return -1; memset(outbuf, 0, outlen); if(iconv(cd, pin, insize, pout, outsize) == -1) { iconv_close(cd); return -1; } iconv_close(cd); return 0; } int convert_big5_to_utf8(char *inbuf,int inlen,char *outbuf,int outlen) { iconv_t cd; char **pin = &inbuf; char **pout = &outbuf; size_t inbufsize = inlen; size_t outbufsize = outlen; size_t *insize = &inbufsize; size_t *outsize = &outbufsize; cd = iconv_open("UTF-8","big5"); if(cd == (iconv_t)(-1)) return -1; memset(outbuf,0,outlen); if(iconv(cd,pin,insize,pout,outsize) == -1) { perror("iconv big5->utf8:"); iconv_close(cd); return -1; } iconv_close(cd); return 0; } int convert_gb2312_to_utf8(char *inbuf,int inlen,char *outbuf,int outlen) { iconv_t cd; char **pin = &inbuf; char **pout = &outbuf; size_t inbufsize = inlen; size_t outbufsize = outlen; size_t *insize = &inbufsize; size_t *outsize = &outbufsize; cd = iconv_open("UTF-8","GB2312"); if(cd == (iconv_t)(-1)) return -1; memset(outbuf,0,outlen); if(iconv(cd,pin,insize,pout,outsize) == -1) { perror("iconv gb2312->utf8:"); iconv_close(cd); return -1; } iconv_close(cd); return 0; } int convert_gb18030_to_utf8(char *inbuf,int inlen,char *outbuf,int outlen) { iconv_t cd; char **pin = &inbuf; char **pout = &outbuf; size_t inbufsize = strlen(inbuf); size_t outbufsize = strlen(outbuf); size_t *insize = &inbufsize; size_t *outsize = &outbufsize; cd = 
iconv_open("UTF-8","GB18030"); if(cd == (iconv_t)(-1)) return -1; memset(outbuf,0,outlen); if(iconv(cd,pin,insize,pout,outsize) == -1) { perror("iconv gb18030->utf8:"); iconv_close(cd); return -1; } iconv_close(cd); return 0; } void convert_url_to_utf8(char *data, int len) { if(NULL==data) return ; int flag; char inbuf[len+1]; char midbuf[len+1]; strncpy(inbuf, data, len); memset(data, 0, len+1); flag = is_utf8(inbuf); if (flag == 1){ url_decode(data, inbuf, len); }else if (flag == 0){ url_decode(midbuf, inbuf, len); convert_gbk_to_utf8(midbuf, strlen(midbuf), data, len); //convert_gbk_to_utf8(midbuf, len, data, len); } else strncpy(data, inbuf, len); return; } /* *function *return >=0:return index of array encode_type, * encode type will be stored in variable "type" if type is not NULL *return -1:dont know encode type *return <-1:error number */ int fuzzy_guess_encode_type(char *data,char *type) { char *code_key[] = {"charset","encode"};//keyword of encode type char *p_key = NULL; char tmp[FUZZY_OFFSET] = {0}; int i = 0; int j = 0; int data_len = 0; if(data==NULL ) return -1; data_len = strlen(data); for(i=0;i<(sizeof(code_key)/sizeof(void *));i++) { while((p_key=strcasestr(data,code_key[i])) != NULL) { //right side of keyword strncpy(tmp,p_key+strlen(code_key[i]), (FUZZY_OFFSET-1)<(data_len-(int)p_key-strlen(code_key[i])+(int)data)?(FUZZY_OFFSET-1):(data_len-(int)p_key-strlen(code_key[i])+(int)data)); tmp[FUZZY_OFFSET] = 0; //find possible encode type for(j=0;j<(sizeof(encode_type)/(sizeof(void *)));j++) { / if(strlen(tmp) >= strlen(encode_type[j]) &&strcasestr(tmp,encode_type[j])) { if(type != NULL) strcpy(type,encode_type[j]); return j; } } //left side of keyword strncpy(tmp,p_key, (FUZZY_OFFSET-1)<(p_key-data)? 
(FUZZY_OFFSET-1):(p_key-data)); tmp[FUZZY_OFFSET] = 0; for(j=0;j<sizeof(encode_type);j++) { if(strlen(tmp) >= strlen(encode_type[j]) && strcasestr(tmp,encode_type[j])) { if(type != NULL) strcpy(type,encode_type[j]); return j; } } } } return -1;//encodeing type not found } /* *return values: 0 type unknown 1 xml 2 mime 3 soap <0 errors * * */ int guess_post_data_packge_type(char *data) { char *p = NULL; if(data == NULL) return -1; p = strstr(data,"/r/n/r/n");//skip http protocol header if(p == NULL) return -2; p += strlen("/r/n/r/n"); //skip if(!strncmp(p,"<?xml",strlen("<?xml")))//xml enveloped return 1; else if(!strncmp(p,"-----",strlen("-----"))) return 2; else if(!strncmp(p,"<soap",strlen("<soap"))) return 3; else return 0; } int convert_post_data_to_utf8(char *data) { char *p_post_data = NULL; char encode_type[16]={0}; char data_converted[LENGTH_INFO*2] = {0}; //skip http header p_post_data = strstr(data,"/r/n/r/n"); if(p_post_data != NULL) p_post_data += strlen("/r/n/r/n"); else return 7; if(guess_post_data_packge_type(data) > 0) {//xml or mime switch(fuzzy_guess_encode_type(p_post_data,encode_type)) { case 0://gbk convert to utf-8 if(!convert_gbk_to_utf8(p_post_data,strlen(p_post_data), data_converted,LENGTH_INFO*2)) strncpy(p_post_data,data_converted, MIN(strlen(data_converted), (LENGTH_INFO-(int)p_post_data+(int)data))); return 1; case 1://utf-8 case 2: case 3://dnt need convert anymore return 2; case 4://gb2312 case 5: if(!convert_gb2312_to_utf8(p_post_data,strlen(p_post_data), data_converted,LENGTH_INFO*2)) strncpy(p_post_data,data_converted, MIN(strlen(data_converted), (LENGTH_INFO-(int)p_post_data+(int)data))); return 4; case 6://gb18030 case 7: if(!convert_gb18030_to_utf8(p_post_data,strlen(p_post_data), data_converted,LENGTH_INFO*2)) strncpy(p_post_data,data_converted, MIN(strlen(data_converted), (LENGTH_INFO-(int)p_post_data+(int)data))); return 6; default: return 7; } } else { if(!is_utf8(p_post_data)){ url_decode( 
data_converted,p_post_data,strlen(p_post_data)); // convert_url_to_utf8(data_converted, strlen(data_converted)); convert_gbk_to_utf8(data_converted, strlen(data_converted), p_post_data,(LENGTH_INFO-(int)p_post_data+(int)data)); // strncpy(p_post_data,data_converted, // MIN(strlen(data_converted),(LENGTH_INFO-(int)p_post_data+(int)data))); } return 8; } } char *url_decode3(char *encd, char decd[]) { if( encd == NULL ) return (char* )0; int i, j=0; char *cd = encd; char p[2]; unsigned int num; for( i = 0; i < strlen( cd ); i++ ) { memset( p, '/0', 2 ); if( cd[i] != '%' ) { decd[j++] = cd[i]; continue; } p[0] = cd[++i]; p[1] = cd[++i]; sscanf( p, "%x", &num ); sprintf(p, "%c", num ); decd[j++] = p[0]; } decd[j] = '/0'; return decd; }