// Convert a double-byte character into its 4-digit upper-case hexadecimal
// string (lead byte first), e.g. bytes 0xB0 0xA1 -> "B0A1".
//
// word : at least two readable bytes; word[0] is the lead byte, word[1] the
//        trail byte.
// hex  : output buffer; receives 4 hex digits plus a terminating NUL, so it
//        must provide room for at least 5 bytes.
//
// Returns 0 on success, -1 if either pointer is NULL.
int chinese_to_hex(char *word ,char *hex)
{
    static const char digits[] = "0123456789ABCDEF";

    if( word == NULL || hex == NULL )
    {
        return -1;
    }

    // Work on unsigned bytes: formatting a plain (possibly signed) char would
    // sign-extend values >= 0x80, and bytes < 0x80 must still yield two digits.
    const unsigned char lead  = (unsigned char)word[ 0 ];
    const unsigned char trail = (unsigned char)word[ 1 ];

    hex[ 0 ] = digits[ lead >> 4 ];
    hex[ 1 ] = digits[ lead & 0x0F ];
    hex[ 2 ] = digits[ trail >> 4 ];
    hex[ 3 ] = digits[ trail & 0x0F ];
    hex[ 4 ] = '\0';
    return 0;
}
// Split a text into single "word units" and normalise punctuation.
//
// The text is scanned byte by byte. A byte in 0..127 is a single-byte unit;
// any other byte is taken as the lead byte of a double-byte character and is
// consumed together with the byte that follows it.
//
//   * ASCII letters, digits, '.' and ' ' are emitted unchanged.
//   * Any other ASCII byte, and any double-byte code inside the ranges listed
//     below, is treated as punctuation: the first one of a run is emitted as
//     ',' and the following ones are dropped. A ',' is also suppressed when
//     the previously emitted unit was '.' or ' '.
//   * Every other double-byte character is emitted unchanged (two bytes).
//   * A lead byte that is the very last byte of the text (no trail byte) is
//     dropped instead of being emitted as half a character.
//
// sz_text    : NUL-terminated input text.
// word_list  : array of caller-allocated buffers, each at least 3 bytes. No
//              capacity is passed in, so the caller must provide at least
//              strlen(sz_text) entries (one unit is emitted per input byte at
//              most).
// word_count : receives the number of units written to word_list.
//
// Returns 0 on success, -1 if sz_text or word_list is NULL.
int get_single_word( char *sz_text, char**word_list , long int &word_count )
{
    word_count = 0;
    if( sz_text == NULL || word_list == NULL )
    {
        return -1;
    }

    // Non-zero while the last emitted unit was a separator (',', '.' or ' ');
    // further punctuation is swallowed until a real character shows up.
    int dcount = 0;
    const long int nLen = (long int)strlen( sz_text );

    for( long int i = 0; i < nLen; i++ )
    {
        char temp[3] = { 0, 0, 0 };
        const unsigned char lead = (unsigned char)sz_text[i];

        if( lead <= 127 ) // single-byte (half-width) character
        {
            const bool is_alnum = ( lead >= '0' && lead <= '9' ) ||
                                  ( lead >= 'A' && lead <= 'Z' ) ||
                                  ( lead >= 'a' && lead <= 'z' );
            const bool is_soft_separator = ( lead == '.' || lead == ' ' );

            if( is_alnum || is_soft_separator )
            {
                temp[0] = sz_text[i];
                dcount = is_soft_separator ? 1 : 0;
            }
            else if( dcount == 0 )
            {
                temp[0] = ',';
                dcount = 1;
            }
        }
        else // double-byte (full-width) character
        {
            // i < nLen, so sz_text[i + 1] is at worst the terminating NUL.
            const unsigned char trail = (unsigned char)sz_text[ i + 1 ];
            if( trail == 0 )
            {
                break; // truncated character at end of text
            }

            // 16-bit code, lead byte in the high half. Computed directly so
            // no hex-string round trip (and no scratch buffer) is needed.
            const unsigned int code = ( (unsigned int)lead << 8 ) | trail;

            // Code ranges treated as punctuation/symbols. The gaps inside row
            // 0xA3 (0xA3B0-0xA3B9, 0xA3C1-0xA3DA, 0xA3E1-0xA3FA) are kept;
            // in GB2312 these are the full-width digits and Latin letters.
            const bool is_symbol =
                ( code >= 0xA1A0 && code <= 0xA3AF ) ||
                ( code >= 0xA3BA && code <= 0xA3C0 ) ||
                ( code >= 0xA3DB && code <= 0xA3E0 ) ||
                ( code >= 0xA3FB && code <= 0xA3FF ) ||
                ( code >= 0xA4A0 && code <= 0xA996 );

            if( is_symbol )
            {
                if( dcount == 0 )
                {
                    temp[0] = ',';
                    dcount = 1;
                }
            }
            else
            {
                temp[0] = sz_text[i];
                temp[1] = sz_text[ i + 1 ];
                dcount = 0;
            }
            i++; // skip the trail byte that was just consumed
        }

        // Suppressed punctuation produces nothing; do not emit empty units.
        if( temp[0] == '\0' )
        {
            continue;
        }

        strcpy( word_list[ word_count ], temp );
        word_count++;
    }
    return 0;
}
// Text noise filter.
//
// Splits in_put_text into word units with get_single_word() and rebuilds the
// text in out_put_text:
//   * double-byte units are always copied to the output;
//   * single-byte (ASCII) units are collected into a run; when the run ends
//     it is copied to the output only if it is shorter than 9 units, longer
//     runs are discarded.
//
// in_put_text  : NUL-terminated input text.
// out_put_text : caller-provided output buffer. Its size is not passed in;
//                every unit is at most as long as the input bytes it came
//                from, so strlen(in_put_text) + 1 bytes are sufficient.
//
// Returns 0 on success, -1 if either pointer is NULL.
int filter_chars_text( char *in_put_text , char *out_put_text )
{
    if( in_put_text == NULL || out_put_text == NULL )
    {
        return -1;
    }

    // Only strcat() is used on the output below, so an empty string is all
    // that is required (sizeof on the pointer would not give the buffer size).
    out_put_text[ 0 ] = '\0';

    // get_single_word() emits at most one unit per input byte, so sizing the
    // tables from the input length keeps it inside the arrays for any input.
    const size_t capacity = strlen( in_put_text ) + 1;

    char **word_list = new char*[ capacity ];
    char *word_store = new char[ capacity * 3 ]; // 3 bytes per unit, one block
    char *sin_temp   = new char[ capacity ];     // current ASCII run

    memset( word_store, 0, capacity * 3 );
    for( size_t k = 0; k < capacity; k++ )
    {
        word_list[ k ] = word_store + k * 3;
    }
    sin_temp[ 0 ] = '\0';

    long int word_count = 0;
    get_single_word( in_put_text, word_list, word_count );

    long int sin_count = 0;
    for( long int i = 0; i < word_count; i++ )
    {
        const char *word = word_list[ i ];

        // An empty unit carries no text; storing its NUL in the run would cut
        // off every ASCII character collected after it.
        if( word[ 0 ] == '\0' )
        {
            continue;
        }

        if( (unsigned char)word[ 0 ] <= 127 )
        {
            sin_temp[ sin_count ] = word[ 0 ];
            sin_count++;
            sin_temp[ sin_count ] = '\0';
            continue;
        }

        // Double-byte unit: flush the pending ASCII run if it is short enough.
        if( sin_count > 0 && sin_count < 9 )
        {
            strcat( out_put_text, sin_temp );
        }
        strcat( out_put_text, word );

        sin_count = 0;
        sin_temp[ 0 ] = '\0';
    }

    // A run that reaches the end of the text follows the same length rule
    // instead of being silently lost.
    if( sin_count > 0 && sin_count < 9 )
    {
        strcat( out_put_text, sin_temp );
    }

    // Arrays allocated with new[] must be released with delete[].
    delete[] sin_temp;
    delete[] word_store;
    delete[] word_list;
    return 0;
}
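// Program for filtering full-width and half-width characters in web page text.
// (Source page note: latest recommended article published 2024-10-29 11:09:48.)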