Linux下GB2312与UTF-8的转换

转换函数:
/* Fallback definitions for the boolean result codes used below, in case the
 * including project has not already defined them. */
#ifndef TRUE
#define TRUE 1
#endif
#ifndef FALSE
#define FALSE 0
#endif

/**
 * Translate one 2-byte code through a pair of parallel lookup tables.
 *
 * /res/un_table11.c is scanned as a flat sequence of 2-byte entries for the
 * first entry equal to (t[0], t[1]).  The 2 bytes stored at the same entry
 * index in /res/gb_table11.c are then copied to p_tmp[0] and p_tmp[1].
 * (Presumably the first table holds Unicode code units and the second the
 * matching GB2312 codes -- confirm against the table generator.)
 *
 * @param t      Key to look up; t[0] and t[1] are compared bytewise.
 * @param size   Number of valid bytes in t; must be at least 2.
 * @param p_tmp  Receives exactly 2 bytes on success (no NUL terminator is
 *               written); left untouched on failure.
 * @return TRUE on success.  FALSE if an argument is invalid, a table cannot
 *         be opened, the key is not present, or the second table is too
 *         short to contain the matching entry.
 */
static int u2s( char *t, int size, char *p_tmp )
{
    FILE *fp;
    int hi;            /* int, not char, so EOF stays distinguishable from 0xFF */
    int lo;
    long index = 0;    /* zero-based index of the entry being examined */
    int found = FALSE;

    if (t == NULL || p_tmp == NULL || size < 2)
        return FALSE;

    /* The tables are raw bytes addressed by offset, so open them in binary
     * mode (identical to "r" on POSIX). */
    fp = fopen("/res/un_table11.c", "rb");
    if (fp == NULL)
        return FALSE;
    while ((hi = fgetc(fp)) != EOF && (lo = fgetc(fp)) != EOF)
    {
        if (hi == (unsigned char)t[0] && lo == (unsigned char)t[1])
        {
            found = TRUE;
            break;
        }
        index++;
    }
    fclose(fp);

    /* Reaching EOF means the key is not in the table; report it instead of
     * reading forever. */
    if (!found)
        return FALSE;

    fp = fopen("/res/gb_table11.c", "rb");
    if (fp == NULL)
        return FALSE;
    if (fseek(fp, 2 * index, SEEK_SET) != 0)
    {
        fclose(fp);
        return FALSE;
    }
    hi = fgetc(fp);
    lo = fgetc(fp);
    fclose(fp);
    if (hi == EOF || lo == EOF)
        return FALSE;

    p_tmp[0] = (char)hi;
    p_tmp[1] = (char)lo;
    return TRUE;
}

static int utf8_2_gb2312(char *p)
{
    char *d;
    char *s;
    int len = 0;
    int i = 0;
    unsigned short a;
    unsigned char b;
    char *p_tmp = d;
    unsigned char t[ 2 ];
    
    d = malloc(100*sizeof(char));
    memset(d, 0, 100 * sizeof(char));
    s = malloc(100*sizeof(char));
    memset(s, 0, 100 * sizeof(char));
    sprintf(s, "%s", p);
    len = strlen(s);
    len += 1;

    do
    {
        if( 0xe0 == ( *s & 0xf0 ) &&
            0x80 == ( *( s + 1 ) & 0xc0 ) &&
            0x80 == ( *( s + 2 ) & 0xc0 ) )
        {
            //3个字节
            b = *s;
            b &= 0x0f;
            a = b;

            a <<= 6;
            b = *( s + 1 );
            b &= 0x3f;
            a |= b;

            a <<= 6;
            b = *( s + 2 );
            b &= 0x3f;
            a |= b;

            t[ 0 ] = ( a >> 8 ) & 0xff;
            t[ 1 ] = ( a & 0xff );
            
            if(!u2s( t, 2, p_tmp ))
                return FALSE;
                        
            p_tmp += 2;

            i += 3;
            s += 3;
        }
        else if ( 0xc0 == ( *s & 0xe0 ) &&
                0x80 == ( *( s + 1 ) & 0xc0 ) )
        {
            //2个字节
            b = *s;
            b &= 0x1f;
            a = b;

            a <<= 6;
            b = *( s + 1 );
            b &= 0x3f;
            a |= b;
            
            t[ 0 ] = ( a >> 8 ) & 0xff;
            t[ 1 ] = ( a & 0xff );
            
            if(!u2s( t, 2, p_tmp ))
                return FALSE;
           
            p_tmp += 2;

            i += 2;
            s += 2;
        }
        else
        {
            //1个字节
            *( p_tmp++ ) = *( s++ );
            
            i++;
        }
    } while ( i < len );
    *p_tmp = 0;
    
    memset(p, 0, sizeof(p));
    strcpy(p, d);
    //free(s);
    //free(d);

    return TRUE;
}

其中un_table11.c和gb_table11.c是相对应的2个码表。


http://blog.csdn.net/fjhyy/article/details/2466238

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值