字符流编码检测

 

/*
 * Classifies a UTF-8 lead byte.
 *
 * Returns the total length in bytes (1..6) of the sequence the byte
 * introduces, or 0 when the byte cannot start a sequence (a continuation
 * byte 10XXXXXX, or 0xFE / 0xFF).
 */
static size_t Utf8SequenceLength(unsigned char lead)
{
    if ((lead & 0x80) == 0x00) return 1; // 0XXXXXXX
    if ((lead & 0xE0) == 0xC0) return 2; // 110XXXXX
    if ((lead & 0xF0) == 0xE0) return 3; // 1110XXXX
    if ((lead & 0xF8) == 0xF0) return 4; // 11110XXX
    if ((lead & 0xFC) == 0xF8) return 5; // 111110XX
    if ((lead & 0xFE) == 0xFC) return 6; // 1111110X
    return 0;
}

/*
 * Checks whether a NUL-terminated byte string is structurally valid UTF-8.
 *
 * buf: string to inspect; NULL yields FALSE, the empty string yields TRUE.
 *
 * A leading EF BB BF byte-order mark is skipped. Every remaining byte must
 * either be ASCII or start a 2..6 byte sequence whose trailing bytes all
 * have the form 10XXXXXX and lie entirely inside the string.
 *
 * Only the bit structure is validated: the legacy 5- and 6-byte forms are
 * accepted, and overlong encodings, surrogates and out-of-range code points
 * are not rejected.
 *
 * Returns TRUE when the whole string passes, FALSE otherwise.
 */
BOOL IsUTF8(char* buf)
{
    if (buf == NULL)
        return FALSE;

    const size_t bufLen = strlen(buf);
    size_t i = 0;

    // Skip the byte-order mark if present.
    if (bufLen >= 3 &&
        (unsigned char)buf[0] == 0xEF &&
        (unsigned char)buf[1] == 0xBB &&
        (unsigned char)buf[2] == 0xBF)
    {
        i = 3;
    }

    while (i < bufLen)
    {
        const size_t seqLen = Utf8SequenceLength((unsigned char)buf[i]);

        // Reject invalid lead bytes and sequences truncated by end of string.
        if (seqLen == 0 || seqLen > bufLen - i)
            return FALSE;

        // Every trailing byte must be a continuation byte (10XXXXXX).
        for (size_t k = 1; k < seqLen; k++)
        {
            if (((unsigned char)buf[i + k] & 0xC0) != 0x80)
                return FALSE;
        }

        i += seqLen;
    }
    return TRUE;
}

/*
 * Checks whether a NUL-terminated byte string consists solely of ASCII
 * bytes and two-byte pairs in the ranges this function treats as GB2312:
 * lead byte 0xA1..0xF7 followed by trail byte 0xA1..0xFE.
 *
 * buf: string to inspect; NULL yields FALSE, the empty string yields TRUE.
 *
 * Returns TRUE when every byte is accounted for, FALSE on the first byte
 * that fits neither form (including a lead byte with no trail byte left).
 */
BOOL IsGB2312(char* buf)
{
    if (!buf)
        return FALSE;

    const int total = (int)strlen(buf);
    int pos = 0;

    while (pos < total)
    {
        const unsigned char lead = (unsigned char)buf[pos];

        // High bit clear: plain single-byte character.
        if ((lead & 0x80) == 0)
        {
            ++pos;
            continue;
        }

        // A double-byte character needs a lead in range and a byte after it.
        if (lead < 0xA1 || lead > 0xF7 || pos + 1 >= total)
            return FALSE;

        const unsigned char trail = (unsigned char)buf[pos + 1];
        if (trail < 0xA1 || trail > 0xFE)
            return FALSE;

        pos += 2;
    }

    return TRUE;
}

/*
 * Checks whether a NUL-terminated byte string consists solely of ASCII
 * bytes and two-byte pairs in the ranges this function treats as GBK:
 * lead byte 0x81..0xFE followed by trail byte 0x40..0xFE excluding 0x7F.
 *
 * buf: string to inspect; NULL yields FALSE, the empty string yields TRUE.
 *
 * Returns TRUE when every byte is accounted for, FALSE on the first byte
 * that fits neither form (including a lead byte with no trail byte left).
 */
BOOL IsGBK(char* buf)
{
    if (!buf)
        return FALSE;

    const int total = (int)strlen(buf);
    int pos = 0;

    while (pos < total)
    {
        const unsigned char lead = (unsigned char)buf[pos];

        // High bit clear: plain single-byte character.
        if ((lead & 0x80) == 0)
        {
            ++pos;
            continue;
        }

        // A double-byte character needs a lead in range and a byte after it.
        if (lead < 0x81 || lead > 0xFE || pos + 1 >= total)
            return FALSE;

        const unsigned char trail = (unsigned char)buf[pos + 1];
        if (trail < 0x40 || trail > 0xFE || trail == 0x7F)
            return FALSE;

        pos += 2;
    }

    return TRUE;
}

/*
 * Checks whether a NUL-terminated byte string consists solely of the three
 * forms this function treats as GB18030:
 *   - one byte with the high bit clear;
 *   - two bytes: 0x81..0xFE, then 0x40..0xFE excluding 0x7F;
 *   - four bytes: 0x81..0xFE, 0x30..0x39, 0x81..0xFE, 0x30..0x39.
 *
 * buf: string to inspect; NULL yields FALSE, the empty string yields TRUE.
 *
 * Returns TRUE when every byte is accounted for, FALSE on the first byte
 * that fits none of the forms (including sequences cut off by the end of
 * the string).
 */
BOOL IsGB18030(char* buf)
{
    if (!buf)
        return FALSE;

    const int total = (int)strlen(buf);
    int pos = 0;

    while (pos < total)
    {
        const unsigned char first = (unsigned char)buf[pos];

        // High bit clear: plain single-byte character.
        if ((first & 0x80) == 0)
        {
            ++pos;
            continue;
        }

        // Multi-byte forms need a lead in range and at least one more byte.
        if (first < 0x81 || first > 0xFE || pos + 1 >= total)
            return FALSE;

        const unsigned char second = (unsigned char)buf[pos + 1];

        // Two-byte form.
        if (second >= 0x40 && second <= 0xFE && second != 0x7F)
        {
            pos += 2;
            continue;
        }

        // Otherwise only the four-byte form remains: the second byte must be
        // 0x30..0x39 and all four bytes must fit in the string.
        if (second < 0x30 || second > 0x39 || pos + 3 >= total)
            return FALSE;

        const unsigned char third = (unsigned char)buf[pos + 2];
        const unsigned char fourth = (unsigned char)buf[pos + 3];
        if (third < 0x81 || third > 0xFE || fourth < 0x30 || fourth > 0x39)
            return FALSE;

        pos += 4;
    }

    return TRUE;
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值