// 判断字符串的编码是 UTF-8 还是 GBK (decide whether a string is encoded as UTF-8 or GBK).

#include <assert.h>

#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;


// Identifiers for the two text encodings named in this file.
enum Encoding_type
{
    UTF_8 = 0,  // first enumerator, value 0
    GBK   = 1   // second enumerator, value 1
};


/// Returns bit `n` (0 = least significant) of the byte `number`.
///
/// @param number  Byte to inspect. It is converted to `unsigned char` first so
///                the result does not depend on whether plain `char` is signed.
/// @param n       Bit position, expected to be in 0..7.
/// @return 1 if the requested bit is set; 0 if it is clear or if `n` is
///         outside 0..7.
int read_bit(char number,int n)
{
    // Shifting by a negative count, or by at least the width of the promoted
    // operand, is undefined behaviour, so positions a byte does not have are
    // rejected up front.
    if (n < 0 || n > 7)
    {
        return 0;
    }

    const unsigned char byte = static_cast<unsigned char>(number);
    return (byte >> n) & 0x1;
}


/// Counts the bytes of `str` that belong to two-byte sequences this detector
/// accepts as GBK.
///
/// A pair is accepted when its lead byte is in 0xA1..0xF7 and its trail byte
/// is in 0xA1..0xFE. Once a pair matches, both bytes are consumed and the scan
/// resumes after the trail byte.
/// NOTE(review): these bounds look like the GB2312 subset rather than the full
/// GBK lead/trail ranges — confirm whether the wider ranges should count too.
///
/// @param str  NUL-terminated byte string; must not be null (asserted).
/// @return Total number of bytes (two per accepted pair) inside accepted pairs.
unsigned int countGBK(const char * str)
{
    assert(str != NULL);
    const unsigned int len = static_cast<unsigned int>(std::strlen(str));
    unsigned int counter = 0;

    // `i + 1 < len` instead of `i < len - 1`: with unsigned arithmetic the
    // latter wraps around for an empty string and walks off the buffer.
    for (unsigned int i = 0; i + 1 < len; ++i)
    {
        const unsigned char firstChar = static_cast<unsigned char>(str[i]);
        // The trail byte is the one *after* the lead byte.
        const unsigned char secondChar = static_cast<unsigned char>(str[i + 1]);

        if (firstChar >= 0xA1 && firstChar <= 0xF7 &&
            secondChar >= 0xA1 && secondChar <= 0xFE)
        {
            counter += 2;
            ++i;  // skip the trail byte that was just consumed
        }
    }
    return counter;
}
  
/// Counts the bytes of `str` that belong to well-formed multi-byte UTF-8
/// sequences.
///
/// A sequence is accepted when its lead byte announces a length of 2, 3 or 4
/// bytes (110xxxxx, 1110xxxx, 11110xxx) and every following byte of that
/// length has the continuation pattern 10xxxxxx. ASCII bytes, stray
/// continuation bytes, lead bytes announcing longer sequences, and sequences
/// cut short by the end of the string contribute nothing; scanning simply
/// moves on to the next byte.
/// Overlong or otherwise out-of-range code points are not rejected here; only
/// the byte patterns are checked.
///
/// @param str  NUL-terminated byte string; must not be null (asserted).
/// @return Total number of bytes inside accepted multi-byte sequences.
unsigned int countUTF8(const char * str)
{
    assert(str != NULL);
    const unsigned int len = static_cast<unsigned int>(std::strlen(str));
    unsigned int counter = 0;

    for (unsigned int i = 0; i < len; ++i)
    {
        const unsigned char firstChar = static_cast<unsigned char>(str[i]);

        // Total sequence length announced by the lead byte.
        unsigned int wordLen = 0;
        if ((firstChar & 0xE0) == 0xC0)
        {
            wordLen = 2;
        }
        else if ((firstChar & 0xF0) == 0xE0)
        {
            wordLen = 3;
        }
        else if ((firstChar & 0xF8) == 0xF0)
        {
            wordLen = 4;
        }
        else
        {
            continue;  // ASCII, a continuation byte, or not a usable lead byte
        }

        // `len - i` is at least 1 here, so the subtraction cannot wrap.
        if (wordLen > len - i)
        {
            continue;  // not enough bytes left for this sequence
        }

        // Every trailing byte must look like 10xxxxxx; merely having the high
        // bit set is not enough, since GBK trail bytes have it set as well.
        unsigned int tPos = 1;
        while (tPos < wordLen &&
               (static_cast<unsigned char>(str[i + tPos]) & 0xC0) == 0x80)
        {
            ++tPos;
        }

        if (tPos == wordLen)
        {
            counter += wordLen;
            i += wordLen - 1;  // the loop's ++i steps past the last byte
        }
    }
    return counter;
}
  
bool beUtf8(const char *str)  
{  
   unsigned int iGBK = countGBK(str);  
    unsigned int iUTF8= countUTF8(str);  
    if (iUTF8 > iGBK)return true;  
    return false;  

/// Runs the encoding guess on a hard-coded sample string. The result is
/// computed but not reported; the program always exits with status 0.
int main(int argc,char **argv)
{
    // The command line is not consulted.
    (void)argc;
    (void)argv;

    // String literals are arrays of const char, so the sample is held through
    // a pointer-to-const.
    // NOTE(review): the sample text looks like UTF-8 that was mis-decoded as
    // GBK — confirm the intended bytes and this file's encoding.
    const char * str = "dd绌胯秺锛氬皢鍐涳紝浣犺fd鎹曚簡鏈€鏂扮珷鑺?>> 鍑ょ懚 - 棣栧彂瑷€鎯呭皬璇村惂[www.xs8.cn]";

    const bool b = beUtf8(str);
    (void)b;  // result is currently unused

    return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值