#include <assert.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
// Encoding identifiers. The enumerators keep the values they would receive
// implicitly (UTF_8 == 0, GBK == 1).
enum Encoding_type
{
    UTF_8 = 0,
    GBK   = 1
};
/**
 * Returns the value (0 or 1) of bit `n` of `number`.
 *
 * @param number  Byte to inspect; it is treated as an unsigned 8-bit value,
 *                so the sign of `char` does not affect the result.
 * @param n       Bit index, 0 being the least significant bit.
 * @return        1 if the bit is set, 0 if it is clear or if `n` is outside
 *                the range 0..7.
 *
 * The function is pure: it writes nothing to any stream.
 */
int read_bit(char number,int n)
{
    // Shifting by a negative count or past the operand width is undefined,
    // so indices that do not name one of the eight bits read as 0.
    if (n < 0 || n > 7)
    {
        return 0;
    }
    const unsigned char bits = static_cast<unsigned char>(number);
    return (bits >> n) & 0x1;
}
/**
 * Counts the bytes of `str` that belong to double-byte sequences whose lead
 * byte lies in 0xA1..0xF7 and whose trail byte lies in 0xA1..0xFE (the
 * two-byte range used by GB2312, which GBK extends).
 *
 * @param str  NUL-terminated byte string; must not be NULL (checked by assert).
 * @return     Twice the number of matching pairs found when scanning from
 *             left to right. 0 for an empty or pure-ASCII string.
 *
 * A matched pair is consumed as a unit, so its trail byte is never
 * reconsidered as a lead byte.
 */
unsigned int countGBK(const char * str)
{
    assert(str != NULL);
    const std::size_t len = std::strlen(str);
    unsigned int counter = 0;
    // `i + 1 < len` rather than `i < len - 1`: the latter wraps around for an
    // empty string and would read past the terminator.
    for (std::size_t i = 0; i + 1 < len; ++i)
    {
        const unsigned char firstChar = static_cast<unsigned char>(str[i]);
        // Also skips ASCII, since every byte below 0x80 is below 0xA1.
        if (firstChar < 0xA1 || firstChar > 0xF7)
        {
            continue;
        }
        // The trail byte is the NEXT byte, not the lead byte itself.
        const unsigned char secondChar = static_cast<unsigned char>(str[i + 1]);
        if (secondChar >= 0xA1 && secondChar <= 0xFE)
        {
            counter += 2;
            ++i; // step over the trail byte
        }
    }
    return counter;
}
/**
 * Counts the bytes of `str` that belong to structurally well-formed
 * multi-byte UTF-8 sequences (2 to 4 bytes long).
 *
 * A sequence is accepted when its lead byte is 110xxxxx, 1110xxxx or
 * 11110xxx and it is followed by the announced number of continuation bytes,
 * each of the form 10xxxxxx. Overlong encodings and surrogate code points
 * are not rejected; only the byte pattern is checked.
 *
 * @param str  NUL-terminated byte string; must not be NULL (checked by assert).
 * @return     Total length in bytes of all accepted sequences. ASCII bytes,
 *             stray continuation bytes and malformed or truncated sequences
 *             contribute nothing.
 */
unsigned int countUTF8(const char * str)
{
    assert(str != NULL);
    const std::size_t len = std::strlen(str);
    unsigned int counter = 0;
    std::size_t i = 0;
    while (i < len)
    {
        const unsigned char lead = static_cast<unsigned char>(str[i]);

        // Sequence length announced by the lead byte; 0 means "not a lead
        // byte" (ASCII, a continuation byte, or an invalid 0xF8..0xFF byte).
        std::size_t seqLen = 0;
        if ((lead & 0xE0) == 0xC0)
        {
            seqLen = 2;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            seqLen = 3;
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            seqLen = 4;
        }

        // A sequence that would run past the end is skipped byte by byte
        // instead of ending the scan, so shorter sequences after it still count.
        if (seqLen == 0 || seqLen > len - i)
        {
            ++i;
            continue;
        }

        bool wellFormed = true;
        for (std::size_t k = 1; k < seqLen; ++k)
        {
            const unsigned char next = static_cast<unsigned char>(str[i + k]);
            // Continuation bytes must match 10xxxxxx exactly; testing only the
            // high bit would also accept lead bytes.
            if ((next & 0xC0) != 0x80)
            {
                wellFormed = false;
                break;
            }
        }

        if (wellFormed)
        {
            counter += static_cast<unsigned int>(seqLen);
            i += seqLen;
        }
        else
        {
            ++i;
        }
    }
    return counter;
}
/**
 * Guesses whether `str` is UTF-8 rather than GBK by comparing the byte
 * counts reported by countGBK() and countUTF8().
 *
 * @param str  NUL-terminated byte string passed unchanged to both counters.
 * @return     true only when the UTF-8 count is strictly greater than the
 *             GBK count; a tie yields false.
 */
bool beUtf8(const char *str)
{
    const unsigned int gbkBytes = countGBK(str);
    const unsigned int utf8Bytes = countUTF8(str);
    return utf8Bytes > gbkBytes;
}
/**
 * Entry point: runs beUtf8() on a hard-coded sample string.
 *
 * The result is computed but not reported; the process always returns 0.
 * Command-line arguments are ignored.
 */
int main(int argc,char **argv)
{
    (void)argc;
    (void)argv;
    // A string literal is an array of const char, so it must be bound through
    // a pointer-to-const.
    const char * str = "dd绌胯秺锛氬皢鍐涳紝浣犺fd鎹曚簡鏈€鏂扮珷鑺?>> 鍑ょ懚 - 棣栧彂瑷€鎯呭皬璇村惂[www.xs8.cn]";
    const bool b = beUtf8(str);
    (void)b; // result is intentionally unused
    return 0;
}
#include <fstream>
#include <string>
#include <assert.h>
using namespace std;
// Encoding identifiers. The enumerators keep the values they would receive
// implicitly (UTF_8 == 0, GBK == 1).
enum Encoding_type
{
    UTF_8 = 0,
    GBK   = 1
};
/**
 * Returns the value (0 or 1) of bit `n` of `number`.
 *
 * @param number  Byte to inspect; it is treated as an unsigned 8-bit value,
 *                so the sign of `char` does not affect the result.
 * @param n       Bit index, 0 being the least significant bit.
 * @return        1 if the bit is set, 0 if it is clear or if `n` is outside
 *                the range 0..7.
 *
 * The function is pure: it writes nothing to any stream.
 */
int read_bit(char number,int n)
{
    // Shifting by a negative count or past the operand width is undefined,
    // so indices that do not name one of the eight bits read as 0.
    if (n < 0 || n > 7)
    {
        return 0;
    }
    const unsigned char bits = static_cast<unsigned char>(number);
    return (bits >> n) & 0x1;
}
/**
 * Counts the bytes of `str` that belong to double-byte sequences whose lead
 * byte lies in 0xA1..0xF7 and whose trail byte lies in 0xA1..0xFE (the
 * two-byte range used by GB2312, which GBK extends).
 *
 * @param str  NUL-terminated byte string; must not be NULL (checked by assert).
 * @return     Twice the number of matching pairs found when scanning from
 *             left to right. 0 for an empty or pure-ASCII string.
 *
 * A matched pair is consumed as a unit, so its trail byte is never
 * reconsidered as a lead byte.
 */
unsigned int countGBK(const char * str)
{
    assert(str != NULL);
    const std::size_t len = std::strlen(str);
    unsigned int counter = 0;
    // `i + 1 < len` rather than `i < len - 1`: the latter wraps around for an
    // empty string and would read past the terminator.
    for (std::size_t i = 0; i + 1 < len; ++i)
    {
        const unsigned char firstChar = static_cast<unsigned char>(str[i]);
        // Also skips ASCII, since every byte below 0x80 is below 0xA1.
        if (firstChar < 0xA1 || firstChar > 0xF7)
        {
            continue;
        }
        // The trail byte is the NEXT byte, not the lead byte itself.
        const unsigned char secondChar = static_cast<unsigned char>(str[i + 1]);
        if (secondChar >= 0xA1 && secondChar <= 0xFE)
        {
            counter += 2;
            ++i; // step over the trail byte
        }
    }
    return counter;
}
/**
 * Counts the bytes of `str` that belong to structurally well-formed
 * multi-byte UTF-8 sequences (2 to 4 bytes long).
 *
 * A sequence is accepted when its lead byte is 110xxxxx, 1110xxxx or
 * 11110xxx and it is followed by the announced number of continuation bytes,
 * each of the form 10xxxxxx. Overlong encodings and surrogate code points
 * are not rejected; only the byte pattern is checked.
 *
 * @param str  NUL-terminated byte string; must not be NULL (checked by assert).
 * @return     Total length in bytes of all accepted sequences. ASCII bytes,
 *             stray continuation bytes and malformed or truncated sequences
 *             contribute nothing.
 */
unsigned int countUTF8(const char * str)
{
    assert(str != NULL);
    const std::size_t len = std::strlen(str);
    unsigned int counter = 0;
    std::size_t i = 0;
    while (i < len)
    {
        const unsigned char lead = static_cast<unsigned char>(str[i]);

        // Sequence length announced by the lead byte; 0 means "not a lead
        // byte" (ASCII, a continuation byte, or an invalid 0xF8..0xFF byte).
        std::size_t seqLen = 0;
        if ((lead & 0xE0) == 0xC0)
        {
            seqLen = 2;
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            seqLen = 3;
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            seqLen = 4;
        }

        // A sequence that would run past the end is skipped byte by byte
        // instead of ending the scan, so shorter sequences after it still count.
        if (seqLen == 0 || seqLen > len - i)
        {
            ++i;
            continue;
        }

        bool wellFormed = true;
        for (std::size_t k = 1; k < seqLen; ++k)
        {
            const unsigned char next = static_cast<unsigned char>(str[i + k]);
            // Continuation bytes must match 10xxxxxx exactly; testing only the
            // high bit would also accept lead bytes.
            if ((next & 0xC0) != 0x80)
            {
                wellFormed = false;
                break;
            }
        }

        if (wellFormed)
        {
            counter += static_cast<unsigned int>(seqLen);
            i += seqLen;
        }
        else
        {
            ++i;
        }
    }
    return counter;
}
/**
 * Guesses whether `str` is UTF-8 rather than GBK by comparing the byte
 * counts reported by countGBK() and countUTF8().
 *
 * @param str  NUL-terminated byte string passed unchanged to both counters.
 * @return     true only when the UTF-8 count is strictly greater than the
 *             GBK count; a tie yields false.
 */
bool beUtf8(const char *str)
{
    const unsigned int gbkBytes = countGBK(str);
    const unsigned int utf8Bytes = countUTF8(str);
    return utf8Bytes > gbkBytes;
}
/**
 * Entry point: runs beUtf8() on a hard-coded sample string.
 *
 * The result is computed but not reported; the process always returns 0.
 * Command-line arguments are ignored.
 */
int main(int argc,char **argv)
{
    (void)argc;
    (void)argv;
    // A string literal is an array of const char, so it must be bound through
    // a pointer-to-const.
    const char * str = "dd绌胯秺锛氬皢鍐涳紝浣犺fd鎹曚簡鏈€鏂扮珷鑺?>> 鍑ょ懚 - 棣栧彂瑷€鎯呭皬璇村惂[www.xs8.cn]";
    const bool b = beUtf8(str);
    (void)b; // result is intentionally unused
    return 0;
}