// Source: http://www.xemean.net/blog/user1/4/archives/2006/88.html
// 获取GB2312编码汉字的首字母 (Get the pinyin initial letter of GB2312-encoded Chinese characters)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Two-byte assembly state shared by successive getCharPY() calls.
// isHZ is 1 while the first byte of a two-byte character has been stored in hz
// and the second byte is still awaited; it is 0 otherwise.
int isHZ = 0; // set to 1 / back to 0 by getCharPY
// While isHZ is 1, hz holds the stored first byte. After getCharPY has combined
// both bytes, hz holds them with the first byte in the low 8 bits and the second
// byte in the high 8 bits.
unsigned short hz; // character currently being assembled
// Tells whether ch is a single-byte character this module passes through as-is.
// The accepted range is 8..126 inclusive, so besides the printable characters it
// also admits the control codes from backspace upward (tab, newline, ...).
// Returns 1 when ch is inside that range, 0 otherwise.
int isASCII(char ch)
{
    return (ch < 8 || ch > 126) ? 0 : 1;
}
// Checks whether the two bytes at strIn form a GB2312 (EUC-CN) Chinese character.
//
// strIn: pointer to at least two readable bytes, or to a NUL-terminated string
//        (a string shorter than two bytes is rejected); may be null.
// Returns 1 when the first byte is in 0xB0..0xF7 (the hanzi rows) and the
// second byte is in 0xA1..0xFE, otherwise 0.
//
// Only strIn[0] and, if the first byte qualifies, strIn[1] are read. No strlen()
// is used, so a two-byte buffer without a terminator is safe to pass.
int isGB2312(unsigned char * strIn)
{
    unsigned char lead;
    unsigned char trail;

    if (!strIn)
        return 0;

    lead = strIn[0];
    // A NUL first byte (empty string) also fails this range test, so strIn[1]
    // is never touched for an empty string.
    if (lead < 0xB0 || lead > 0xF7)
        return 0;

    trail = strIn[1];
    // Valid EUC-CN trail bytes start at 0xA1; 0xA0 is not part of GB2312.
    // A NUL trail byte (one-byte string) is rejected here as well.
    return (trail >= 0xA1 && trail <= 0xFE) ? 1 : 0;
}
// Feeds one byte of a GB2312 byte stream and returns the pinyin initial of the
// character that byte completes.
//
// ch: next byte of the input.
// Returns:
//   - ch itself when isASCII(ch) accepts it (single-byte character);
//   - 0 when ch was stored as the first byte of a two-byte character and the
//     second byte is still needed (state kept in the globals isHZ / hz);
//   - a lowercase letter 'a'..'z' when ch completes a level-1 hanzi;
//   - '?' when the completed pair is not a GB2312 hanzi (e.g. a symbol) or lies
//     outside the pinyin-ordered level-1 area, where no initial can be derived
//     from the code value alone.
//
// Not reentrant: the half-assembled character lives in the globals isHZ and hz.
char getCharPY(unsigned char ch)
{
    // First/last code of each initial inside the pinyin-ordered level-1 area
    // (0xB0A1..0xD7F9). There are no entries for i, u and v.
    static const struct {
        unsigned short first;
        unsigned short last;
        char initial;
    } ranges[] = {
        {0xB0A1, 0xB0C4, 'a'}, {0xB0C5, 0xB2C0, 'b'}, {0xB2C1, 0xB4ED, 'c'},
        {0xB4EE, 0xB6E9, 'd'}, {0xB6EA, 0xB7A1, 'e'}, {0xB7A2, 0xB8C0, 'f'},
        {0xB8C1, 0xB9FD, 'g'}, {0xB9FE, 0xBBF6, 'h'}, {0xBBF7, 0xBFA5, 'j'},
        {0xBFA6, 0xC0AB, 'k'}, {0xC0AC, 0xC2E7, 'l'}, {0xC2E8, 0xC4C2, 'm'},
        {0xC4C3, 0xC5B5, 'n'}, {0xC5B6, 0xC5BD, 'o'}, {0xC5BE, 0xC6D9, 'p'},
        {0xC6DA, 0xC8BA, 'q'}, {0xC8BB, 0xC8F5, 'r'}, {0xC8F6, 0xCBF9, 's'},
        {0xCBFA, 0xCDD9, 't'}, {0xCDDA, 0xCEF3, 'w'},
        {0xCEF4, 0xD1B8, 'x'}, // was 0xD188, which left a gap before 'y'
        {0xD1B9, 0xD4D0, 'y'},
        {0xD4D1, 0xD7F9, 'z'}  // level-1 ends here; beyond it order is not by pinyin
    };
    unsigned char pair[3];
    unsigned short code;
    unsigned i;

    if (isASCII(ch))
    {
        // A single-byte character ends any half-read pair, so a stray lead byte
        // cannot swallow the next real lead byte.
        isHZ = 0;
        return ch;
    }

    if (isHZ == 0)
    {
        // First byte of a two-byte character: remember it and wait for the second.
        isHZ = 1;
        hz = ch;
        return 0;
    }

    // Second byte: the character is complete.
    isHZ = 0;
    pair[0] = (unsigned char)hz; // stored first byte
    pair[1] = ch;
    pair[2] = 0;                 // terminator, so any isGB2312 implementation can scan it
    code = (unsigned short)((pair[0] << 8) | pair[1]);
    // Leave hz with the same value the previous implementation left in it
    // (second byte in the high 8 bits, first byte in the low 8 bits).
    hz = (unsigned short)((pair[1] << 8) | pair[0]);

    // The byte pair is passed explicitly instead of reinterpreting &hz, which
    // only worked on little-endian machines and was not NUL-terminated.
    if (!isGB2312(pair))
        return '?';

    for (i = 0; i < sizeof ranges / sizeof ranges[0]; i++)
    {
        if (code >= ranges[i].first && code <= ranges[i].last)
            return ranges[i].initial;
    }
    return '?';
}
// Converts a NUL-terminated GB2312 string into its "initials" string.
//
// strIn:  input bytes; each is passed through getCharPY().
// strOut: destination buffer. It receives every result for which isASCII() is
//         true (pass-through single-byte characters, pinyin initials and '?')
//         and is always NUL-terminated. It must have room for strlen(strIn) + 1
//         bytes; the function cannot check this.
// Returns strOut. A null strOut is returned untouched; a null strIn yields an
// empty string.
char * getStrPY(unsigned char *strIn, char *strOut)
{
    const unsigned char *src;
    unsigned out = 0;

    if (!strOut)
        return strOut;

    if (strIn)
    {
        // Walk to the terminator directly instead of calling strlen() on every
        // iteration.
        for (src = strIn; *src != 0; src++)
        {
            // getCharPY hands bytes accepted by isASCII straight back, so every
            // byte can go through it; 0 (first half of a pair) and other
            // non-ASCII results are dropped by the check below.
            char c = getCharPY(*src);

            if (isASCII(c))
            {
                strOut[out] = c;
                out++;
            }
        }
    }

    // Terminate explicitly rather than relying on a pre-zeroed buffer.
    strOut[out] = '\0';
    return strOut;
}
// Demo driver: prints a sample sentence and the initials getStrPY() derives from it.
// NOTE(review): the conversion works on GB2312 bytes, so this presumably only
// gives meaningful output when the source file is compiled with the literal
// encoded as GB2312/GBK — confirm the build's source/execution charset.
int main(void)
{
    // NOTE(review): the "/n/t" inside the sample text looks like a mangled
    // "\n\t"; it is left untouched here — confirm the intended test data.
    char strTest[] = "IBM Sametime是定位于企业市场的即时通讯协作平台。据介绍,IBM Lotus Sametime 7.5/n/t增加了诸如音频/视频集成技术、多用户IP语音呼叫功能等,并可实现基于位置的感知。";
    char strOut[512] = {0}; // mind the size: at most one output byte per input byte, plus the terminator

    printf("in:\t%s\n", strTest);
    getStrPY((unsigned char *)strTest, strOut);
    printf("out:\t%s\n", strOut);
    return 0;
}