在网上找了很多资料，都不是 C 语言的，结果一怒之下，就自己写了一个，程序如下：
/********************************************************
* @author Airead Fan <fgh1987168@gmail.com> *
* @date 2011 9月 25 10:31:03 CST *
********************************************************
* after studying C 69 days *
* after studying APUE 34 days *
********************************************************/
/*
* This program demonstrates a method for
* converting UTF-8 encoded characters to unsigned long values
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Decode one UTF-8 encoded character from a NUL-terminated byte string.
 *
 * str    - start of the NUL-terminated UTF-8 byte string
 * offset - in: index of the first byte of the character to decode;
 *          out: advanced past the bytes that were consumed
 *
 * Returns the decoded code point (U+0000..U+10FFFF).
 *
 * Malformed input (a stray continuation byte, an invalid lead byte, a
 * truncated sequence, an overlong encoding, a surrogate, or a value above
 * U+10FFFF) yields U+FFFD and consumes exactly one byte, so decoding
 * resynchronises on the next byte.
 *
 * *offset always advances by at least one, so a caller looping over the
 * string always makes progress.  Trail bytes are inspected one at a time
 * and decoding stops at the first byte that is not a continuation byte,
 * so the terminating NUL is never read past or skipped.  If str[*offset]
 * is itself the NUL, 0 is returned and *offset moves past it; callers
 * should test for the end of the string before calling.
 */
unsigned long utf8tolong(unsigned char *str, int *offset)
{
    /* Smallest code point that legitimately needs a sequence of N bytes. */
    static const unsigned long min_value[] = { 0, 0, 0x80, 0x800, 0x10000 };
    const unsigned long replacement = 0xFFFDUL;
    const unsigned char *p = str + *offset;
    unsigned long cp;
    int len;
    int i;

    if (p[0] < 0x80) {
        /* 0xxxxxxx: plain ASCII */
        *offset += 1;
        return p[0];
    } else if ((p[0] & 0xe0) == 0xc0) {
        /* 110xxxxx: lead byte of a 2-byte sequence */
        len = 2;
        cp = p[0] & 0x1fu;
    } else if ((p[0] & 0xf0) == 0xe0) {
        /* 1110xxxx: lead byte of a 3-byte sequence */
        len = 3;
        cp = p[0] & 0x0fu;
    } else if ((p[0] & 0xf8) == 0xf0) {
        /* 11110xxx: lead byte of a 4-byte sequence */
        len = 4;
        cp = p[0] & 0x07u;
    } else {
        /* 10xxxxxx (stray continuation) or 11111xxx (never valid) */
        *offset += 1;
        return replacement;
    }

    /* Each trail byte must be 10xxxxxx and contributes six payload bits. */
    for (i = 1; i < len; i++) {
        if ((p[i] & 0xc0) != 0x80) {
            *offset += 1;
            return replacement;
        }
        cp = (cp << 6) | (p[i] & 0x3fu);
    }

    /* Reject overlong forms, UTF-16 surrogates and out-of-range values. */
    if (cp < min_value[len] || cp > 0x10FFFFUL ||
        (cp >= 0xD800UL && cp <= 0xDFFFUL)) {
        *offset += 1;
        return replacement;
    }

    *offset += len;
    return cp;
}
/*
 * Decode the UTF-8 string given as the first command-line argument and
 * print every decoded value to stdout in hexadecimal, one per line, in
 * the form "[index]value" (index starts at 1).
 *
 * Exit status is 0 on success and 1 when no argument was supplied or
 * when the decoder fails to consume any input.
 */
int main(int argc, char *argv[])
{
    /* argv[0] may be NULL when argc is 0; never hand NULL to %s. */
    const char *prog = (argc > 0 && argv[0] != NULL) ? argv[0] : "utf8tolong";

    if (argc < 2) {
        fprintf(stderr, "usage: %s <utf str>\n", prog);
        exit(1);
    }

    unsigned char *text = (unsigned char *)argv[1];
    size_t length = strlen(argv[1]);
    int offset = 0;
    int num = 0;

    /*
     * The loop is bounded by the string length instead of testing for
     * '\0' at the current offset: should the decoder step over the
     * terminator (input ending in the middle of a multi-byte sequence),
     * the loop still stops without reading outside the argument.
     */
    while (offset >= 0 && (size_t)offset < length) {
        int before = offset;
        unsigned long utflong = utf8tolong(text, &offset);

        /* A decoder that consumed nothing would make this loop endless. */
        if (offset <= before) {
            fprintf(stderr, "%s: cannot decode byte 0x%02x at offset %d\n",
                    prog, (unsigned)text[before], before);
            exit(1);
        }

        num++;
        fprintf(stdout, "[%d]%lx\n", num, utflong);
    }

    return 0;
}