Decode UTF-8 into Unicode code points in C

在网上找了很多资料,都不是用C语言实现的,结果一怒之下,就自己写了一个UTF-8解码程序,代码如下:


/********************************************************
 * @author  Airead Fan <fgh1987168@gmail.com>		*
 * @date    2011 9月 25 10:31:03 CST			*
 ********************************************************
 *		after studying C 69 days		*
 *		after studying APUE 34 days		*
 ********************************************************/

/*
 * This program demonstrates how to decode a UTF-8 string
 * into Unicode code points, each returned as an unsigned long.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Decode the UTF-8 sequence that starts at str[*offset].
 *
 * str    - NUL-terminated byte string holding UTF-8 text
 * offset - in/out: index of the first byte to decode; on return it has
 *          been advanced past the bytes that were consumed
 *
 * Returns the decoded code point.  A well-formed sequence of 1 to 4
 * bytes consumes exactly that many bytes.  Anything malformed (a stray
 * continuation byte, an invalid lead byte, a truncated sequence, an
 * overlong encoding, a UTF-16 surrogate, or a value above U+10FFFF)
 * yields U+FFFD and consumes exactly one byte, so a caller looping over
 * the string always makes progress and never skips the terminator.
 *
 * A byte below 0x80, including the NUL terminator itself, is returned
 * as-is and consumes one byte.
 */
unsigned long utf8tolong(unsigned char *str, int *offset)
{
	enum { REPLACEMENT_CHAR = 0xfffd };

	const unsigned char *p = str + *offset;
	unsigned long utflong;	/* return value */
	unsigned long min;	/* smallest value that needs this length */
	int extra;		/* continuation bytes that must follow */
	int i;

	if (p[0] < 0x80) {
		/* 1 unit: 0xxxxxxx */
		*offset += 1;
		return p[0];
	} else if ((p[0] & 0xe0) == 0xc0) {
		/* 2 unit: 110xxxxx 10xxxxxx */
		utflong = p[0] & 0x1f;
		extra = 1;
		min = 0x80;
	} else if ((p[0] & 0xf0) == 0xe0) {
		/* 3 unit: 1110xxxx 10xxxxxx 10xxxxxx */
		utflong = p[0] & 0x0f;
		extra = 2;
		min = 0x800;
	} else if ((p[0] & 0xf8) == 0xf0) {
		/* 4 unit: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		utflong = p[0] & 0x07;
		extra = 3;
		min = 0x10000;
	} else {
		/* stray continuation byte (10xxxxxx) or invalid lead byte */
		*offset += 1;
		return REPLACEMENT_CHAR;
	}

	/*
	 * Each byte is checked before the next one is read.  The NUL
	 * terminator is not a continuation byte, so a truncated sequence
	 * stops here and nothing past the terminator is ever accessed.
	 */
	for (i = 1; i <= extra; i++) {
		if ((p[i] & 0xc0) != 0x80) {
			*offset += 1;
			return REPLACEMENT_CHAR;
		}
		utflong = (utflong << 6) | (unsigned long)(p[i] & 0x3f);
	}

	/* reject overlong forms, surrogates and out-of-range values */
	if (utflong < min || utflong > 0x10ffff ||
	    (utflong >= 0xd800 && utflong <= 0xdfff)) {
		*offset += 1;
		return REPLACEMENT_CHAR;
	}

	*offset += extra + 1;
	return utflong;
}

/*
 * Decode the UTF-8 string passed as the first command-line argument and
 * print every decoded value in hexadecimal, one per line, prefixed with
 * its 1-based index in square brackets.
 *
 * Returns 0 after the whole argument has been processed; prints a usage
 * message to stderr and exits with status 1 when no argument is given.
 */
int main(int argc, char *argv[])
{
	if (argc < 2) {
		fprintf(stderr, "usage: %s <utf str>\n", argv[0]);
		exit(1);
	}

	unsigned char *text = (unsigned char *)argv[1];
	size_t length = strlen(argv[1]);
	int offset = 0;
	int num = 0;

	/*
	 * Bound the loop by the string length rather than by looking for
	 * the terminator at the current offset: if the decoder ever steps
	 * over the NUL, the loop still ends instead of reading beyond the
	 * argument.
	 */
	while ((size_t)offset < length) {
		int start = offset;
		unsigned long utflong = utf8tolong(text, &offset);

		/* force progress if the decoder consumed nothing */
		if (offset <= start) {
			offset = start + 1;
		}

		num++;
		fprintf(stdout, "[%d]%lx\n", num, utflong);
	}

	return 0;
}


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值