采用 libiconv 字符编码库:它几乎包含了所有常用的字符集,不依赖 Linux 平台自带的字符集支持,可以作为一个独立的动态库存在。
libiconv-1.9.1 字符编码转换库编译方法
X86 Platform:
1> make distclean
2> ./configure
3> make
arca & ixp425 Platform:
1> 进行 X86 平台的./configure
2> 修改
./lib/Makefile
./libcharset/lib/Makefile
./libcharset/libtool
./src/Makefile
./srclib/Makefile
./libtool
将这些文件的软链接重新指向相应目录下对应平台的文件:
arca: libtool.arca Makefile.arca
ixp425: libtool.425 Makefile.425
3> make
将会在 ./lib/下生成libiconv_plug_linux.so 库文件,即可使用.
(./lib/.libs/libiconv.so.2.2.0 是 libtool 脚本工具生成的动态库(与 libiconv.la 对应),也可以使用)
测试代码:
/* Size of the scratch buffer used while measuring the converted length. */
#define tmpbufsize 4096

static int iconv_string_fail( iconv_t cd );
static int iconv_string_autodetect( const char* tocode,
                                    const char* const* candidates,
                                    size_t ncandidates,
                                    const char* start, const char* end,
                                    char** resultp, size_t* lengthp );

/*
 * Convert the bytes in [start, end) from encoding `fromcode` to `tocode`.
 *
 * Besides the names accepted by iconv_open(), `fromcode` may be one of
 * "autodetect_utf8", "autodetect_jp" or "autodetect_kr" when iconv_open()
 * itself rejects that name with EINVAL; a fixed list of candidate encodings
 * is then tried in order.
 *
 * lengthp  if not NULL, receives the number of converted bytes.
 * resultp  if NULL, only the length is computed.  Otherwise *resultp must be
 *          NULL or a malloc'ed block; it is resized and filled with the
 *          converted bytes.  The output is NOT NUL-terminated.  The caller
 *          owns *resultp and must free() it.
 *
 * Returns 0 on success, -1 with errno set on failure.  If the buffer cannot
 * be (re)allocated, errno is ENOMEM and *resultp is left untouched so the
 * caller's block is not leaked.  An incomplete multibyte sequence at the very
 * end of the input is ignored; everything before it is converted.
 */
int iconv_string( const char* tocode, const char* fromcode,
                  const char* start, const char* end,
                  char** resultp, size_t* lengthp )
{
    iconv_t cd = iconv_open( tocode, fromcode );
    size_t length;
    char* result;

    if ( cd == ( iconv_t ) ( -1 ) ) {
        if ( errno != EINVAL )
            return -1;
        /* Unsupported fromcode or tocode.  Check whether the caller requested
           autodetection. */
        if ( !strcmp( fromcode, "autodetect_utf8" ) ) {
            /* Try UTF-8 first.  There are very few ISO-8859-1 inputs that
               would be valid UTF-8, but many UTF-8 inputs are valid
               ISO-8859-1. */
            static const char* const candidates[] = { "UTF-8", "ISO-8859-1" };
            return iconv_string_autodetect( tocode, candidates,
                                            sizeof candidates / sizeof candidates[ 0 ],
                                            start, end, resultp, lengthp );
        }
        if ( !strcmp( fromcode, "autodetect_jp" ) ) {
            /* Try the 7-bit encoding first; it fails if the input contains
               bytes >= 0x80.  Then EUC-JP, and finally SHIFT_JIS.  Short
               SHIFT_JIS inputs may be mistaken for EUC-JP; with this
               approach that is unavoidable. */
            static const char* const candidates[] = { "ISO-2022-JP-2", "EUC-JP", "SHIFT_JIS" };
            return iconv_string_autodetect( tocode, candidates,
                                            sizeof candidates / sizeof candidates[ 0 ],
                                            start, end, resultp, lengthp );
        }
        if ( !strcmp( fromcode, "autodetect_kr" ) ) {
            /* Try the 7-bit encoding first; it fails if the input contains
               bytes >= 0x80.  Then fall back to EUC-KR. */
            static const char* const candidates[] = { "ISO-2022-KR", "EUC-KR" };
            return iconv_string_autodetect( tocode, candidates,
                                            sizeof candidates / sizeof candidates[ 0 ],
                                            start, end, resultp, lengthp );
        }
        errno = EINVAL;
        return -1;
    }

    /* First pass: determine the length we need by converting into a scratch
       buffer and only counting the bytes produced. */
    {
        size_t count = 0;
        char tmpbuf[ tmpbufsize ];
        const char* inptr = start;
        size_t insize = ( size_t ) ( end - start );

        while ( insize > 0 ) {
            char* outptr = tmpbuf;
            size_t outsize = tmpbufsize;
            /* The (void *) cast lets the same call compile against both the
               POSIX prototype (char **) and libiconv's (const char **). */
            size_t res = iconv( cd, ( void* ) &inptr, &insize, &outptr, &outsize );
            int incomplete = 0;

            if ( res == ( size_t ) ( -1 ) && errno != E2BIG ) {
                if ( errno != EINVAL )
                    return iconv_string_fail( cd );
                incomplete = 1;
            }
            /* Count the output of this call even when it stopped at a
               truncated trailing sequence: the second pass will write these
               bytes, so they must be part of the allocation. */
            count += ( size_t ) ( outptr - tmpbuf );
            if ( incomplete )
                break;
        }
        {
            /* Account for any bytes needed to return to the initial shift state. */
            char* outptr = tmpbuf;
            size_t outsize = tmpbufsize;
            size_t res = iconv( cd, NULL, NULL, &outptr, &outsize );

            if ( res == ( size_t ) ( -1 ) )
                return iconv_string_fail( cd );
            count += ( size_t ) ( outptr - tmpbuf );
        }
        length = count;
    }

    if ( lengthp != NULL )
        *lengthp = length;
    if ( resultp == NULL ) {
        iconv_close( cd );
        return 0;
    }

    /* realloc(NULL, n) behaves like malloc(n).  Never request zero bytes (the
       result of that is implementation-defined), and only publish the new
       pointer once the allocation has succeeded. */
    result = realloc( *resultp, length > 0 ? length : 1 );
    if ( result == NULL ) {
        iconv_close( cd );
        errno = ENOMEM;
        return -1;
    }
    *resultp = result;
    if ( length == 0 ) {
        iconv_close( cd );
        return 0;
    }

    iconv( cd, NULL, NULL, NULL, NULL ); /* return to the initial state */

    /* Second pass: do the conversion for real. */
    {
        const char* inptr = start;
        size_t insize = ( size_t ) ( end - start );
        char* outptr = result;
        size_t outsize = length;

        while ( insize > 0 ) {
            size_t res = iconv( cd, ( void* ) &inptr, &insize, &outptr, &outsize );

            if ( res == ( size_t ) ( -1 ) ) {
                if ( errno == EINVAL )
                    break; /* truncated trailing sequence: ignore it */
                return iconv_string_fail( cd );
            }
        }
        {
            size_t res = iconv( cd, NULL, NULL, &outptr, &outsize );

            if ( res == ( size_t ) ( -1 ) )
                return iconv_string_fail( cd );
        }
        /* Both passes must agree on the output size. */
        if ( outsize != 0 )
            abort();
    }
    iconv_close( cd );
    return 0;
}

/* Close `cd` while preserving the errno of the failure being reported
   (iconv_close() may overwrite it).  Always returns -1. */
static int iconv_string_fail( iconv_t cd )
{
    int saved_errno = errno;

    iconv_close( cd );
    errno = saved_errno;
    return -1;
}

/* Try each source encoding in `candidates` in order.  Move on to the next one
   only when a conversion fails with EILSEQ; the outcome of the last candidate
   is returned unconditionally. */
static int iconv_string_autodetect( const char* tocode,
                                    const char* const* candidates,
                                    size_t ncandidates,
                                    const char* start, const char* end,
                                    char** resultp, size_t* lengthp )
{
    int ret = -1;
    size_t i;

    for ( i = 0; i < ncandidates; i++ ) {
        ret = iconv_string( tocode, candidates[ i ], start, end, resultp, lengthp );
        if ( !( ret < 0 && errno == EILSEQ ) )
            break;
    }
    return ret;
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
#include "iconv_string.h"
/*
 * Smoke test for iconv_string(): convert a short literal from GB2312 to
 * UTF-8, then print the converted length and the converted bytes.
 * Exits with a non-zero status if the conversion fails.
 */
int main( void )
{
    /* NOTE(review): the bytes of this literal are in whatever encoding this
       source file is saved in; the call below treats them as GB2312 —
       confirm the file encoding matches. */
    const char *s = "我来也";
    char *result = NULL;
    size_t size = 0; /* iconv_string() writes through a size_t *, not an int * */

    if ( iconv_string( "utf-8", "gb2312", s, s + strlen( s ), &result, &size ) < 0 ) {
        perror( "iconv_string" );
        free( result );
        return EXIT_FAILURE;
    }

    printf( "len=%zu\n", size );
    /* The converted buffer is not NUL-terminated, so write exactly `size`
       bytes instead of using %s. */
    fwrite( result, 1, size, stdout );
    printf( "\n" );

    free( result );
    return EXIT_SUCCESS;
}
Makefile
# Builds the iconv_string test program and links it against the libiconv
# build produced above.
CC=gcc
CFLAGS= -g
export CC
export CFLAGS
# Header search path for the compile step.
INCS= -I../include
LIBS= -liconv_plug_linux

# Compile each object from its C source, passing the include path.
%.o: %.c
	${CC} ${CFLAGS} ${INCS} -c $< -o $@

# Both objects include iconv_string.h; rebuild them when it changes.
test.o iconv_string.o: iconv_string.h

test: test.o iconv_string.o
	${CC} $^ -o $@ ${LIBS}

.PHONY: clean
clean:
	rm -f *.o test