关于C++字符集转换问题
WINDOWS系统下:
UTF-8转GBK
// Converts a NUL-terminated UTF-8 string to the system ANSI code page.
//
// NOTE: the target is CP_ACP, i.e. whatever ANSI code page the system is
// configured with. That is GBK (code page 936) only on a Simplified-Chinese
// locale; on other locales the output is in that locale's code page.
//
// @param utf8  NUL-terminated UTF-8 input; may be NULL.
// @return A buffer allocated with new[] holding the converted, NUL-terminated
//         string (the caller must release it with delete[]), or NULL when the
//         input is NULL or either conversion step fails.
char* U2G(const char* utf8)
{
    if (utf8 == NULL)
        return NULL;

    // Step 1: UTF-8 -> UTF-16. With a source length of -1 the returned count
    // already includes the terminating NUL, so no extra element is needed.
    int wideLen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (wideLen <= 0)
        return NULL;

    wchar_t* wide = new wchar_t[wideLen];
    if (MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wide, wideLen) == 0)
    {
        delete[] wide;
        return NULL;
    }

    // Step 2: UTF-16 -> ANSI code page (count again includes the NUL).
    int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL);
    if (ansiLen <= 0)
    {
        delete[] wide;
        return NULL;
    }

    char* ansi = new char[ansiLen];
    int written = WideCharToMultiByte(CP_ACP, 0, wide, -1, ansi, ansiLen, NULL, NULL);
    delete[] wide;

    if (written == 0)
    {
        delete[] ansi;
        return NULL;
    }
    return ansi;
}
GBK转UTF-8
// Converts a NUL-terminated string in the system ANSI code page to UTF-8.
//
// NOTE: the source is interpreted as CP_ACP, i.e. whatever ANSI code page the
// system is configured with. That is GBK (code page 936) only on a
// Simplified-Chinese locale.
//
// @param gb2312  NUL-terminated input in the ANSI code page; may be NULL.
// @return A buffer allocated with new[] holding the NUL-terminated UTF-8
//         string (the caller must release it with delete[]), or NULL when the
//         input is NULL or either conversion step fails.
char* G2U(const char* gb2312)
{
    if (gb2312 == NULL)
        return NULL;

    // Step 1: ANSI -> UTF-16. With a source length of -1 the returned count
    // already includes the terminating NUL, so no extra element is needed.
    int wideLen = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    if (wideLen <= 0)
        return NULL;

    wchar_t* wide = new wchar_t[wideLen];
    if (MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wide, wideLen) == 0)
    {
        delete[] wide;
        return NULL;
    }

    // Step 2: UTF-16 -> UTF-8 (count again includes the NUL).
    int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide, -1, NULL, 0, NULL, NULL);
    if (utf8Len <= 0)
    {
        delete[] wide;
        return NULL;
    }

    char* utf8 = new char[utf8Len];
    int written = WideCharToMultiByte(CP_UTF8, 0, wide, -1, utf8, utf8Len, NULL, NULL);
    delete[] wide;

    if (written == 0)
    {
        delete[] utf8;
        return NULL;
    }
    return utf8;
}
多字节转宽字节
// Converts a NUL-terminated multi-byte string (system ANSI code page, CP_ACP)
// to a wide-character string.
//
// The required buffer size is queried from the API instead of being guessed
// from strlen(): a guess of strlen * 2 is zero for an empty string, which
// leaves no room for the terminating NUL.
//
// @param _str  NUL-terminated multi-byte input; may be NULL.
// @return A buffer allocated with new[] holding the NUL-terminated wide string
//         (the caller must release it with delete[]), or NULL when the input
//         is NULL or the conversion fails.
LPWSTR toUnicode(const char* _str)
{
    if (_str == NULL)
        return NULL;

    // Source length -1 => the returned count includes the terminating NUL.
    int _len = MultiByteToWideChar(CP_ACP, 0, _str, -1, NULL, 0);
    if (_len <= 0)
        return NULL;

    LPWSTR _ret = new WCHAR[_len];
    if (MultiByteToWideChar(CP_ACP, 0, _str, -1, _ret, _len) == 0)
    {
        delete[] _ret;
        return NULL;
    }
    return _ret;
}
宽字节转多字节
// Converts a NUL-terminated wide-character string to a multi-byte string in
// the system ANSI code page (CP_ACP).
//
// The result lives in a per-thread buffer owned by this function, so the
// caller must NOT free it. The pointer stays valid only until the next call
// to toAscii() on the same thread; copy the text if it has to outlive that.
// (Returning c_str() of a function-local std::string would hand back a
// dangling pointer, hence the thread_local storage.)
//
// @param str  NUL-terminated wide string; may be NULL.
// @return Pointer to the converted NUL-terminated string; an empty string
//         when the input is NULL or the conversion fails.
char * Utils::toAscii(LPWSTR str)
{
    thread_local std::string converted;
    converted.clear();

    if (str != NULL)
    {
        // Source length -1 => the returned count includes the terminating NUL.
        int iTextLen = WideCharToMultiByte(CP_ACP, 0, str, -1, NULL, 0, NULL, NULL);
        if (iTextLen > 0)
        {
            converted.resize(iTextLen);
            int written = ::WideCharToMultiByte(CP_ACP, 0, str, -1,
                                                &converted[0], iTextLen,
                                                NULL, NULL);
            if (written > 0)
                converted.resize(written - 1); // drop the NUL the API wrote
            else
                converted.clear();
        }
    }

    return const_cast<char*>(converted.c_str());
}
LINUX系统下的转换:
#include <iconv.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <sys/stat.h>
// Converts inlen bytes of inbuf from from_charset to to_charset using iconv,
// writing the result to outbuf as a NUL-terminated string.
//
// @param from_charset  iconv name of the source encoding.
// @param to_charset    iconv name of the target encoding.
// @param inbuf         input bytes (not modified by this function).
// @param inlen         number of input bytes to convert.
// @param outbuf        destination buffer; always NUL-terminated on return
//                      when outlen > 0.
// @param outlen        size of outbuf in bytes, including room for the NUL.
// @return 0 on success; -1 if an argument is invalid, the conversion is not
//         supported, the input is malformed, or the output does not fit
//         (outbuf then holds whatever was converted before the failure).
int code_convert(const char *from_charset, const char *to_charset, char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    if (from_charset == NULL || to_charset == NULL || outbuf == NULL || outlen == 0)
        return -1;

    memset(outbuf, 0, outlen);

    // iconv_open signals failure with (iconv_t)-1, not with 0.
    iconv_t cd = iconv_open(to_charset, from_charset);
    if (cd == (iconv_t)-1)
        return -1;

    char *in = inbuf;
    char *out = outbuf;
    size_t in_left = inlen;
    size_t out_left = outlen - 1; // keep one byte for the terminating NUL

    size_t rc = iconv(cd, &in, &in_left, &out, &out_left);

    // Release the descriptor on every path, including conversion failure.
    iconv_close(cd);

    // iconv advanced `out` past the last byte written; terminate there.
    *out = '\0';

    return (rc == (size_t)-1) ? -1 : 0;
}
// UTF-8 -> GB2312: forwards to code_convert() with the charset names fixed.
// Returns whatever code_convert() returns.
int u2g(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    const int status = code_convert("utf-8", "gb2312", inbuf, inlen, outbuf, outlen);
    return status;
}
// GB2312 -> UTF-8: forwards to code_convert() with the charset names fixed.
// Returns whatever code_convert() returns.
int g2u(char *inbuf, size_t inlen, char *outbuf, size_t outlen)
{
    const int status = code_convert("gb2312", "utf-8", inbuf, inlen, outbuf, outlen);
    return status;
}
// Writes len bytes of data to path, creating the file if needed and
// truncating any previous content. Returns 0 on success, -1 on failure.
static int write_bytes_to_file(const char *path, const char *data, size_t len)
{
    int fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
    if (fd == -1)
    {
        perror(path);
        return -1;
    }

    int status = 0;
    ssize_t written = write(fd, data, len);
    if (written == -1)
    {
        perror(path);
        status = -1;
    }
    else if ((size_t)written != len)
    {
        fprintf(stderr, "%s: short write\n", path);
        status = -1;
    }

    if (close(fd) == -1)
    {
        perror(path);
        status = -1;
    }
    return status;
}

// Demo: converts a sample string UTF-8 -> GB2312 -> UTF-8 and writes the two
// results to "test.txt" and "test.txt2".
// Returns 0 on success and 1 if any conversion or file operation fails.
int main(void)
{
    // A mutable array, because u2g() takes a non-const char*.
    // NOTE(review): the bytes of this literal are UTF-8 only if this source
    // file is saved and compiled as UTF-8 - confirm for your toolchain.
    char s[] = "中国";

    char buf[10];
    if (u2g(s, strlen(s), buf, sizeof(buf)) != 0)
    {
        fprintf(stderr, "UTF-8 to GB2312 conversion failed\n");
        return 1;
    }
    if (write_bytes_to_file("test.txt", buf, strlen(buf)) != 0)
        return 1;

    char buf2[10];
    if (g2u(buf, strlen(buf), buf2, sizeof(buf2)) != 0)
    {
        fprintf(stderr, "GB2312 to UTF-8 conversion failed\n");
        return 1;
    }
    if (write_bytes_to_file("test.txt2", buf2, strlen(buf2)) != 0)
        return 1;

    return 0;
}