首先,
有一个现成可用的、将 GB2312 转换成 UTF-8 的函数:
// Convert a GB2312 (system ANSI code page) encoded C string to a newly
// allocated UTF-8 string.
// The conversion goes GB2312 -> UTF-16 (wide) -> UTF-8, because the
// Win32 API has no direct multibyte-to-multibyte conversion.
// @param gb2312  NUL-terminated string in the system ANSI code page.
// @return heap-allocated NUL-terminated UTF-8 string; the CALLER owns it
//         and must release it with delete[]. Returns NULL on failure.
char* AppDelegate::transToUTF8(const char* gb2312) {
    // First pass: query the required wide-character count. Passing -1 as
    // the source length makes the count include the terminating NUL.
    int wlen = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    if (wlen <= 0)
        return NULL;
    wchar_t* wstr = new wchar_t[wlen];
    // BUG FIX: the original zeroed only (len + 1) BYTES of a wchar_t
    // buffer (wrong element size). No memset is needed at all: the API
    // writes the terminating NUL because the -1 length includes it.
    MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, wlen);

    // Second pass: size and perform the UTF-16 -> UTF-8 conversion.
    int len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
    if (len <= 0) {
        delete[] wstr;
        return NULL;
    }
    char* str = new char[len];
    WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL);
    // `new` never returns null (it throws), so the original's
    // `if (wstr)` guard was redundant.
    delete[] wstr;
    return str;
}
其次
查看ConvertUTF.h文件,看里边的转换方法
/*
 * ConvertUTF.h -- Unicode conversion utilities (derived from the
 * Unicode, Inc. / LLVM ConvertUTF helpers). Declares conversions
 * between UTF-8, UTF-16 and UTF-32, plus validity helpers.
 */
#ifndef LLVM_SUPPORT_CONVERTUTF_H
#define LLVM_SUPPORT_CONVERTUTF_H
#include <stddef.h> /* ptrdiff_t */
/* ---------------------------------------------------------------------
The following 4 definitions are compiler-specific.
The C standard does not guarantee that wchar_t has at least
16 bits, so wchar_t is no less portable than unsigned short!
All should be unsigned values to avoid sign extension during
bit mask & shift operations.
------------------------------------------------------------------------ */
typedef unsigned int UTF32; /* at least 32 bits */
typedef unsigned short UTF16; /* at least 16 bits */
typedef unsigned char UTF8; /* typically 8 bits */
typedef unsigned char Boolean; /* 0 or 1 */
/* Some fundamental constants */
#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
#define UNI_MAX_BMP (UTF32)0x0000FFFF
#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
/* Result of a conversion attempt. */
typedef enum {
conversionOK, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */
targetExhausted, /* insuff. room in target for conversion */
sourceIllegal /* source sequence is illegal/malformed */
} ConversionResult;
/* strictConversion rejects malformed input; lenientConversion replaces
   it with UNI_REPLACEMENT_CHAR. */
typedef enum {
strictConversion = 0,
lenientConversion
} ConversionFlags;
/* This is for C++ and does no harm in C */
#ifdef __cplusplus
extern "C" {
#endif
/* Each conversion advances *sourceStart / *targetStart past what was
   consumed / produced, so conversions can be resumed after
   targetExhausted. */
ConversionResult ConvertUTF8toUTF16 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF8toUTF32 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF16toUTF8 (
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF32toUTF8 (
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF16toUTF32 (
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF32toUTF16 (
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
unsigned getNumBytesForUTF8(UTF8 firstByte);
int getUTF8StringLength(const UTF8* utf8);
#ifdef __cplusplus
}
/*************************************************************************/
/* Below are LLVM-specific wrappers of the functions above. */
//#include "llvm/ADT/ArrayRef.h"
//#include "llvm/ADT/StringRef.h"
#include <vector>
#include <string>
namespace llvm {
/**
 * Convert a UTF-8 std::string to UTF8, UTF16, or UTF32 depending on
 * WideCharWidth. The converted data is written to ResultPtr, which needs to
 * point to at least WideCharWidth * (Source.size() + 1) bytes. On success,
 * ResultPtr will point one after the end of the copied string. On failure,
 * ResultPtr will not be changed, and ErrorPtr will be set to the location of
 * the first character which could not be converted.
 * \return true on success.
 */
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
char *&ResultPtr, const UTF8 *&ErrorPtr);
/**
 * Convert an Unicode code point to UTF8 sequence.
 *
 * \param Source a Unicode code point.
 * \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
 * \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes. On success \c ResultPtr is
 * updated one past end of the converted sequence.
 *
 * \returns true on success.
 */
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
/**
 * Convert the first UTF8 sequence in the given source buffer to a UTF32
 * code point.
 *
 * \param [in,out] source A pointer to the source buffer. If the conversion
 * succeeds, this pointer will be updated to point to the byte just past the
 * end of the converted sequence.
 * \param sourceEnd A pointer just past the end of the source buffer.
 * \param [out] target The converted code
 * \param flags Whether the conversion is strict or lenient.
 *
 * \returns conversionOK on success
 *
 * \sa ConvertUTF8toUTF32
 */
static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
const UTF8 *sourceEnd,
UTF32 *target,
ConversionFlags flags) {
if (*source == sourceEnd)
return sourceExhausted;
unsigned size = getNumBytesForUTF8(**source);
if ((ptrdiff_t)size > sourceEnd - *source)
return sourceExhausted;
return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
}
/**
 * Returns true if a blob of text starts with a UTF-16 big or little endian byte
 * order mark.
 */
bool hasUTF16ByteOrderMark(const char* SrcBytes, size_t len);
/**
 * Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
 *
 * \param [in] utf16 A buffer of what is assumed to be UTF-16 encoded text.
 * \param [out] Out Converted UTF-8 is stored here on success.
 * \returns true on success
 */
bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out);
} /* end namespace llvm */
#endif /* __cplusplus */
/* FIX: the original excerpt was missing this closing #endif for the
   LLVM_SUPPORT_CONVERTUTF_H include guard opened at the top of the
   file, which makes the header fail to compile. */
#endif /* LLVM_SUPPORT_CONVERTUTF_H */
先解决第一个问题,我们使用VS2010开发的时候,调试的时候,中文打印出来都是乱码,这个问题很纠结。
如下图:
- CCLOG("cclog: 测试使用标签的自动换行和个别字体大写");LOGNEWLINE;
输出结果如下图:
- <dict>
- cocos2d.x.version: 2.1rc0-x-2.1.4
- cocos2d.x.compiled_with_profiler: false
- cocos2d.x.compiled_with_gl_state_cache: true
- gl.vendor: NVIDIA Corporation
- gl.renderer: GeForce 310M/PCIe/SSE2
- gl.version: 3.3.0
- gl.max_texture_size: 8192
- gl.max_texture_units: 96
- gl.supports_PVRTC: false
- gl.supports_NPOT: true
- gl.supports_BGRA8888: false
- gl.supports_discard_framebuffer: false
- gl.supports_vertex_array_object: true
- </dict>
- cclog: ?????????????????к?????????д
这是很大的一个问题,是吗?英语不是很熟练的小伙伴们?
不得不说,VS2010这个开发工具没说的,真是开发者的杯具开始。尤其是对我使用MAC非常熟悉的人来说。
好的,废话不说,看下怎么使用控制台来调试我们的代码,能够正常输出中文。
这里需要配置 main.cpp,通过它来调出控制台输出中文,但是有一点限制,就是必须使用 cout 或者 printf,而不能使用 CCLog。
直接贴出代码:
- #include "main.h"
- #include "AppDelegate.h"
- #include "CCEGLView.h"
- #include "net/NetWork.h"
- #include "adapter/RouterAdapter.h"
- USING_NS_CC;
- #define USE_WIN32_CONSOLE
- int APIENTRY _tWinMain(HINSTANCE hInstance,
- HINSTANCE hPrevInstance,
- LPTSTR lpCmdLine,
- int nCmdShow)
- {
- UNREFERENCED_PARAMETER(hPrevInstance);
- UNREFERENCED_PARAMETER(lpCmdLine);
- #ifdef USE_WIN32_CONSOLE
- AllocConsole();
- freopen("CONIN$","r",stdin);
- freopen("CONOUT$","w",stdout);
- freopen("CONOUT$","w",stderr);
- #endif
- // create the application instance
- CCEGLView* eglView = CCEGLView::sharedOpenGLView();
- eglView->setViewName("MT");
- //eglView->setFrameSize(480,320);
- return CCApplication::sharedApplication()->run();
- #ifdef USE_WIN32_CONSOLE
- FreeConsole();
- #endif
- }
通过这个USE_WIN32_CONSOLE来实现我们的代码调试。测试代码如下
- CCLOG("cclog: 测试使用标签的自动换行和个别字体大写");LOGNEWLINE;
- printf("printf: 测试使用标签的自动换行和个别字体大写");LOGNEWLINE;
VS2010输出窗口如下:
- <dict>
- cocos2d.x.version: 2.1rc0-x-2.1.4
- cocos2d.x.compiled_with_profiler: false
- cocos2d.x.compiled_with_gl_state_cache: true
- gl.vendor: NVIDIA Corporation
- gl.renderer: GeForce 310M/PCIe/SSE2
- gl.version: 3.3.0
- gl.max_texture_size: 8192
- gl.max_texture_units: 96
- gl.supports_PVRTC: false
- gl.supports_NPOT: true
- gl.supports_BGRA8888: false
- gl.supports_discard_framebuffer: false
- gl.supports_vertex_array_object: true
- </dict>
- cclog: ?????????????????к?????????д
为什么这里只输出了 CCLog 的内容,而没有 printf 的内容呢?
看下控制台,如下:
然后是第二个问题,模拟器上竟然也是乱码!!!如果英语很好,在调试的时候,可以使用英文代替,但是到了开发的时候了,模拟器上中文竟然也是乱码??!
这实在让人头疼。
测试代码:
- // Label created straight from a GB2312 source-file literal; on the
- // win32 simulator this renders as mojibake (see the screenshot below).
- CCLabelTTF *testLabel = CCLabelTTF::create("测试使用标签的自动换行和个别字体大写,test","Zapfino",30);
- testLabel->setPosition(ccp(visibleSize.width*0.5,visibleSize.height*0.2));
- testLabel->setColor(ccc3(200,200,200));
- this->addChild(testLabel,1);
模拟器运行如下图:
有了时间再写,很快。
好的,让我们来解决这个问题吧。
为什么会出现这个问题呢?那是因为 VS2010 使用的编码方式是 GB2312,这时只有我们把 Unicode 编码转换成 GB2312,才能正常显示。
这里还需要一个wstring的数据类型。
这个是官方文档的介绍。实际上我们不需要关注太多,只需要知道 wchar_t 是 C/C++ 的字符数据类型,是一种扩展的字符存储方式。wchar_t 类型主要用在国际化程序的实现中,但它不等同于 Unicode 编码;Unicode 编码的字符一般以 wchar_t 类型存储。char 是 8 位字符类型,最多只能表示 256 种字符,许多外文字符集所含的字符数目超过 256 个,char 型无法表示。
wchar_t数据类型一般为16位或32位,但不同的C或C++库有不同的规定,如GNU Libc规定wchar_t为32位[1],总之,wchar_t所能表示的字符数远超char型。
标准C++中的wprintf()函数以及iostream类库中的类和对象能提供wchar_t宽字符类型的相关操作。
- wstring
- Visual Studio 2012 其他版本
- 声明宽字符字符串。 wstring 是专用模板选件类 basic_string 包含 wchar_t 字符串数据的typedef。 wstring 继承 basic_string 选件类的功能;这包括运算符和方法。
- 专用 basic_string 的其他typedef包括 字符串、u16string和 u32string。 有关更多信息,请参见 string 成员。
- typedef basic_string<wchar_t, char_traits<wchar_t>, allocator<wchar_t> > wstring;
- 示例
- // string_wstring.cpp
- // compile with: /EHsc
- #include <string>
- #include <iostream>
- int main( )
- {
- using namespace std;
- // Equivalent ways to declare an object of type
- // basic_string <wchar_t>
- const basic_string <wchar_t> s1 ( L"abc" );
- wstring s2 ( L"abc" ); // Uses the typedef for wstring
- // Comparison between two objects of type basic_string <wchar_t>
- if ( s1 == s2 )
- cout << "The strings s1 & s2 are equal." << endl;
- else
- cout << "The strings s1 & s2 are not equal." << endl;
- }
- 字符串文件& s2相等。
- 要求
- 标头: <string>
- 命名空间: std
把wstring 当成 string去使用就OK了。
所以,我们只需要把 Unicode 编码转成 GBK,就可以在 VS2010 中使用中文显示,并可以在模拟器上显示中文。
这里有一个转化类,一个简单的接口就可以解决我们的第二个问题。
废话不说,代码才是王道。
#pragma once
#include <iostream>
#include <stdio.h>
// <string> is the C++ header declaring the std::string class
// (and the std::wstring typedef); `string s1` creates a string object.
#include <string>
// FIX: the original included <cstringt.h>, which is the ATL/MFC
// CStringT header, NOT "the std version of the old C header" as its
// comment claimed -- that header is <cstring>.
#include <cstring>

// Helper macro used by the article's sample logging calls.
#define LOGNEWLINE printf("\n")

using namespace std;

// Converts a wide (UTF-16) string to a narrow byte string so that the
// VS2010 output window and the win32 simulator can display Chinese.
class UTF8ToGBK
{
public:
	UTF8ToGBK(void);
	~UTF8ToGBK(void);
	// Convert a wide string to a narrow byte string.
	// NOTE(review): despite the name, the implementation converts with
	// CP_UTF8, so the output is UTF-8 rather than GBK (VS2010's default
	// code page) -- confirm the intended target encoding.
	static string UTF8TOGBK(const wstring text);
};
- #include "UTF8ToGBK.h"
- UTF8ToGBK::UTF8ToGBK(void)
- {
- }
- UTF8ToGBK::~UTF8ToGBK(void)
- {
- }
- //将utf8格式编码转化成gbk,vs2010的默认的编码格式
- string UTF8ToGBK::UTF8TOGBK(const wstring text)
- {
- wstring tes;
- int asciSize = WideCharToMultiByte(CP_UTF8,0,text.c_str(),text.size(),NULL,0,NULL,NULL);
- if (asciSize == ERROR_NO_UNICODE_TRANSLATION || asciSize == 0)
- {
- return string();
- }
- char *resultString = new char[asciSize];
- int conveResult = WideCharToMultiByte(CP_UTF8,0,text.c_str(),text.size(),resultString,asciSize,NULL,NULL);
- if (conveResult != asciSize)
- {
- return string();
- }
- string buffer = "";
- buffer.append(resultString,asciSize);
- //CCLog("CCLog: buffer: %s",buffer.c_str());LOGNEWLINE;
- //buffer = strcpy();
- //printf("resultString1: %s",resultString);LOGNEWLINE;
- //CCLog("resultString2: %s",resultString);LOGNEWLINE;
- delete[] resultString; //释放内存
- return buffer;
- }
好的,现在让我们测试一下。
测试代码如下:
- // Wide-string literals: VS2010 stores L"..." as UTF-16 (wchar_t).
- wstring testWstring1 = L"测试使用标签的自动换行和个别字体大写,test";
- wstring testWstring2 = L"测试使用标签的自动换行和个别字体大写,test";
- // Convert the wide strings to narrow byte strings for logging/labels.
- // NOTE(review): UTF8TOGBK converts with CP_UTF8, i.e. it actually
- // produces UTF-8 bytes despite its name -- verify against the output.
- string testString1 = this->UTF8TOGBK(testWstring1);
- string testString2 = this->UTF8TOGBK(testWstring2);
- CCLog("cclog: testString1: %s",testString1.c_str());
- CCLog("cclog: testString2: %s",testString2.c_str());
- printf("printf: testString1: %s",testString1.c_str());LOGNEWLINE;
- printf("printf: testString2: %s",testString2.c_str());LOGNEWLINE;
- // Test automatic label line-wrapping.
- // Tests label word-wrap with mixed-case glyphs and alignment.
- string test = "测试使用标签的自动换行和个别字体大写";
- //CCLabelTTF *testLabel = CCLabelTTF::create(testString1.c_str(),"Zapfino",30);
- //string testString1 = this->UTF8TOGBK(testWstring1);
- CCLabelTTF *testLabel = CCLabelTTF::create(testString1.c_str(),"Zapfino",30);
- testLabel->setPosition(ccp(visibleSize.width*0.5,visibleSize.height*0.2));
- testLabel->setColor(ccc3(200,200,200));
- this->addChild(testLabel,1);
好的,分别看下VS2010 的输出框和 控制台输出框。
先来VS2010的:
可以发现,转码后,VS认到我们的祖宗文字了。
然后看控制台的输出:
也是OK的,细心的小伙伴们肯定发现了什么?
是的,控制台可以输出 Unicode 编码,但需要使用 cout 或 printf;如果输出 GBK 编码,则需要用 CCLog 输出。
VS2010 比较单纯,只认 GBK 编码 和 CCLog 输出。
差点忘了效果图,贴上: