首先,
有一个现成可用的、将 GB2312 转换成 UTF-8 的函数:
// Convert a GB2312 (system ANSI code page) encoded C string to a newly
// allocated UTF-8 string.
// The conversion goes GB2312 -> UTF-16 (wide) -> UTF-8, because the
// Win32 API has no direct multibyte-to-multibyte conversion.
// @param gb2312  NUL-terminated string in the system ANSI code page.
// @return heap-allocated NUL-terminated UTF-8 string; the CALLER owns it
//         and must release it with delete[]. Returns NULL on failure.
char* AppDelegate::transToUTF8(const char* gb2312) {
    // First pass: query the required wide-character count. Passing -1 as
    // the source length makes the count include the terminating NUL.
    int wlen = MultiByteToWideChar(CP_ACP, 0, gb2312, -1, NULL, 0);
    if (wlen <= 0)
        return NULL;
    wchar_t* wstr = new wchar_t[wlen];
    // BUG FIX: the original zeroed only (len + 1) BYTES of a wchar_t
    // buffer (wrong element size). No memset is needed at all: the API
    // writes the terminating NUL because the -1 length includes it.
    MultiByteToWideChar(CP_ACP, 0, gb2312, -1, wstr, wlen);

    // Second pass: size and perform the UTF-16 -> UTF-8 conversion.
    int len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
    if (len <= 0) {
        delete[] wstr;
        return NULL;
    }
    char* str = new char[len];
    WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL);
    // `new` never returns null (it throws), so the original's
    // `if (wstr)` guard was redundant.
    delete[] wstr;
    return str;
}
其次
查看ConvertUTF.h文件,看里边的转换方法
/*
 * ConvertUTF.h -- Unicode conversion utilities (derived from the
 * Unicode, Inc. / LLVM ConvertUTF helpers). Declares conversions
 * between UTF-8, UTF-16 and UTF-32, plus validity helpers.
 */
#ifndef LLVM_SUPPORT_CONVERTUTF_H
#define LLVM_SUPPORT_CONVERTUTF_H
#include <stddef.h> /* ptrdiff_t */
/* ---------------------------------------------------------------------
The following 4 definitions are compiler-specific.
The C standard does not guarantee that wchar_t has at least
16 bits, so wchar_t is no less portable than unsigned short!
All should be unsigned values to avoid sign extension during
bit mask & shift operations.
------------------------------------------------------------------------ */
typedef unsigned int UTF32; /* at least 32 bits */
typedef unsigned short UTF16; /* at least 16 bits */
typedef unsigned char UTF8; /* typically 8 bits */
typedef unsigned char Boolean; /* 0 or 1 */
/* Some fundamental constants */
#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
#define UNI_MAX_BMP (UTF32)0x0000FFFF
#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
/* Result of a conversion attempt. */
typedef enum {
conversionOK, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */
targetExhausted, /* insuff. room in target for conversion */
sourceIllegal /* source sequence is illegal/malformed */
} ConversionResult;
/* strictConversion rejects malformed input; lenientConversion replaces
   it with UNI_REPLACEMENT_CHAR. */
typedef enum {
strictConversion = 0,
lenientConversion
} ConversionFlags;
/* This is for C++ and does no harm in C */
#ifdef __cplusplus
extern "C" {
#endif
/* Each conversion advances *sourceStart / *targetStart past what was
   consumed / produced, so conversions can be resumed after
   targetExhausted. */
ConversionResult ConvertUTF8toUTF16 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF8toUTF32 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF16toUTF8 (
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF32toUTF8 (
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF16toUTF32 (
const UTF16** sourceStart, const UTF16* sourceEnd,
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
ConversionResult ConvertUTF32toUTF16 (
const UTF32** sourceStart, const UTF32* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
unsigned getNumBytesForUTF8(UTF8 firstByte);
int getUTF8StringLength(const UTF8* utf8);
#ifdef __cplusplus
}
/*************************************************************************/
/* Below are LLVM-specific wrappers of the functions above. */
//#include "llvm/ADT/ArrayRef.h"
//#include "llvm/ADT/StringRef.h"
#include <vector>
#include <string>
namespace llvm {
/**
 * Convert a UTF-8 std::string to UTF8, UTF16, or UTF32 depending on
 * WideCharWidth. The converted data is written to ResultPtr, which needs to
 * point to at least WideCharWidth * (Source.size() + 1) bytes. On success,
 * ResultPtr will point one after the end of the copied string. On failure,
 * ResultPtr will not be changed, and ErrorPtr will be set to the location of
 * the first character which could not be converted.
 * \return true on success.
 */
bool ConvertUTF8toWide(unsigned WideCharWidth, const std::string& Source,
char *&ResultPtr, const UTF8 *&ErrorPtr);
/**
 * Convert an Unicode code point to UTF8 sequence.
 *
 * \param Source a Unicode code point.
 * \param [in,out] ResultPtr pointer to the output buffer, needs to be at least
 * \c UNI_MAX_UTF8_BYTES_PER_CODE_POINT bytes. On success \c ResultPtr is
 * updated one past end of the converted sequence.
 *
 * \returns true on success.
 */
bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
/**
 * Convert the first UTF8 sequence in the given source buffer to a UTF32
 * code point.
 *
 * \param [in,out] source A pointer to the source buffer. If the conversion
 * succeeds, this pointer will be updated to point to the byte just past the
 * end of the converted sequence.
 * \param sourceEnd A pointer just past the end of the source buffer.
 * \param [out] target The converted code
 * \param flags Whether the conversion is strict or lenient.
 *
 * \returns conversionOK on success
 *
 * \sa ConvertUTF8toUTF32
 */
static inline ConversionResult convertUTF8Sequence(const UTF8 **source,
const UTF8 *sourceEnd,
UTF32 *target,
ConversionFlags flags) {
if (*source == sourceEnd)
return sourceExhausted;
unsigned size = getNumBytesForUTF8(**source);
if ((ptrdiff_t)size > sourceEnd - *source)
return sourceExhausted;
return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
}
/**
 * Returns true if a blob of text starts with a UTF-16 big or little endian byte
 * order mark.
 */
bool hasUTF16ByteOrderMark(const char* SrcBytes, size_t len);
/**
 * Converts a stream of raw bytes assumed to be UTF16 into a UTF8 std::string.
 *
 * \param [in] utf16 A buffer of what is assumed to be UTF-16 encoded text.
 * \param [out] Out Converted UTF-8 is stored here on success.
 * \returns true on success
 */
bool convertUTF16ToUTF8String(const std::u16string& utf16, std::string &Out);
} /* end namespace llvm */
#endif /* __cplusplus */
/* FIX: the original excerpt was missing this closing #endif for the
   LLVM_SUPPORT_CONVERTUTF_H include guard opened at the top of the
   file, which makes the header fail to compile. */
#endif /* LLVM_SUPPORT_CONVERTUTF_H */
先解决第一个问题,我们使用VS2010开发的时候,调试的时候,中文打印出来都是乱码,这个问题很纠结。
如下图:
- CCLOG("cclog: 测试使用标签的自动换行和个别字体大写");LOGNEWLINE;
输出结果如下图:
- <dict>
- cocos2d.x.version: 2.1rc0-x-2.1.4
- cocos2d.x.compiled_with_profiler: false
- cocos2d.x.compiled_with_gl_state_cache: true
- gl.vendor: NVIDIA Corporation
- gl.renderer: GeForce 310M/PCIe/SSE2
- gl.version: 3.3.0
- gl.max_texture_size: 8192
- gl.max_texture_units: 96
- gl.supports_PVRTC: false
- gl.supports_NPOT: true
- gl.supports_BGRA8888: false
- gl.supports_discard_framebuffer: false
- gl.supports_vertex_array_object: true
- </dict>
- cclog: ?????????????????к?????????д
这是很大的一个问题,是吗?英语不是很熟练的小伙伴们?
不得不说,VS2010这个开发工具没说的,真是开发者的杯具开始。尤其是对我使用MAC非常熟悉的人来说。
好的,废话不说,看下怎么使用控制台来调试我们的代码,能够正常输出中文。
这里需要配置 main.cpp,通过它来调出控制台输出中文,但是有一点限制,就是必须使用 cout 或者 printf,而不能使用 CCLog。
直接贴出代码:
- #include "main.h"
- #include "AppDelegate.h"
- #include "CCEGLView.h"
- #include "net/NetWork.h"
- #include "adapter/RouterAdapter.h"
- USING_NS_CC;
- #define USE_WIN32_CONSOLE
- int APIENTRY _tWinMain(HINSTANCE hInstance,
- HINSTANCE hPrevInstance,
- LPTSTR lpCmdLine,
- int nCmdShow)
- {
- UNREFERENCED_PARAMETER(hPrevInstance);
- UNREFERENCED_PARAMETER(lpCmdLine);
- #ifdef USE_WIN32_CONSOLE
- AllocConsole();
- freopen("CONIN$","r",stdin);
- freopen("CONOUT$","w",stdout);
- freopen("CONOUT$","w",stderr);
- #endif
- // create the application instance
- CCEGLView* eglView = CCEGLView::sharedOpenGLView();
- eglView->setViewName("MT");
- //eglView->setFrameSize(480,320);
- return CCApplication::sharedApplication()->run();
- #ifdef USE_WIN32_CONSOLE
- FreeConsole();
- #endif
- }
通过这个USE_WIN32_CONSOLE来实现我们的代码调试。测试代码如下
- CCLOG("cclog: 测试使用标签的自动换行和个别字体大写");LOGNEWLINE;
- printf("printf: 测试使用标签的自动换行和个别字体大写");LOGNEWLINE;
VS2010输出窗口如下:
- <dict>
- cocos2d.x.version: 2.1rc0-x-2.1.4
- cocos2d.x.compiled_with_profiler: false
- cocos2d.x.compiled_with_gl_state_cache: true
- gl.vendor: NVIDIA Corporation
- gl.renderer: GeForce 310M/PCIe/SSE2
- gl.version: 3.3.0
- gl.max_texture_size: 8192
- gl.max_texture_units: 96
- gl.supports_PVRTC: false
- gl.supports_NPOT: true
- gl.supports_BGRA8888: false
- gl.supports_discard_framebuffer: false
- gl.supports_vertex_array_object: true
- </dict>
- cclog: ?????????????????к?????????д
为什么这里只输出了 CCLog 的内容,而没有 printf 的内容呢?
看下控制台,如下:
然后是第二个问题,模拟器上竟然也是乱码!!!如果英语很好,在调试的时候,可以使用英文代替,但是到了开发的时候了,模拟器上中文竟然也是乱码??!
这实在让人头疼。
测试代码:
- // Label created straight from a GB2312 source-file literal; on the
- // win32 simulator this renders as mojibake (see the screenshot below).
- CCLabelTTF *testLabel = CCLabelTTF::create("测试使用标签的自动换行和个别字体大写,test","Zapfino",30);
- testLabel->setPosition(ccp(visibleSize.width*0.5,visibleSize.height*0.2));
- testLabel->setColor(ccc3(200,200,200));
- this->addChild(testLabel,1);
模拟器运行如下图:
有了时间再写,很快。
好的,让我们来解决这个问题吧。
为什么会出现这个问题呢?那是因为 VS2010 使用的编码方式是 GB2312,这时只有我们把 Unicode 编码转换成 GB2312,才能正常显示。
这里还需要一个wstring的数据类型。
这个是官方文档的介绍。实际上我们不需要关注太多,只需要知道 wchar_t 是 C/C++ 的字符数据类型,是一种扩展的字符存储方式。wchar_t 类型主要用在国际化程序的实现中,但它不等同于 Unicode 编码;Unicode 编码的字符一般以 wchar_t 类型存储。char 是 8 位字符类型,最多只能表示 256 种字符,许多外文字符集所含的字符数目超过 256 个,char 型无法表示。
wchar_t数据类型一般为16位或32位,但不同的C或C++库有不同的规定,如GNU Libc规定wchar_t为32位[1],总之,wchar_t所能表示的字符数远超char型。
标准C++中的wprintf()函数以及iostream类库中的类和对象能提供wchar_t宽字符类型的相关操作。
- wstring
- Visual Studio 2012 其他版本
- 声明宽字符字符串。 wstring 是专用模板选件类 basic_string 包含 wchar_t 字符串数据的typedef。 wstring 继承 basic_string 选件类的功能;这包括运算符和方法。
- 专用 basic_string 的其他typedef包括 字符串、u16string和 u32string。 有关更多信息,请参见 string 成员。
- typedef basic_string<wchar_t, char_traits<wchar_t>, allocator<wchar_t> > wstring;
- 示例
- // string_wstring.cpp
- // compile with: /EHsc
- #include <string>
- #include <iostream>
- int main( )
- {
- using namespace std;
- // Equivalent ways to declare an object of type
- // basic_string <wchar_t>
- const basic_string <wchar_t> s1 ( L"abc" );
- wstring s2 ( L"abc" ); // Uses the typedef for wstring
- // Comparison between two objects of type basic_string <wchar_t>
- if ( s1 == s2 )
- cout << "The strings s1 & s2 are equal." << endl;
- else
- cout << "The strings s1 & s2 are not equal." << endl;
- }
- 字符串文件& s2相等。
- 要求
- 标头: <string>
- 命名空间: std
把wstring 当成 string去使用就OK了。
所以,我们只需要把 Unicode 编码转成 GBK,就可以在 VS2010 中使用中文显示,并可以在模拟器上显示中文。
这里有一个转化类,一个简单的接口就可以解决我们的第二个问题。
废话不说,代码才是王道。
#pragma once
#include <iostream>
#include <stdio.h>
// <string> is the C++ header declaring the std::string class
// (and the std::wstring typedef); `string s1` creates a string object.
#include <string>
// FIX: the original included <cstringt.h>, which is the ATL/MFC
// CStringT header, NOT "the std version of the old C header" as its
// comment claimed -- that header is <cstring>.
#include <cstring>

// Helper macro used by the article's sample logging calls.
#define LOGNEWLINE printf("\n")

using namespace std;

// Converts a wide (UTF-16) string to a narrow byte string so that the
// VS2010 output window and the win32 simulator can display Chinese.
class UTF8ToGBK
{
public:
	UTF8ToGBK(void);
	~UTF8ToGBK(void);
	// Convert a wide string to a narrow byte string.
	// NOTE(review): despite the name, the implementation converts with
	// CP_UTF8, so the output is UTF-8 rather than GBK (VS2010's default
	// code page) -- confirm the intended target encoding.
	static string UTF8TOGBK(const wstring text);
};
- #include "UTF8ToGBK.h"
- UTF8ToGBK::UTF8ToGBK(void)
- {
- }
- UTF8ToGBK::~UTF8ToGBK(void)
- {
- }
- //将utf8格式编码转化成gbk,vs2010的默认的编码格式
- string UTF8ToGBK::UTF8TOGBK(const wstring text)
- {
- wstring tes;
- int asciSize = WideCharToMultiByte(CP_UTF8,0,text.c_str(),text.size(),NULL,0,NULL,NULL);
- if (asciSize == ERROR_NO_UNICODE_TRANSLATION || asciSize == 0)
- {
- return string();
- }
- char *resultString = new char[asciSize];
- int conveResult = WideCharToMultiByte(CP_UTF8,0,text.c_str(),text.size(),resultString,asciSize,NULL,NULL);
- if (conveResult != asciSize)
- {
- return string();
- }
- string buffer = "";
- buffer.append(resultString,asciSize);
- //CCLog("CCLog: buffer: %s",buffer.c_str());LOGNEWLINE;
- //buffer = strcpy();
- //printf("resultString1: %s",resultString);LOGNEWLINE;
- //CCLog("resultString2: %s",resultString);LOGNEWLINE;
- delete[] resultString; //释放内存
- return buffer;
- }
好的,现在让我们测试一下。
测试代码如下:
- // Wide-string literals: VS2010 stores L"..." as UTF-16 (wchar_t).
- wstring testWstring1 = L"测试使用标签的自动换行和个别字体大写,test";
- wstring testWstring2 = L"测试使用标签的自动换行和个别字体大写,test";
- // Convert the wide strings to narrow byte strings for logging/labels.
- // NOTE(review): UTF8TOGBK converts with CP_UTF8, i.e. it actually
- // produces UTF-8 bytes despite its name -- verify against the output.
- string testString1 = this->UTF8TOGBK(testWstring1);
- string testString2 = this->UTF8TOGBK(testWstring2);
- CCLog("cclog: testString1: %s",testString1.c_str());
- CCLog("cclog: testString2: %s",testString2.c_str());
- printf("printf: testString1: %s",testString1.c_str());LOGNEWLINE;
- printf("printf: testString2: %s",testString2.c_str());LOGNEWLINE;
- // Test automatic label line-wrapping.
- // Tests label word-wrap with mixed-case glyphs and alignment.
- string test = "测试使用标签的自动换行和个别字体大写";
- //CCLabelTTF *testLabel = CCLabelTTF::create(testString1.c_str(),"Zapfino",30);
- //string testString1 = this->UTF8TOGBK(testWstring1);
- CCLabelTTF *testLabel = CCLabelTTF::create(testString1.c_str(),"Zapfino",30);
- testLabel->setPosition(ccp(visibleSize.width*0.5,visibleSize.height*0.2));
- testLabel->setColor(ccc3(200,200,200));
- this->addChild(testLabel,1);
好的,分别看下VS2010 的输出框和 控制台输出框。
先来VS2010的:
可以发现,转码后,VS认到我们的祖宗文字了。
然后看控制台的输出:
也是OK的,细心的小伙伴们肯定发现了什么?
是的,控制台可以输出 Unicode 编码,但需要使用 cout 或 printf;如果输出 GBK 编码,则需要用 CCLog 输出。
VS2010 比较单纯,只认 GBK 编码 和 CCLog 输出。
差点忘了效果图,贴上: