UTF-8的格式标志其实就是文件头的三个字节:0xEF,0xBB,0xBF

用UltraEdit打开这个xml,UltraEdit就把它认为是一个UTF-8的文本文件了


tinyxml_utf_8.PNG




打开UTF8 的xml文件需要使用转换函数WideCharToMultiByte和MultiByteToWideChar

/// Converts a string in place from UTF-8 to the system ANSI code page.
///
/// The text is first widened from UTF-8 to UTF-16 with MultiByteToWideChar and
/// then narrowed to CP_ACP with WideCharToMultiByte. CP_ACP is the system
/// default ANSI code page, so the result is GBK only on systems whose ANSI
/// code page is 936.
///
/// If any conversion step fails, strUtf8 is left unchanged.
///
/// NOTE(review): the LPCTSTR conversion only matches the narrow-string
/// parameter of MultiByteToWideChar in MBCS (non-UNICODE) builds -- confirm
/// the project is built that way.
///
/// @param strUtf8  On entry, UTF-8 encoded bytes; on successful return, the
///                 same text encoded in the system ANSI code page.
void ConvertUtf8ToGBK(CString& strUtf8)
{
    // Read through the const conversion operator rather than GetBuffer():
    // GetBuffer() hands out a writable buffer that must be paired with
    // ReleaseBuffer(), and nothing here needs write access.
    LPCTSTR pszUtf8 = static_cast<LPCTSTR>(strUtf8);

    // Size query: with cbMultiByte == -1 the returned count includes the
    // terminating null character. Zero means the call failed.
    const int cchWide = MultiByteToWideChar(CP_UTF8, 0, pszUtf8, -1, NULL, 0);
    if (cchWide <= 0)
    {
        return;
    }

    // Value-initialised (zero-filled) buffer with one spare element.
    wchar_t* wszText = new wchar_t[cchWide + 1]();

    if (MultiByteToWideChar(CP_UTF8, 0, pszUtf8, -1, wszText, cchWide) > 0)
    {
        const int cbAnsi = WideCharToMultiByte(CP_ACP, 0, wszText, -1, NULL, 0, NULL, NULL);
        if (cbAnsi > 0)
        {
            char* szAnsi = new char[cbAnsi + 1]();

            // Only overwrite the caller's string once the whole round trip
            // has succeeded, so a failure never replaces it with empty text.
            if (WideCharToMultiByte(CP_ACP, 0, wszText, -1, szAnsi, cbAnsi, NULL, NULL) > 0)
            {
                strUtf8 = szAnsi;
            }

            delete[] szAnsi;
        }
    }

    delete[] wszText;
}


写UTF8文件:

在tinyxml.cpp文件中搜索“useMicrosoftBOM”,把“useMicrosoftBOM = false;”这个语句改为“useMicrosoftBOM = true;”就可以了  ----测试不行。


void  CDuiFrameWnd::ConvertGBKToUtf8(CString& strGBK)

{

int len = MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK.GetBuffer(), -1, NULL, 0);

wchar_t * wszGBK = new wchar_t[len + 1];

memset(wszGBK, 0, len * 2 + 2);

MultiByteToWideChar(CP_ACP, 0, (LPCTSTR)strGBK.GetBuffer(), -1, wszGBK, len);

len = WideCharToMultiByte(CP_UTF8, 0, wszGBK, -1, NULL, 0, NULL, NULL);

char *szGBK = new char[len + 1];

memset(szGBK, 0, len + 1);

WideCharToMultiByte(CP_UTF8, 0, wszGBK, -1, szGBK, len, NULL, NULL);

strGBK = szGBK;

delete[] szGBK;

delete[] wszGBK;

}

写入正常显示中文