在 C++ MFC 对话框项目中抓取编码为 UTF-8 的网页时,会出现中文乱码。项目设置为:项目 -> 属性 -> 常规 -> 字符集:使用多字节字符集。
找了很长时间才找到这个转换函数,中间过程各种苦,就不多说了,下面贴代码。这一段是函数的使用:
// Usage example: download a UTF-8 encoded page and re-encode it, line by line,
// to the system ANSI code page so it can be used in a multi-byte (MBCS) build.
// This fragment lives inside a function that returns 0 on failure.
CString url;
// NOTE(review): the host looks like a typo for "baidu.com" -- confirm before use.
url = "https://www.badiu.com";
CInternetSession session(NULL, 0);
CHttpFile* file = NULL;
CString strContent;
try
{
    file = static_cast<CHttpFile*>(session.OpenURL(url));
    if (file == NULL)
    {
        // OpenURL can hand back NULL instead of throwing; nothing to read then.
        session.Close();
        return 0;
    }

    // Reading stays inside the try block: ReadString/Close may also raise
    // CInternetException, which previously escaped and leaked `file`.
    CString strtemp;
    while (file->ReadString(strtemp))
    {
        strContent += Convert(strtemp, CP_UTF8, CP_ACP);
    }
    file->Close();
    delete file;
    file = NULL;
}
catch (CInternetException* pException)
{
    pException->ReportError();
    pException->Delete();
    // Release the file object if the failure happened after it was opened.
    if (file != NULL)
    {
        delete file;
        file = NULL;
    }
    session.Close();
    return 0;
}
session.Close();
OutputDebugString(strContent);
下面是函数的实现部分(注:我这里把它作为全局函数使用):
/// Re-encodes a narrow (multi-byte) string from one Windows code page to
/// another by going through UTF-16.
///
/// Intended for projects built with the multi-byte character set, where
/// CString stores char data; in a Unicode build the LPCSTR/LPSTR assignments
/// below fail to compile instead of silently converting garbage.
///
/// @param str            Text encoded in sourceCodePage (not modified).
/// @param sourceCodePage Code page of str, e.g. CP_UTF8.
/// @param targetCodePage Code page of the result, e.g. CP_ACP.
/// @return The text encoded in targetCodePage, or an empty string if either
///         conversion step fails.
CString Convert(CString& str, int sourceCodePage, int targetCodePage)
{
    // Read-only access is enough; GetBuffer() would require a matching
    // ReleaseBuffer() call.
    LPCSTR pszSource = str.GetString();

    // First pass: ask for the required UTF-16 length. With a source length
    // of -1 the returned count includes the terminating null.
    const int wideLen = ::MultiByteToWideChar(static_cast<UINT>(sourceCodePage), 0,
                                              pszSource, -1, NULL, 0);
    if (wideLen <= 0)
    {
        return CString();
    }

    // CStringW owns the intermediate buffer, so nothing leaks on early return
    // or if a later allocation throws.
    CStringW wide;
    LPWSTR pWide = wide.GetBuffer(wideLen);
    const int wideWritten = ::MultiByteToWideChar(static_cast<UINT>(sourceCodePage), 0,
                                                  pszSource, -1, pWide, wideLen);
    wide.ReleaseBuffer(wideWritten > 0 ? wideWritten - 1 : 0);
    if (wideWritten <= 0)
    {
        return CString();
    }

    // Second pass: UTF-16 -> target code page, again sizing first.
    const int targetLen = ::WideCharToMultiByte(static_cast<UINT>(targetCodePage), 0,
                                                wide.GetString(), -1, NULL, 0, NULL, NULL);
    if (targetLen <= 0)
    {
        return CString();
    }

    CString result;
    LPSTR pTarget = result.GetBuffer(targetLen);
    const int targetWritten = ::WideCharToMultiByte(static_cast<UINT>(targetCodePage), 0,
                                                    wide.GetString(), -1, pTarget, targetLen,
                                                    NULL, NULL);
    // The written count includes the terminating null; exclude it from the length.
    result.ReleaseBuffer(targetWritten > 0 ? targetWritten - 1 : 0);
    return result;
}
希望能帮到大家。