问题说明
如果直接读取 UTF-8/UTF-8-BOM 编码的文件,在调试窗口查看其中的中文变量时,会发现均为乱码,可能影响后续的程序运行。
方法
在 Windows 平台下,定义一个函数专门用于转换中文编码,从 UTF-8 转换到 GBK。
#ifdef _WIN32 // Defined for both 32-bit and 64-bit Windows targets
#include <windows.h>
/**
 * Converts a NUL-terminated UTF-8 string to the system ANSI code page.
 *
 * The text is first decoded to UTF-16 with MultiByteToWideChar(CP_UTF8) and
 * then re-encoded with WideCharToMultiByte(CP_ACP). CP_ACP is whatever ANSI
 * code page the system is configured with; the result is GBK only when that
 * code page is GBK (e.g. a Simplified Chinese Windows installation).
 *
 * @param str  NUL-terminated UTF-8 text; may be null.
 * @return The converted text without a trailing NUL. An empty string is
 *         returned when str is null or empty, or when either Win32
 *         conversion call reports failure.
 */
string UTF8ToGB(const char* str)
{
    if (str == nullptr || *str == '\0')
    {
        return string();
    }

    // Size query: number of UTF-16 code units needed, terminating NUL included
    // (because the source length is given as -1). Zero signals failure.
    const int wideLen = MultiByteToWideChar(CP_UTF8, 0, str, -1, nullptr, 0);
    if (wideLen <= 0)
    {
        return string();
    }

    // The buffers are owned by standard containers, so nothing leaks on an
    // early return or if an allocation throws.
    std::wstring wide(static_cast<size_t>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_UTF8, 0, str, -1, &wide[0], wideLen) <= 0)
    {
        return string();
    }

    // Size query: number of bytes needed in the ANSI code page, NUL included.
    const int ansiLen = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, nullptr, 0, nullptr, nullptr);
    if (ansiLen <= 0)
    {
        return string();
    }

    string result(static_cast<size_t>(ansiLen), '\0');
    if (WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &result[0], ansiLen, nullptr, nullptr) <= 0)
    {
        return string();
    }

    // The API wrote the terminating NUL into the last slot; drop it so that
    // size() reflects only the converted text.
    result.resize(static_cast<size_t>(ansiLen) - 1);
    return result;
}
#else
// Not a Windows OS: UTF8ToGB is not provided on this platform.
// (A statement such as printf() is not allowed at namespace scope.)
#endif
/**
 * Reads a delimited text file (such as a CSV file) into a table of strings.
 *
 * The file is read line by line. Each line is handed to
 * split(line, delimiter, fields) and the resulting fields are appended to the
 * returned table as one row, in file order. On Windows every field is passed
 * through UTF8ToGB() before being stored; on other platforms the fields are
 * stored unchanged. A UTF-8 byte order mark at the very start of the file is
 * removed so it does not end up inside the first field.
 *
 * @param filename   Path of the file to read.
 * @param delimiter  Separator character forwarded to split().
 * @return One row per line read. The table is empty when the file cannot be
 *         opened (a message is written to standard output in that case).
 */
h_csv read_h_csv(string filename, char delimiter) {
    h_csv outvecvecstr;

    ifstream fin(filename);
    if (!fin.good())
    {
        cout << "file : " << filename << " read failed!" << endl;
        return outvecvecstr;
    }

    string line;
    bool first_line = true;
    while (getline(fin, line))
    {
        if (first_line)
        {
            first_line = false;
            // EF BB BF is the UTF-8 encoding of the byte order mark (U+FEFF).
            if (line.compare(0, 3, "\xEF\xBB\xBF") == 0)
            {
                line.erase(0, 3);
            }
        }

        // A fresh vector per line: split() is defined elsewhere and is given
        // an empty container each time, exactly one row's worth of fields.
        std::vector<string> fields;
        split(line, delimiter, fields); // csv line -> fields

#ifdef _WIN32 // Defined for both 32-bit and 64-bit Windows targets
        std::vector<string> row_datas;
        row_datas.reserve(fields.size());
        for (const string& field : fields)
        {
            row_datas.push_back(UTF8ToGB(field.c_str()));
        }
        outvecvecstr.push_back(row_datas);
#else
        // No code page conversion outside Windows: keep the bytes as read.
        outvecvecstr.push_back(fields);
#endif
    }

    // fin is closed by its destructor.
    return outvecvecstr;
}