使用google::protobuf进行序列化,在运行时有如下警告!
libprotobuf ERROR google/protobuf/wire_format.cc:1059] Encountered string containing invalid UTF-8 data while serializing protocol buffer. Strings must contain only UTF-8; use the 'bytes' type for raw bytes.
libprotobuf ERROR google/protobuf/wire_format.cc:1059] Encountered string containing invalid UTF-8 data while serializing protocol buffer. Strings must contain only UTF-8; use the 'bytes' type for raw bytes.
原因:protobuf 要求所有 string 类型字段的内容必须是合法的 UTF-8 编码(原始二进制数据应使用 bytes 类型);对于 GBK 等其他编码的字符串,可以先使用 <iconv.h> 进行编码转换。
介绍一下字符集相关知识:
在中文字符编码方面,演化顺序为:ASCII ⇒ GB2312 ⇒ GBK ⇒ GB18030
下面贴一段转换的代码:
#include <iconv.h>
#include <stdlib.h>

#include <iostream>
#include <string>

using namespace std;
/**
 * Converts a GBK-encoded byte string to UTF-8 using iconv.
 *
 * @param instr  GBK-encoded input bytes. The conversion itself does not
 *               modify it.
 * @param outstr Receives the UTF-8 bytes on success and is left untouched
 *               on failure. May refer to the same object as instr.
 * @return true on success; false if the conversion descriptor cannot be
 *         opened or the input contains an invalid or incomplete GBK
 *         sequence.
 */
bool convertGbk2Utf(std::string& instr, std::string& outstr)
{
    const iconv_t descriptor = iconv_open("UTF-8", "GBK");
    // iconv_open signals failure with (iconv_t)-1, not with a null handle.
    if (descriptor == (iconv_t)-1)
    {
        return false;
    }

    size_t inRemaining = instr.length();
    // iconv takes a non-const char** for historical reasons; it only reads
    // from the input buffer.
    char* inCursor = const_cast<char*>(instr.c_str());

    // One input byte never expands to more than three UTF-8 bytes for
    // BMP characters, so 3x is a safe upper bound (+1 keeps the buffer
    // non-empty for empty input).
    std::string converted(inRemaining * 3 + 1, '\0');
    size_t outRemaining = converted.size();
    char* outCursor = &converted[0];

    const size_t rc =
        iconv(descriptor, &inCursor, &inRemaining, &outCursor, &outRemaining);
    // Release the descriptor on every path, success or failure.
    iconv_close(descriptor);

    if (rc == (size_t)-1)
    {
        return false;
    }

    // Keep exactly the bytes iconv produced. Using the byte count rather than
    // C-string semantics preserves embedded NUL characters.
    converted.resize(converted.size() - outRemaining);
    // Swapping only after the conversion is complete keeps the call safe
    // when instr and outstr are the same object.
    outstr.swap(converted);
    return true;
}
/**
 * Demo entry point: converts a sample string from GBK to UTF-8 in place and
 * prints the result.
 *
 * @return EXIT_SUCCESS when the conversion succeeds, EXIT_FAILURE otherwise.
 */
int main()
{
    // NOTE(review): the bytes of this literal depend on the encoding of the
    // source file; the conversion below treats them as GBK, so the file is
    // presumably expected to be saved as GBK — confirm.
    std::string str = "黄";
    // The same object is passed as input and output to convert in place.
    if (!convertGbk2Utf(str, str))
    {
        std::cerr << "GBK to UTF-8 conversion failed" << std::endl;
        return EXIT_FAILURE;
    }
    std::cout << str << std::endl;
    return EXIT_SUCCESS;
}