//下面是学习power8博客敏感词比赛的时候,用到了他人的代码,在此记录一下,并对分享者表示感谢!
//先把文件读入内存,再对内存文件进行转码等处理,比使用ifstream getline要快n倍!
// Converts the NUL-terminated multibyte string `mbstr` to a wide-character
// string using the current C locale.
//
// The printing step is currently commented out, so the function only performs
// the conversion into a temporary buffer and then releases that buffer.
//
// mbstr: NUL-terminated multibyte string. A null pointer, or a string that is
//        not a valid multibyte sequence in the current locale, is ignored.
void print_as_wide(const char* mbstr)
{
    if (mbstr == NULL)
        return;

    // With a null destination mbsrtowcs only measures: it returns the number
    // of wide characters required (terminator excluded) and leaves `mbstr`
    // unchanged, or (size_t)-1 when it meets an invalid multibyte sequence.
    std::mbstate_t state = std::mbstate_t();
    const std::size_t count = std::mbsrtowcs(NULL, &mbstr, 0, &state);
    if (count == static_cast<std::size_t>(-1))
        return; // input cannot be converted in the current locale

    const std::size_t len = count + 1; // room for the terminating L'\0'
    wchar_t* wstr = static_cast<wchar_t*>(malloc(sizeof(wchar_t) * len));
    if (wstr == NULL)
        return; // allocation failed; nothing to convert into

    // Start the real conversion pass from the initial shift state.
    state = std::mbstate_t();
    std::mbsrtowcs(wstr, &mbstr, len, &state);
    //std::wcout << "Wide string: " << wstr << '\n'
    // << "The length, including '\\0': " << len << '\n';

    // The buffer is only needed for the (disabled) print above; release it so
    // repeated calls do not leak.
    free(wstr);
}
int main(int argc, char *argv[])
{
cout<<"argv = wordsFile blogFile resultFile"<<endl;
clock_t timeStart = clock();
filebuf *pbuf;
ifstream filestr;
long size;
char * buffer;
// 要读入整个文件,必须采用二进制打开
filestr.open ("_blog-mg_out", ios::binary);
// 获取filestr对应buffer对象的指针
pbuf=filestr.rdbuf();
// 调用buffer对象方法获取文件大小
size=pbuf->pubseekoff (0,ios::end,ios::in);
pbuf->pubseekpos (0,ios::in);
// 分配内存空间
buffer=new char[size];
// 获取文件内容
pbuf->sgetn (buffer,size);
filestr.close();
// 输出到标准输出
//cout.write (buffer,size);
std::setlocale(LC_ALL, "en_US.utf8");
print_as_wide(buffer);
delete []buffer;