首先展示一下简繁转换的效果(结果图略),调用示例代码如下:
#include <iostream>
#include"../KOpenCC/KOpenCCExport.h"
#include<string>
#ifdef _WIN64
#pragma comment(lib,"../x64//Release/KOpenCC.lib")
#else
#pragma comment(lib,"../Release/KOpenCC.lib")
#endif // _WIN64
int main()
{
std::wcout.imbue(std::locale("chs"));
KWString s(L"KOpenCC.txt");
CKOpenCCExport m;
m.CKOpenCC_InitCCMap(s);
KWString pStrIn(L"我们一起去怼牛太公"), pStrOut;
std::wstring ws;
pStrOut = ws.c_str();
m.CKOpenCC_Switch(s,pStrIn, pStrOut,1);//简到繁
std::wcout << pStrIn.c_str() << " -> " << pStrOut.c_str() << L"\n";
pStrIn = ws.c_str();
m.CKOpenCC_Switch(s,pStrOut,pStrIn,2);//繁到简
std::wcout << pStrOut.c_str() << " -> " << pStrIn.c_str() << L"\n";
std::cin.get();
}
具体实现如下:
创建一个 C++ DLL 工程,并在其中定义如下用于导出的简繁转换类。
1.自定义接口字符串类 与 接口导出转换类
#ifndef H_88748467_32FF_4954_AF6C_B6C6DED7885F
#define H_88748467_32FF_4954_AF6C_B6C6DED7885F
#ifdef H_BCD969BF_81FD_4C53_B792_C128FB4C2F8E
#define COPENCCEXPORTDLLPORT __declspec(dllexport)
#else
#define COPENCCEXPORTDLLPORT __declspec(dllimport)
#endif
// Wide-character string used at the DLL interface so that code on both sides
// of the boundary exchanges text through one self-contained type.
//
// Invariant: m_pwsr always points to a heap buffer of m_size + 1 characters
// that this object owns and that is NUL-terminated (never null).
class KWString
{
public:
    // Creates an empty string.
    KWString()
        : m_pwsr(Duplicate(nullptr, 0)), m_size(0)
    {
    }

    // Copies the NUL-terminated string pwstr; a null pointer yields an empty string.
    KWString(const wchar_t* pwstr)
        : m_pwsr(nullptr), m_size(pwstr != nullptr ? wcslen(pwstr) : 0)
    {
        m_pwsr = Duplicate(pwstr, m_size);
    }

    // Deep copy. Without this the implicitly generated copy would share the
    // buffer and both objects would delete it.
    KWString(const KWString& ws)
        : m_pwsr(Duplicate(ws.m_pwsr, ws.m_size)), m_size(ws.m_size)
    {
    }

    ~KWString()
    {
        delete[] m_pwsr;
        m_pwsr = nullptr;
    }

    // Replaces the content with a copy of pwstr (null -> empty string).
    // The new buffer is built before the old one is released, so the old
    // buffer is not leaked and pwstr may even point into this object.
    KWString& operator=(const wchar_t* pwstr)
    {
        const size_t newSize = (pwstr != nullptr) ? wcslen(pwstr) : 0;
        wchar_t* const fresh = Duplicate(pwstr, newSize);
        delete[] m_pwsr;
        m_pwsr = fresh;
        m_size = newSize;
        return *this;
    }

    // Replaces the content with a copy of ws; self-assignment is a no-op.
    KWString& operator=(const KWString& ws)
    {
        if (this != &ws)
        {
            wchar_t* const fresh = Duplicate(ws.m_pwsr, ws.m_size);
            delete[] m_pwsr;
            m_pwsr = fresh;
            m_size = ws.m_size;
        }
        return *this;
    }

    // Number of characters, not counting the terminator.
    size_t length()const
    {
        return m_size;
    }

    // NUL-terminated view of the content; valid until the next modification.
    const wchar_t* c_str()const
    {
        return m_pwsr;
    }

private:
    // Allocates count + 1 characters, copies count characters from src (if
    // any) and appends the terminator. The byte count uses sizeof(wchar_t)
    // instead of assuming 2-byte characters.
    static wchar_t* Duplicate(const wchar_t* src, size_t count)
    {
        wchar_t* const copy = new wchar_t[count + 1];
        if (src != nullptr && count != 0)
        {
            memcpy(copy, src, count * sizeof(wchar_t));
        }
        copy[count] = L'\0';
        return copy;
    }

    wchar_t* m_pwsr;
    size_t m_size;
};
// Exported Simplified/Traditional Chinese converter.
// Words looked up in the dictionary are at most 4 characters long.
class COPENCCEXPORTDLLPORT CKOpenCCExport
{
public:
CKOpenCCExport(void);
~CKOpenCCExport(void);
public:
// Loads a txt mapping file from the "KOpenCC" folder, e.g. sName = L"KOpenCC.txt".
// nFlag = 1 initialises the forward mapping, nFlag = 2 the reverse mapping,
// nFlag = 3 (default) initialises both.
void CKOpenCC_InitCCMap(KWString& sName,int nFlag=3);
// Discards the loaded mappings.
void CKOpenCC_ClearCCMap();
// Converts QWSIn into QSWOut using the mapping loaded for sName.
// nFlag = 1: Simplified -> Traditional; nFlag = 2: Traditional -> Simplified.
void CKOpenCC_Switch(KWString& sName, KWString& QWSIn, KWString& QSWOut, int nFlag=1);
};
#endif
2.定义转换类的实现
//C++ cpp文件
#include "pch.h"
#include "KOpenCCExport.h"
#include "TextFileIO.h"
#include <map>
using namespace std;
// Working tables for the mapping file currently being loaded:
// g_1And2Map maps the left column to the right column, g_2And1Map the reverse.
map<wstring,wstring>g_1And2Map,g_2And1Map;
// All registered tables, keyed by the mapping-file name followed by "1"
// (left -> right) or "2" (right -> left).
map<wstring, map<wstring, wstring>> g_maps;
// Stores in FolderPath the path of the running module with its last
// (pos + 1) path components removed, followed by a trailing backslash.
// With the default pos = 0 only the file name is removed.
void GetPath(wstring& FolderPath, UINT pos = 0)
{
    TCHAR modulePath[MAX_PATH] = { 0 };
    if (GetModuleFileName(NULL, modulePath, _countof(modulePath)))
    {
        for (UINT level = 0; level <= pos; ++level)
        {
            TCHAR* const lastSeparator = _tcsrchr(modulePath, _T('\\'));
            if (lastSeparator == nullptr)
            {
                // No separator left: cut after the first two characters
                // (presumably the drive designator such as "C:").
                modulePath[2] = 0;
            }
            else
            {
                *lastSeparator = 0;
            }
        }
    }
    FolderPath = wstring(modulePath);
    FolderPath += L"\\";
}
// Makes sure the "KOpenCC" data folder exists in the directory returned by GetPath.
CKOpenCCExport::CKOpenCCExport(void)
{
    wstring dataFolder;
    GetPath(dataFolder);
    dataFolder += _T("KOpenCC");
    // The result is ignored; the call is attempted on every construction.
    CreateDirectory(dataFolder.c_str(), NULL);
}
// Releases the loaded mapping tables. Note that they live in the file-level
// g_maps container, not in this object.
CKOpenCCExport::~CKOpenCCExport(void)
{
    this->CKOpenCC_ClearCCMap();
}
void CKOpenCCExport::CKOpenCC_ClearCCMap()
{
map<wstring,map<wstring,wstring>>().swap(g_maps);
}
void CKOpenCCExport::CKOpenCC_InitCCMap(KWString& sName,int nFlag)
{
wstring path;
GetPath(path);
path=path+_T("KOpenCC\\");
wstring wsName = sName.c_str();
path = path + wsName;
CTextFileIO m;
m.ReadWholeTextFileToDQ(path.c_str());
deque<wstring>::iterator it=m._LineWStrDQ.begin();
UINT nLine=1;
bool Flag=false;//给定初始值
size_t Len=m._LineWStrDQ.size();
char separator ='\t';
wstring str,strLeft,strRight;
deque<wstring> WStrDQ;
size_t pos=-1;
wstring s0;
g_2And1Map.clear();
g_1And2Map.clear();
for(int j=0;it!=m._LineWStrDQ.end();it++,nLine++,j++)
{
if(j<2)continue;
WStrDQ.clear();
str=*it;
//处理结果压入
wstring s0, s1;
//分隔符处理
pos=str.find(separator);
if (pos == string::npos)
continue;
s0 = str.substr(0, pos);
s1 = str.substr(pos + 1);
size_t pos = s0.find(_T("\r\n"));
if (pos != string::npos)s0=s0.substr(0,s0.length()-2);
pos = s1.find(_T("\r\n"));
if (pos != string::npos)s1 = s1.substr(0, s1.length() - 2);
if(s0==s1)continue;
g_1And2Map.insert(pair<wstring,wstring>(s0,s1));
g_2And1Map.insert(pair<wstring,wstring>(s1,s0));
}
wstring wsName1 = wsName + L"1";
wstring wsName2 = wsName + L"2";
if (nFlag == 3)
{
g_maps.insert(pair<wstring, map<wstring, wstring>>(wsName1, g_1And2Map));
g_maps.insert(pair<wstring, map<wstring, wstring>>(wsName2, g_2And1Map));
}
else if (nFlag == 2)
{
g_maps.insert(pair<wstring, map<wstring, wstring>>(wsName2, g_2And1Map));
}
else if (nFlag == 1)
{
g_maps.insert(pair<wstring, map<wstring, wstring>>(wsName1, g_1And2Map));
}
else
;
map<wstring, wstring>().swap(g_1And2Map);
map<wstring, wstring>().swap(g_2And1Map);
}
// Converts QWSIn with the table registered for sName and stores the result in
// QSWOut.
//
// sName  : mapping-file name that was passed to CKOpenCC_InitCCMap.
// QWSIn  : text to convert.
// QSWOut : receives the converted text. It is left untouched when nFlag is
//          neither 1 nor 2, or when no table is registered for sName/nFlag.
// nFlag  : 1 uses the table keyed sName + "1", 2 the table keyed sName + "2".
//
// At every position the longest dictionary entry of 4, 3, 2 or 1 characters
// is replaced; characters without an entry are copied unchanged.
//
// The original tested "i <= Len-4" (and -3, -2) with unsigned arithmetic,
// which wraps around for short inputs and read past the end of the string;
// candidate lengths are now limited to the characters that actually remain.
void CKOpenCCExport::CKOpenCC_Switch(KWString& sName,KWString& QWSIn,KWString& QSWOut,int nFlag)
{
    wstring tableKey = sName.c_str();
    if (nFlag == 1)
        tableKey += L"1";
    else if (nFlag == 2)
        tableKey += L"2";
    else
        return;

    const auto tableIt = g_maps.find(tableKey);
    if (tableIt == g_maps.end())
        return;
    const map<wstring, wstring>& table = tableIt->second;

    const size_t kMaxWordLength = 4;
    const wstring input(QWSIn.c_str());
    const size_t inputLength = input.length();
    wstring output;

    size_t index = 0;
    while (index < inputLength)
    {
        const size_t remaining = inputLength - index;
        size_t candidateLength = remaining < kMaxWordLength ? remaining : kMaxWordLength;

        bool replaced = false;
        for (; candidateLength >= 1; --candidateLength)
        {
            const auto hit = table.find(input.substr(index, candidateLength));
            if (hit != table.end())
            {
                output += hit->second;
                index += candidateLength;
                replaced = true;
                break;
            }
        }

        if (!replaced)
        {
            // No entry starts here: keep the character as it is.
            output += input[index];
            ++index;
        }
    }

    QSWOut = output.c_str();
}
3.定义加载文本映射文件类的头文件和实现代码
#include "pch.h"
#include "TextFileIO.h"
#include "io.h"
#include <iostream>
#include <fstream>
// tstring: the std string type matching the character set of the build.
// NOTE(review): this one is keyed on UNICODE only, while the aliases below
// also accept _UNICODE - confirm both macros are always defined together.
#if defined(UNICODE)
#define tstring std::wstring
#else
#define tstring std::string
#endif
// Open: resolves to the wide or narrow CTextFileIO open function.
#if defined(UNICODE) || defined(_UNICODE)
#define Open OpenW
#else
#define Open OpenA
#endif
// ReadLine: resolves to the wide or narrow line reader.
#if defined(UNICODE) || defined(_UNICODE)
#define ReadLine ReadLineW
#else
#define ReadLine ReadLineA
#endif
// WriteLine: resolves to the wide or narrow line writer.
#if defined(UNICODE) || defined(_UNICODE)
#define WriteLine WriteLineW
#else
#define WriteLine WriteLineA
#endif
// CheckFileEncodingType: resolves to the wide or narrow BOM probe.
#if defined(UNICODE) || defined(_UNICODE)
#define CheckFileEncodingType CheckFileEncodingTypeW
#else
#define CheckFileEncodingType CheckFileEncodingTypeA
#endif
// Creates a reader/writer with no file attached: encoding ANSI, not valid.
CTextFileIO::CTextFileIO(void)
    : _file(0)
    , _encodingType(ANSI)
    , _IsValidate(false)
{
}
// Closes the file if the handle is set and releases the buffered lines.
CTextFileIO::~CTextFileIO(void)
{
    if (_file != 0)
    {
        fclose(_file);
    }
    // Swap with an empty deque so the storage is released.
    deque<wstring> released;
    _LineWStrDQ.swap(released);
}
// Writes WStr to lpPath as a UTF-16 little-endian file: the two BOM bytes
// FF FE followed by the raw characters of WStr. An existing file is replaced.
//
// Returns TRUE when the file was opened and everything was written, FALSE
// otherwise. (The original returned the unrelated _IsValidate flag, so the
// result did not reflect whether the write succeeded.)
BOOL CTextFileIO::WriteWStringToUnicodeTxtFile(LPCTSTR lpPath,wstring& WStr)
{
    ofstream out;
    out.open(lpPath, ios::binary | ios::ate | ios::out);
    if (!out.is_open())
        return FALSE;

    // Byte-order mark announcing UTF-16 LE.
    const char unicodeHead[] = "\xFF\xFE";
    out.write(unicodeHead, 2);

    // Use the real character size instead of assuming 2 bytes.
    const char* const rawBytes = reinterpret_cast<const char*>(WStr.c_str());
    out.write(rawBytes, static_cast<std::streamsize>(WStr.length() * sizeof(wchar_t)));

    out.close();
    return out.fail() ? FALSE : TRUE;
}
// Number of lines read so far by the ANSI branch of ReadLineW; reset at the
// start of every whole-file read.
UINT g_nLine=0;

// Opens lpPath, detects its encoding and appends every line, converted to a
// wide string, to _LineWStrDQ. A leading byte-order mark and the first
// "\r\n" of each line are removed. The final line is stored even when it has
// no line terminator.
//
// Returns the result of opening the file (also stored in _IsValidate); on
// failure nothing is read.
//
// Fixes over the original:
//  * the handle is reset after fclose so the destructor does not close it a
//    second time;
//  * the first character is removed only when it really is U+FEFF, and never
//    from an empty line (substr(1) on an empty string throws).
BOOL CTextFileIO::ReadWholeTextFileToDQ(LPCTSTR lpPath)
{
    if (!(_IsValidate = Open(lpPath, READ)))
        return _IsValidate;
    _encodingType = CheckFileEncodingType(lpPath);

    g_nLine = 0;
    tstring rawLine;
    bool isFirstLine = true;
    bool moreLines = false;
    do
    {
        moreLines = ReadLine(rawLine);
        wstring line(rawLine.c_str());

        if (isFirstLine && _encodingType != ANSI && !line.empty() && line[0] == 0xFEFF)
            line.erase(0, 1);
        isFirstLine = false;

        const size_t breakPos = line.find(TEXT("\r\n"));
        if (breakPos != wstring::npos)
            line.erase(breakPos, 2);

        _LineWStrDQ.push_back(line);
    } while (moreLines);

    if (_file)
    {
        fclose(_file);
        _file = nullptr;
    }
    return _IsValidate;
}
// Reads the whole file at lpPath and stores its lines in WStr, each followed
// by "\r\n". The line buffer is released afterwards. When the file cannot be
// read, WStr is left unchanged. Returns the result of the read.
BOOL CTextFileIO::WStringFromTxtFile(LPCTSTR lpPath,wstring& WStr)
{
    const BOOL loaded = ReadWholeTextFileToDQ(lpPath);
    if (!loaded)
        return loaded;

    WStr.clear();
    const size_t lineCount = _LineWStrDQ.size();
    for (size_t index = 0; index < lineCount; ++index)
    {
        WStr += _LineWStrDQ[index];
        WStr += TEXT("\r\n");
    }

    deque<wstring> released;
    _LineWStrDQ.swap(released);
    return loaded;
}
// Opens filename (wide path) for reading or writing.
// The encoding is taken from the byte-order mark of the file as it exists
// before it is opened; OpenW then skips (READ) or writes (WRITE) that mark.
// _IsValidate records whether the file could be opened.
//
// All members are given defined values first: the original left _file
// uninitialised, so the destructor could call fclose on an indeterminate
// pointer if the open call did not assign it.
CTextFileIO::CTextFileIO(const WCHAR* filename, EDealMode mode)
    : _file(nullptr)
    , _encodingType(ANSI)
    , _IsValidate(FALSE)
{
    // Testing for file type
    _encodingType = CTextFileIO::CheckFileEncodingTypeW(filename);
    _IsValidate = OpenW(filename, mode);
}
// Opens filename (narrow path) for reading or writing.
// The encoding is taken from the byte-order mark of the file as it exists
// before it is opened; OpenA then skips (READ) or writes (WRITE) that mark.
// _IsValidate records whether the file could be opened.
//
// All members are given defined values first: the original left _file
// uninitialised, so the destructor could call fclose on an indeterminate
// pointer if the open call did not assign it.
CTextFileIO::CTextFileIO(const char* filename, EDealMode mode)
    : _file(nullptr)
    , _encodingType(ANSI)
    , _IsValidate(FALSE)
{
    // Testing for file type
    _encodingType = CTextFileIO::CheckFileEncodingTypeA(filename);
    _IsValidate = OpenA(filename, mode);
}
// Opens filename (wide path) in binary mode: "rb" for READ, "wb" otherwise.
// After a successful open the BOM for the current _encodingType is written
// (WRITE) or skipped (READ). Returns FALSE when the file could not be opened.
BOOL CTextFileIO::OpenW(const WCHAR* const filename, EDealMode mode)
{
    const WCHAR* const openMode = (mode == READ) ? L"rb" : L"wb";
    _wfopen_s(&_file, filename, openMode);
    if (_file == NULL)
    {
        return FALSE;
    }

    switch (mode)
    {
    case WRITE:
        // Start the new file with its encoding tag.
        WriteEncodingTag(_encodingType);
        break;
    case READ:
        // Position the stream behind the encoding tag.
        OmitEncodingTag(_encodingType);
        break;
    }
    return TRUE;
}
// Opens filename (narrow path) in binary mode: "rb" for READ, "wb" otherwise.
// After a successful open the BOM for the current _encodingType is written
// (WRITE) or skipped (READ). Returns FALSE when the file could not be opened.
BOOL CTextFileIO::OpenA(const char* const filename, EDealMode mode)
{
    const char* const openMode = (mode == READ) ? "rb" : "wb";
    fopen_s(&_file, filename, openMode);
    if (_file == NULL)
    {
        return FALSE;
    }

    switch (mode)
    {
    case WRITE:
        // Start the new file with its encoding tag.
        WriteEncodingTag(_encodingType);
        break;
    case READ:
        // Position the stream behind the encoding tag.
        OmitEncodingTag(_encodingType);
        break;
    }
    return TRUE;
}
// Reads bytes from _file into s up to and including the eol byte.
// Returns true when an eol byte was read, false when eof/EOF was reached
// first (s then holds whatever was read before the end).
bool CTextFileIO::_ReadLine(string& s, int eol, int eof)
{
    s.clear();
    for (;;)
    {
        const int ch = fgetc(_file);
        if (ch == EOF || ch == eof)
        {
            return false;
        }
        s.push_back(static_cast<char>(ch));
        if (ch == eol)
        {
            return true;
        }
    }
}
// Reads wide characters from _file into s up to and including the eol
// character. Returns true when eol was read, false when eof/WEOF was reached
// first (s then holds whatever was read before the end).
bool CTextFileIO::_ReadLine(wstring& s, wint_t eol, wint_t eof)
{
    s.clear();
    for (;;)
    {
        const wint_t ch = fgetwc(_file);
        if (ch == WEOF || ch == eof)
        {
            return false;
        }
        s.push_back(static_cast<wchar_t>(ch));
        if (ch == eol)
        {
            return true;
        }
    }
}
// Writes length bytes of s to _file followed by the bytes ret and newline.
// A length of (size_t)-1 means "use strlen(s)". Returns false for a null s,
// otherwise whether all bytes of s were written (the result of writing the
// two terminator bytes is not checked).
bool CTextFileIO::_WriteLine(const char *const s, int ret,int newline, size_t length)
{
    if (s == 0)
    {
        return false;
    }

    const size_t count = (length == static_cast<size_t>(-1)) ? strlen(s) : length;
    const size_t written = fwrite(s, sizeof(char), count, _file);

    // Line terminator.
    fputc(ret, _file);
    fputc(newline, _file);

    return written == count;
}
// Writes length wide characters of s to _file followed by the characters ret
// and newline. A length of (size_t)-1 means "use wcslen(s)". Returns false
// for a null s, otherwise whether all characters of s were written (the
// result of writing the two terminator characters is not checked).
bool CTextFileIO::_WriteLine(const wchar_t *const s, wint_t ret,wint_t newline, size_t length)
{
    if (s == 0)
    {
        return false;
    }

    const size_t count = (length == static_cast<size_t>(-1)) ? wcslen(s) : length;
    const size_t written = fwrite(s, sizeof(wchar_t), count, _file);

    // Line terminator.
    fputwc(ret, _file);
    fputwc(newline, _file);

    return written == count;
}
// Unicode version of read line
bool CTextFileIO::ReadLineW(std::wstring &ws)
{
bool bResult=false;
switch(_encodingType)
{
case ANSI:
{
std::string s;
bResult=_ReadLine(s);
s+='\0';
int nLength=MultiByteToWideChar(CP_OEMCP,MB_PRECOMPOSED,s.c_str(),-1,NULL,0);
LPWSTR lpwStr=new WCHAR[nLength];
MultiByteToWideChar(CP_OEMCP,MB_PRECOMPOSED,s.c_str(), nLength,lpwStr,nLength);
ws=lpwStr;
//5行内出现utf-8 则认为是不加BOM的 UTF-8文件
g_nLine++;
if(g_nLine<5)
if(s.find("utf-8")!=-1||s.find("UTF-8")!=-1||
s.find("UTF_8")!=-1|| s.find("utf_8")!=-1)
_encodingType=UTF_8;
}
break;
case UTF_8:
{
std::string s;
bResult=_ReadLine(s);
s+='\0';
int nLength=MultiByteToWideChar(CP_UTF8,0,s.c_str(),-1,NULL,0);
LPWSTR lpwStr=new WCHAR[nLength];
MultiByteToWideChar(CP_UTF8,0,s.c_str(),-1,lpwStr,nLength);
ws=lpwStr;
}
break;
case UTF16_BE:
{
std::wstring s;
bResult=_ReadLine(s,0x0A00);
// Convert UTF16 big endian to UTF little endian
size_t nLength=s.length()*2+1;
BYTE *src=(BYTE*)s.c_str();
BYTE *dst=new BYTE[nLength];
memset(dst,0,nLength);
for(size_t i=0;i<nLength;)
{
dst[i]=src[i+1];
dst[i+1]=src[i];
i+=2;
}
ws=(WCHAR*)dst;
delete dst;
}
break;
case UTF16_LE:
{
std::wstring s;
bResult=_ReadLine(s);
ws=s;
}
break;
case UTF32_BE:
{
// TODO:will be implement later
}
break;
case UTF32_LE:
{
// TODO:will be implement later
}
break;
default:
{
}
}
return bResult;
}
// ANSI version of read line.
// Reads the next line of _file according to _encodingType and stores it in s
// converted to the ANSI code page (CP_ACP). The line terminator is kept.
// Returns false when the end of the file was reached while reading the line,
// true otherwise. For the UTF-32 encodings nothing is read.
//
// Fixes over the original:
//  * the UTF-8 branch declared a local `s` that hid the output parameter, so
//    the caller never received the line; it also sized the output with CP_ACP
//    but converted with CP_UTF8 into a buffer of the wrong size;
//  * conversion buffers were allocated with new[] and never freed;
//  * the UTF-16 BE branch wrote one byte past its buffer and released it with
//    delete instead of delete[].
bool CTextFileIO::ReadLineA(std::string &s)
{
    // Converts the NUL-terminated UTF-16 text in `wide` to CP_ACP and stores
    // it in s (empty when the conversion fails).
    auto storeAsAnsi = [&s](const std::wstring& wide)
    {
        const int needed = WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
        if (needed <= 0)
        {
            s.clear();
            return;
        }
        // `needed` includes the terminating NUL.
        std::string buffer(static_cast<size_t>(needed), '\0');
        WideCharToMultiByte(CP_ACP, 0, wide.c_str(), -1, &buffer[0], needed, NULL, NULL);
        s.assign(buffer.c_str());
    };

    bool bResult=false;
    switch(_encodingType)
    {
    case ANSI:
        {
            bResult=_ReadLine(s);
        }
        break;
    case UTF_8:
        {
            std::string utf8Line;
            bResult=_ReadLine(utf8Line);
            // UTF-8 -> UTF-16 LE first, then UTF-16 -> ANSI.
            std::wstring wide;
            const int needed = MultiByteToWideChar(CP_UTF8, 0, utf8Line.c_str(), -1, NULL, 0);
            if (needed > 0)
            {
                wide.assign(static_cast<size_t>(needed), L'\0');
                MultiByteToWideChar(CP_UTF8, 0, utf8Line.c_str(), -1, &wide[0], needed);
            }
            storeAsAnsi(wide);
        }
        break;
    case UTF16_BE:
        {
            // 0x0A00 is a big-endian '\n' as seen by a little-endian reader.
            std::wstring ws;
            bResult=_ReadLine(ws,0x0A00);
            // Swap the bytes of every code unit: big endian -> little endian.
            for(size_t i=0;i<ws.size();++i)
            {
                const unsigned int unit = static_cast<unsigned int>(ws[i]) & 0xFFFFu;
                ws[i] = static_cast<wchar_t>(((unit & 0xFFu) << 8) | (unit >> 8));
            }
            storeAsAnsi(ws);
        }
        break;
    case UTF16_LE:
        {
            std::wstring ws;
            bResult=_ReadLine(ws);
            storeAsAnsi(ws);
        }
        break;
    case UTF32_BE:
        {
            // TODO: not implemented yet
        }
        break;
    case UTF32_LE:
        {
            // TODO: not implemented yet
        }
        break;
    default:
        {
        }
    }
    return bResult;
}
// UNICODE version of write line
bool CTextFileIO::WriteLineW(const wchar_t *const wc)
{
bool bResult=false;
switch(_encodingType)
{
case ANSI:
{
int nLength=WideCharToMultiByte(CP_ACP,0,wc,-1,NULL,0,NULL,NULL);
LPSTR lpStr=new char[nLength];
WideCharToMultiByte(CP_ACP,0,wc,-1,lpStr,nLength,NULL,NULL);
bResult=_WriteLine(lpStr);
}
break;
case UTF_8:
{
// Convert utf-16 to UTF8
int nLength=WideCharToMultiByte(CP_UTF8,0,wc,-1,NULL,0,NULL,NULL);
LPSTR lpStr=new CHAR[nLength];
WideCharToMultiByte(CP_UTF8,0,wc,-1,lpStr,nLength,NULL,NULL);
// Write to file
bResult=_WriteLine(lpStr);
}
break;
case UTF16_BE:
{
BYTE *src=(BYTE*)wc;
size_t nLength=wcslen(wc)*2;
BYTE *dst=new BYTE[nLength]+2;
memset(dst,0,nLength+2);
for(size_t i=0;i<nLength;)
{
dst[i]=src[i+1];
dst[i+1]=src[i];
i+=2;
}
bResult=_WriteLine((wchar_t*)dst,0x0D00,0x0A00);
}
break;
case UTF16_LE:
{
bResult=_WriteLine(wc);
}
break;
case UTF32_BE:
{
// TODO:will be implement later
}
break;
case UTF32_LE:
{
// TODO:will be implement later
}
break;
default:
{
}
}
return bResult;
}
// ANSI version of write line.
// Writes the NUL-terminated ANSI (CP_ACP) string c plus a CR/LF pair to
// _file, converted to the current _encodingType. Returns whether the text was
// written completely; false for a null pointer and for the (unimplemented)
// UTF-32 encodings.
//
// The original allocated every conversion buffer with new[] and never freed
// it; the buffers are now owned by std::string / std::wstring objects.
bool CTextFileIO::WriteLineA(const char *const c)
{
    if (c == NULL)
    {
        return false;
    }

    // Converts c from CP_ACP to UTF-16 LE (without the terminator).
    auto toWide = [c]() -> std::wstring
    {
        const int needed = MultiByteToWideChar(CP_ACP, 0, c, -1, NULL, 0);
        if (needed <= 0)
        {
            return std::wstring();
        }
        std::wstring buffer(static_cast<size_t>(needed), L'\0');
        MultiByteToWideChar(CP_ACP, 0, c, -1, &buffer[0], needed);
        buffer.resize(static_cast<size_t>(needed) - 1);
        return buffer;
    };

    bool bResult=false;
    switch(_encodingType)
    {
    case ANSI:
        {
            bResult=_WriteLine(c);
        }
        break;
    case UTF_8:
        {
            // ANSI -> UTF-16 LE -> UTF-8.
            const std::wstring wide = toWide();
            const int needed = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, NULL, 0, NULL, NULL);
            std::string utf8;
            if (needed > 0)
            {
                utf8.assign(static_cast<size_t>(needed), '\0');
                WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], needed, NULL, NULL);
            }
            bResult=_WriteLine(utf8.c_str());
        }
        break;
    case UTF16_BE:
        {
            // ANSI -> UTF-16 LE, then swap the bytes of every code unit.
            std::wstring swapped = toWide();
            for(size_t i=0;i<swapped.size();++i)
            {
                const unsigned int unit = static_cast<unsigned int>(swapped[i]) & 0xFFFFu;
                swapped[i] = static_cast<wchar_t>(((unit & 0xFFu) << 8) | (unit >> 8));
            }
            // 0x0D00 / 0x0A00 are '\r' and '\n' in big-endian byte order.
            bResult=_WriteLine(swapped.c_str(),0x0D00,0x0A00);
        }
        break;
    case UTF16_LE:
        {
            const std::wstring wide = toWide();
            bResult=_WriteLine(wide.c_str());
        }
        break;
    case UTF32_BE:
        {
            // TODO: not implemented yet
        }
        break;
    case UTF32_LE:
        {
            // TODO: not implemented yet
        }
        break;
    default:
        {
        }
    }
    return bResult;
}
// Check file encoding type (wide path).
// Opens filename, inspects up to four leading bytes for a byte-order mark and
// returns the matching encoding. ANSI is returned when no mark is found, when
// fewer than two bytes could be read, or when the file cannot be opened.
//
// Fixes over the original:
//  * a failed open is detected instead of passing a null FILE* to fread;
//  * the 4-byte probe buffer lives on the stack (it used to leak);
//  * each signature is compared only when enough bytes were actually read.
CTextFileIO::EncodingType CTextFileIO::CheckFileEncodingTypeW(const WCHAR* const filename)
{
    FILE* probe=0;
    _wfopen_s(&probe,filename,L"rb");
    if(probe==NULL)
        return ANSI;

    // Read the first 4 bytes for testing.
    BYTE buf[4]={0};
    const size_t nRead=fread(buf,sizeof(BYTE),4,probe);
    fclose(probe);

    if(nRead<2)
        return ANSI;

    // The 4-byte marks are tested first: the UTF-32 LE mark starts with the
    // UTF-16 LE mark.
    if(nRead>=4)
    {
        const BYTE utf32_le[]={0xFF,0xFE,0x00,0x00};
        if(memcmp(buf,utf32_le,4)==0)
            return UTF32_LE;
        const BYTE utf32_be[]={0x00,0x00,0xFE,0xFF};
        if(memcmp(buf,utf32_be,4)==0)
            return UTF32_BE;
    }
    if(nRead>=3)
    {
        const BYTE utf_8[]={0xEF,0xBB,0xBF};
        if(memcmp(buf,utf_8,3)==0)
            return UTF_8;
    }
    const BYTE utf16_le[]={0xFF,0xFE};
    if(memcmp(buf,utf16_le,2)==0)
        return UTF16_LE;
    const BYTE utf16_be[]={0xFE,0xFF};
    if(memcmp(buf,utf16_be,2)==0)
        return UTF16_BE;

    // No byte-order mark.
    return ANSI;
}
// Check file encoding type (narrow path).
// Opens filename, inspects up to four leading bytes for a byte-order mark and
// returns the matching encoding. ANSI is returned when no mark is found, when
// fewer than two bytes could be read, or when the file cannot be opened.
//
// Fixes over the original:
//  * a failed open is detected instead of passing a null FILE* to fread;
//  * the 4-byte probe buffer lives on the stack (it used to leak);
//  * each signature is compared only when enough bytes were actually read.
CTextFileIO::EncodingType CTextFileIO::CheckFileEncodingTypeA(const CHAR* const filename)
{
    FILE* probe=0;
    fopen_s(&probe,filename,"rb");
    if(probe==NULL)
        return ANSI;

    // Read the first 4 bytes for testing.
    BYTE buf[4]={0};
    const size_t nRead=fread(buf,sizeof(BYTE),4,probe);
    fclose(probe);

    if(nRead<2)
        return ANSI;

    // The 4-byte marks are tested first: the UTF-32 LE mark starts with the
    // UTF-16 LE mark.
    if(nRead>=4)
    {
        // UTF-32 little endian
        const BYTE utf32_le[]={0xFF,0xFE,0x00,0x00};
        if(memcmp(buf,utf32_le,4)==0)
            return UTF32_LE;
        // UTF-32 big endian
        const BYTE utf32_be[]={0x00,0x00,0xFE,0xFF};
        if(memcmp(buf,utf32_be,4)==0)
            return UTF32_BE;
    }
    if(nRead>=3)
    {
        // UTF-8
        const BYTE utf_8[]={0xEF,0xBB,0xBF};
        if(memcmp(buf,utf_8,3)==0)
            return UTF_8;
    }
    // UTF-16 little endian ("Unicode")
    const BYTE utf16_le[]={0xFF,0xFE};
    if(memcmp(buf,utf16_le,2)==0)
        return UTF16_LE;
    // UTF-16 big endian ("Unicode big endian")
    const BYTE utf16_be[]={0xFE,0xFF};
    if(memcmp(buf,utf16_be,2)==0)
        return UTF16_BE;

    // ANSI: the file has no leading signature bytes.
    return ANSI;
}
// Omit file encoding tag.
// Positions _file directly behind the byte-order mark that belongs to `type`
// (3 bytes for UTF-8, 2 for UTF-16, 4 for UTF-32). Encodings without a mark
// leave the position untouched. Returns the result of fseek, or 0 when no
// seek was needed.
//
// The original ignored its parameter and always switched on _encodingType;
// the callers in this file pass _encodingType, so their behaviour is the same.
int CTextFileIO::OmitEncodingTag(EncodingType type)
{
    int nResult=0;
    switch(type)
    {
    case UTF_8:
        nResult=fseek(_file,3,SEEK_SET);
        break;
    case UTF16_BE:
    case UTF16_LE:
        nResult=fseek(_file,2,SEEK_SET);
        break;
    case UTF32_BE:
    case UTF32_LE:
        nResult=fseek(_file,4,SEEK_SET);
        break;
    default:
        // ANSI: no tag to skip.
        break;
    }
    return nResult;
}
// Writes the byte-order mark for `type` at the current position of _file.
// Nothing is written for ANSI.
//
// The original had no break after the UTF32_BE case, so a UTF-32 BE file
// received the big-endian mark followed by the little-endian one.
void CTextFileIO::WriteEncodingTag(EncodingType type)
{
    switch(type)
    {
    case UTF_8:
        {
            const BYTE utf_8[]={0xEF,0xBB,0xBF};
            fwrite(utf_8,sizeof(BYTE),3,_file);
        }
        break;
    case UTF16_BE:
        {
            const BYTE utf16_be[]={0xFE,0xFF};
            fwrite(utf16_be,sizeof(BYTE),2,_file);
        }
        break;
    case UTF16_LE:
        {
            const BYTE utf16_le[]={0xFF,0xFE};
            fwrite(utf16_le,sizeof(BYTE),2,_file);
        }
        break;
    case UTF32_BE:
        {
            const BYTE utf32_be[]={0x00,0x00,0xFE,0xFF};
            fwrite(utf32_be,sizeof(BYTE),4,_file);
        }
        break;
    case UTF32_LE:
        {
            const BYTE utf32_le[]={0xFF,0xFE,0x00,0x00};
            fwrite(utf32_le,sizeof(BYTE),4,_file);
        }
        break;
    default:; // ANSI, do nothing
    }
}
// 头文件
#ifndef H_89412764_0AAE_4D28_8500_75B6D40CEAE4
#define H_89412764_0AAE_4D28_8500_75B6D40CEAE4
#pragma once
#include <stdio.h>
#include <string>
#include <windows.h>
#include <tchar.h>
#include <deque>
using namespace std;
// CTextFileIO class declaration.
// Line-oriented text file reader/writer that recognises the file encoding
// from its byte-order mark and converts lines to and from wide/narrow strings.
class CTextFileIO
{
public:
// Creates an object with no file attached.
CTextFileIO(void);
public:
~CTextFileIO(void);
public:
// How a file is opened.
enum EDealMode {READ,WRITE};
// Text file encoding type enum
enum EncodingType {ANSI,UTF16_LE,UTF16_BE,UTF32_LE,UTF32_BE,UTF_8};
// Lines collected by ReadWholeTextFileToDQ.
deque<wstring> _LineWStrDQ;
public:
// Detect the encoding of filename and open it in the given mode
// (wide-path and narrow-path versions).
CTextFileIO(const WCHAR* filename, EDealMode mode);
CTextFileIO(const char *filename,EDealMode mode);
public:
// Open the file (wide-path and narrow-path versions)
BOOL OpenW(const WCHAR* const filename, EDealMode mode);
BOOL OpenA(const char* const filename, EDealMode mode);
// Read every line of lpPath into _LineWStrDQ
BOOL ReadWholeTextFileToDQ(LPCTSTR lpPath);
// Read the whole file at lpPath into WStr, lines joined with "\r\n"
BOOL WStringFromTxtFile(LPCTSTR lpPath, wstring& WStr);
// Write WStr to lpPath preceded by the FF FE byte-order mark
BOOL WriteWStringToUnicodeTxtFile(LPCTSTR lpPath, wstring& WStr);
private:
// Internal FILE object
FILE* _file;
private:
// Read a line from a MBCS file
// Return value:if end of file,return value is false,else is true
bool _ReadLine(string& s, int eol='\n', int eof=EOF);
// Read a line from a UNICODE file
// Return value:if end of file,return value is false,else is true
bool _ReadLine(wstring& s, wint_t eol=L'\n', wint_t eof=WEOF);
// ANSI version of read line
bool ReadLineA(std::string &s);
// Unicode version of read line
bool ReadLineW(std::wstring &ws);
private:
// To write an ANSI zero-terminated string (please consider using STL instead)
// length = -1 means "measure the string"
bool _WriteLine(const char* const s, int ret = '\r',int newline='\n', size_t length = -1);
// To write a Unicode zero-terminated string (please consider using STL instead)
// The second parameter is unnamed here; the definition calls it ret.
bool _WriteLine(const wchar_t* const s, wint_t =L'\r', wint_t newline = L'\n', size_t length = -1);
// To write an ANSI STL string
bool _WriteLine(const std::string& s, int ret = '\r',int newline='\n')
{
return _WriteLine(s.c_str(), ret,newline, s.size());
};
// To write a Unicode STL string
bool _WriteLine(const std::wstring& s, wint_t ret=L'\r', wint_t newline = L'\n')
{
return _WriteLine(s.c_str(), ret, newline, s.size());
};
// ANSI version of write line
bool WriteLineA(const char* const s);
bool WriteLineA(const std::string &s)
{
return WriteLineA(s.c_str());
}
// Unicode version of write line
bool WriteLineW(const wchar_t* const ws);
bool WriteLineW(const std::wstring &ws)
{
return WriteLineW(ws.c_str());
};
private:
// File encoding type
EncodingType _encodingType;
public:
// Check file encoding type by inspecting the leading bytes of the file
static EncodingType CheckFileEncodingTypeW(const WCHAR* const filename);
static EncodingType CheckFileEncodingTypeA(const char* const filename);
public:
// Get current open file's encoding type
CTextFileIO::EncodingType GetEncodingType(void)
{
return _encodingType;
}
// Omit file encoding tag (seek past the byte-order mark)
int OmitEncodingTag(EncodingType type);
// Indicates whether the file could be opened
BOOL _IsValidate;
public:
// Check whether the file is valid for reading
BOOL IsValidate(void)
{
return _IsValidate;
}
// Write the encoding type tag at the beginning of the file
void WriteEncodingTag(EncodingType type);
};
#endif