BOOL CStdioFileEx::ReadString(CString& rString)
{
    // Purpose:  Reads one line from the file into rString, converting between the
    //           file's encoding and the in-memory character type, then strips the
    //           trailing end-of-line sequence.
    // Returns:  TRUE if a line was read, FALSE otherwise.
    // Notes:    The buffered paths read at most nMAX_LINE_CHARS - 1 units per call,
    //           so a longer line is returned in several pieces.
    //           Any exception raised while reading is rethrown after the temporary
    //           buffers have been released.
    const int nMAX_LINE_CHARS = 4096;

    BOOL     bReadData          = FALSE;
    wchar_t* pszUnicodeString   = NULL;   // wide-character scratch buffer
    char*    pszMultiByteString = NULL;   // byte-oriented scratch buffer

    try
    {
        // If at position 0 of a Unicode text file, discard the byte-order mark before reading.
        // NOTE(review): a NULL m_pStream also takes this branch and calls Read(); this is
        // kept as-is from the original - confirm whether that is intended.
        if (!m_pStream || (GetPosition() == 0 && m_bIsUnicodeText))
        {
            wchar_t cDummy;
            Read(&cDummy, sizeof(wchar_t));
        }

#ifdef _UNICODE
        if (m_bIsUnicodeText)
        {
            // Unicode file, Unicode build: the base class can read the line directly.
            bReadData = CStdioFile::ReadString(rString);
        }
        else
        {
            // Multibyte file, Unicode build: read raw bytes, then widen them.
            pszUnicodeString   = new wchar_t[nMAX_LINE_CHARS];
            pszMultiByteString = new char[nMAX_LINE_CHARS];

            // Zero both buffers so that a failed read yields an empty string.
            memset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);
            memset(pszMultiByteString, 0, sizeof(char) * nMAX_LINE_CHARS);

            bReadData = (NULL != fgets(pszMultiByteString, nMAX_LINE_CHARS, m_pStream));

            if (bReadData)
            {
                // The bytes are interpreted as UTF-8 (not m_nFileCodePage). The source holds
                // at most nMAX_LINE_CHARS - 1 bytes plus the terminator, so the converted
                // text always fits in the destination buffer.
                MultiByteToWideChar(CP_UTF8, 0, pszMultiByteString, -1, pszUnicodeString, nMAX_LINE_CHARS);
            }

            // On failure the buffer is still all zeros, so rString becomes empty.
            rString = pszUnicodeString;
        }
#else
        if (!m_bIsUnicodeText)
        {
            // Multibyte file, multibyte build: read the line as-is.
            bReadData = CStdioFile::ReadString(rString);

            // If the file code page differs from the system code page, convert via Unicode.
            // This avoids special characters of the file's code page being silently mapped
            // to different characters of the system code page.
            const UINT nLocaleCodePage = GetCurrentLocaleCodePage();

            if (nLocaleCodePage > 0 && nLocaleCodePage != (UINT)m_nFileCodePage)
            {
                int nStringBufferChars = rString.GetLength() + 1;

                pszUnicodeString = new wchar_t[nStringBufferChars];
                memset(pszUnicodeString, 0, sizeof(wchar_t) * nStringBufferChars);

                // Step 1: file code page -> Unicode
                int nChars = GetUnicodeStringFromMultiByteString(rString, pszUnicodeString, nStringBufferChars, m_nFileCodePage);

                if (nChars > 0)
                {
                    // Step 2: Unicode -> system code page (the result may need more room)
                    nStringBufferChars = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, nLocaleCodePage);
                    pszMultiByteString = new char[nStringBufferChars];

                    nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nStringBufferChars, nLocaleCodePage);

                    // Only replace the text when the conversion produced output; the buffer
                    // is not initialised otherwise.
                    if (nChars > 0)
                    {
                        rString = pszMultiByteString;
                    }
                }
            }
        }
        else
        {
            // Unicode file, multibyte build: read wide characters, then narrow them.
            pszUnicodeString = new wchar_t[nMAX_LINE_CHARS];
            memset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);

            bReadData = (NULL != fgetws(pszUnicodeString, nMAX_LINE_CHARS, m_pStream));

            if (bReadData)
            {
                // Work out how much room the multibyte form needs
                const int nRequiredMBBuffer = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, m_nFileCodePage);
                pszMultiByteString = new char[nRequiredMBBuffer];

                const int nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nRequiredMBBuffer, m_nFileCodePage);

                if (nChars > 0)
                {
                    rString = pszMultiByteString;
                }
            }
        }
#endif

        // Strip the end-of-line: the two-character sNEWLINE sequence if present,
        // otherwise a single trailing '\r' or '\n'.
        if (bReadData)
        {
            const int nLen = rString.GetLength();

            if (nLen > 1 && rString.Mid(nLen - 2) == sNEWLINE)
            {
                rString.GetBufferSetLength(nLen - 2);
            }
            else
            {
                LPTSTR lpsz = rString.GetBuffer(0);

                if (nLen != 0 && (lpsz[nLen - 1] == _T('\r') || lpsz[nLen - 1] == _T('\n')))
                {
                    rString.GetBufferSetLength(nLen - 1);
                }
            }
        }
    }
    // Ensure the scratch buffers are released if anything above throws
    catch (...)
    {
        delete [] pszUnicodeString;
        delete [] pszMultiByteString;
        throw;
    }

    delete [] pszUnicodeString;
    delete [] pszMultiByteString;

    return bReadData;
}
// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::WriteString()
//
// --------------------------------------------------------------------------------------------
// Returns:    void
// Parameters: LPCTSTR lpsz - null-terminated string to write (the terminator is not written
//                            by the paths that compute the length with lstrlen)
//
// Purpose:    Writes string to file either in Unicode or multibyte, depending on whether the
//             caller specified the CStdioFileEx::modeWriteUnicode flag. Override of base
//             class function.
// Notes:      If writing in Unicode we need to:
//                a) Write the byte-order mark at the beginning of the file
//                b) Write all strings in byte mode
//             - If compiled for Unicode and not writing Unicode, the text is converted to
//               UTF-8 (CP_UTF8) before being written.
//             - If compiled for multibyte and writing Unicode, the text is converted from
//               m_nFileCodePage to Unicode before being written.
//             - If compiled for multibyte and not writing Unicode, the text is converted
//               from the system code page to m_nFileCodePage when the two differ.
// Exceptions: Does not throw anything itself; an exception raised by an allocation or by a
//             called function is rethrown after the temporary buffers are released.
//
void CStdioFileEx::WriteString(LPCTSTR lpsz)
{
    wchar_t* pszUnicodeString   = NULL;   // wide-character scratch buffer (multibyte build)
    char*    pszMultiByteString = NULL;   // byte-oriented scratch buffer

    try
    {
        // When writing Unicode, the byte-order mark must precede everything else
        if (m_nFlags & CStdioFileEx::modeWriteUnicode)
        {
            if (!m_pStream || GetPosition() == 0)
            {
                wchar_t cBOM = static_cast<wchar_t>(nUNICODE_BOM);
                CFile::Write(&cBOM, sizeof(wchar_t));
            }
        }

#ifdef _UNICODE
        if (m_nFlags & CStdioFileEx::modeWriteUnicode)
        {
            // Unicode in memory, Unicode on disk: byte-mode write, no conversion
            CFile::Write(lpsz, static_cast<UINT>(lstrlen(lpsz) * sizeof(wchar_t)));
        }
        else
        {
            // Unicode in memory, multibyte on disk: convert to UTF-8 first.
            // WideCharToMultiByte rejects a zero-length input, so an empty string is
            // simply not written (a zero-byte write would have no effect anyway).
            const int nChars = lstrlen(lpsz);

            if (nChars > 0)
            {
                // First call only measures the size of the UTF-8 output
                const int nUtf8Bytes = ::WideCharToMultiByte(CP_UTF8, 0, lpsz, nChars, NULL, 0, NULL, NULL);

                if (nUtf8Bytes > 0)
                {
                    // Held in pszMultiByteString so that the common cleanup below frees it
                    pszMultiByteString = new char[nUtf8Bytes];

                    ::WideCharToMultiByte(CP_UTF8, 0, lpsz, nChars, pszMultiByteString, nUtf8Bytes, NULL, NULL);

                    CFile::Write(pszMultiByteString, static_cast<UINT>(nUtf8Bytes));
                }
            }
        }
#else
        if (m_nFlags & CStdioFileEx::modeWriteUnicode)
        {
            // Multibyte in memory, Unicode on disk: widen using the file code page
            const int nChars = lstrlen(lpsz) + 1;   // includes room for the terminator

            pszUnicodeString = new wchar_t[nChars];

            const int nCharsWritten = GetUnicodeStringFromMultiByteString(lpsz, pszUnicodeString, nChars, m_nFileCodePage);

            if (nCharsWritten > 0)
            {
                // Byte-mode write of exactly the characters the conversion produced
                CFile::Write(pszUnicodeString, static_cast<UINT>(nCharsWritten * sizeof(wchar_t)));
            }
            else
            {
                ASSERT(false);
            }
        }
        else
        {
            // Multibyte in memory, multibyte on disk: convert only if the code pages differ
            const UINT nLocaleCodePage = GetCurrentLocaleCodePage();

            if (nLocaleCodePage > 0 && nLocaleCodePage != (UINT)m_nFileCodePage)
            {
                int nChars = lstrlen(lpsz) + 1;   // includes room for the terminator

                pszUnicodeString = new wchar_t[nChars];
                memset(pszUnicodeString, 0, sizeof(wchar_t) * nChars);

                // Step 1: system code page (the in-memory encoding) -> Unicode
                nChars = GetUnicodeStringFromMultiByteString(lpsz, pszUnicodeString, nChars, nLocaleCodePage);

                // Step 2: Unicode -> file code page.
                // (A file written in code page A cannot be reliably read, modified and
                // rewritten on a system using code page B while this double conversion
                // is active: the choice is between mangled display and mangled output.)
                if (nChars > 0)
                {
                    // The multibyte form may need more room than the original
                    nChars = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, m_nFileCodePage);

                    pszMultiByteString = new char[nChars];
                    memset(pszMultiByteString, 0, sizeof(char) * nChars);

                    nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nChars, m_nFileCodePage);

                    // Byte-mode write avoids '\n' being expanded to "\r\n"
                    CFile::Write((const void*)pszMultiByteString, nChars * sizeof(char));
                }
            }
            else
            {
                // Byte-mode write avoids '\n' being expanded to "\r\n"
                CFile::Write((const void*)lpsz, lstrlen(lpsz) * sizeof(char));
            }
        }
#endif
    }
    // Ensure the scratch buffers are released if anything above throws
    catch (...)
    {
        delete [] pszUnicodeString;
        delete [] pszMultiByteString;
        throw;
    }

    delete [] pszUnicodeString;
    delete [] pszMultiByteString;
}
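// Reading and writing files in different encodings
// (Residue from the source article page: "latest recommended article published 2024-01-01 18:20:34")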