读取和写入不同编码格式的文件

最新推荐文章于 2024-01-01 18:20:34 发布
焦三仙本仙
最新推荐文章于 2024-01-01 18:20:34 发布
阅读量1k
点赞数 1
文章标签： delete null file character system string
本文链接：https://blog.csdn.net/ghevinn/article/details/7877873
版权
BOOL CStdioFileEx::ReadString(CString& rString)
{
	const int	nMAX_LINE_CHARS = 4096;
	BOOL		bReadData = FALSE;
	LPTSTR		lpsz;
	int			nLen = 0;
	wchar_t*	pszUnicodeString = NULL;
	wchar_t*	pszMultiByteString= NULL;
	int			nChars = 0;

	try
	{
		// If at position 0, discard byte-order mark before reading
		if (!m_pStream || (GetPosition() == 0 && m_bIsUnicodeText))
		{
			wchar_t	cDummy;
	//		Read(&cDummy, sizeof(_TCHAR));
			Read(&cDummy, sizeof(wchar_t));
		}

// If compiled for Unicode
#ifdef _UNICODE
		if (m_bIsUnicodeText)
		{
			// Do standard stuff - Unicode to Unicode. Seems to work OK.
			bReadData = CStdioFile::ReadString(rString);
		}
		else
		{   //MultiByte(ASCII)--WideChar(UNICODE)
			pszUnicodeString	= new wchar_t[nMAX_LINE_CHARS]; 
			pszMultiByteString  = new wchar_t[nMAX_LINE_CHARS]; 

			// Initialise to something safe
			memset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);
			memset(pszMultiByteString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);
			
			// Read the string from the file pointer to a temporary array
			//bReadData = (NULL != fgetws(pszMultiByteString, nMAX_LINE_CHARS, m_pStream));
			
			char *pszTemp=new char[nMAX_LINE_CHARS];
			ZeroMemory(pszTemp,nMAX_LINE_CHARS);
		    bReadData=(NULL != fgets(pszTemp, nMAX_LINE_CHARS, m_pStream));
			/*	if(fgets( pszTemp, nMAX_LINE_CHARS, m_pStream )==NULL)
			{bReadData=0;}else{bReadData=1;}
			*/
			//open the file of UTF_8 and change to the UNICODE default coding
			
            MultiByteToWideChar( CP_UTF8, 0, pszTemp, /*sizeof(pszTemp)*/ -1, pszMultiByteString, nMAX_LINE_CHARS );
			
			delete [] pszTemp;

            //bReadData = (NULL != fgetws(pszMultiByteString, nMAX_LINE_CHARS, m_pStream));
			//if (bReadData)
			//{
				// Convert multibyte to Unicode, using the specified code page
				//nChars = GetUnicodeStringFromMultiByteString((LPCSTR)pszMultiByteString, pszUnicodeString, nMAX_LINE_CHARS, m_nFileCodePage);
				memcpy( pszUnicodeString, pszMultiByteString, sizeof(wchar_t) * nMAX_LINE_CHARS );
				//if (nChars > 0)
				//{
					rString = (CString)pszUnicodeString;
				//}
			//}
		}
#else

		if (!m_bIsUnicodeText)
		{
			// Do standard stuff -- read ANSI in ANSI
			bReadData = CStdioFile::ReadString(rString);

			// Get the current code page
			UINT nLocaleCodePage = GetCurrentLocaleCodePage();

			// If we got it OK...
			if (nLocaleCodePage > 0)
			{
				// if file code page does not match the system code page, we need to do a double conversion!
				if (nLocaleCodePage != (UINT)m_nFileCodePage)
				{
					int nStringBufferChars = rString.GetLength() + 1;

					pszUnicodeString	= new wchar_t[nStringBufferChars]; 

					// Initialise to something safe
					memset(pszUnicodeString, 0, sizeof(wchar_t) * nStringBufferChars);
					
					// Convert to Unicode using the file code page
					nChars = GetUnicodeStringFromMultiByteString(rString, pszUnicodeString, nStringBufferChars, m_nFileCodePage);

					// Convert back to multibyte using the system code page
					// (This doesn't really confer huge advantages except to avoid "mangling" of non-convertible special
					// characters. So, if a file in the E.European code page is displayed on a system using the 
					// western European code page, special accented characters which the system cannot display will be
					// replaced by the default character (a hash or something), rather than being incorrectly mapped to
					// other, western European accented characters).
					if (nChars > 0)
					{
						// Calculate how much we need for the MB buffer (it might be larger)
						nStringBufferChars = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString,nLocaleCodePage);
						pszMultiByteString= new char[nStringBufferChars];  

						nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nStringBufferChars, nLocaleCodePage);
						rString = (CString)pszMultiByteString;
					}
				}
			}
		}
		else
		{
			pszUnicodeString	= new wchar_t[nMAX_LINE_CHARS]; 

			// Initialise to something safe
			memset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);
			
			// Read as Unicode, convert to ANSI

			// Bug fix by Dennis Jeryd 06/07/2003: initialise bReadData
			bReadData = (NULL != fgetws(pszUnicodeString, nMAX_LINE_CHARS, m_pStream));

			if (bReadData)
			{
				// Calculate how much we need for the multibyte string
				int nRequiredMBBuffer = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString,m_nFileCodePage);
				pszMultiByteString= new char[nRequiredMBBuffer];  

				nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nRequiredMBBuffer, m_nFileCodePage);

				if (nChars > 0)
				{
					rString = (CString)pszMultiByteString;
				}
			}

		}
#endif

		// Then remove end-of-line character if in Unicode text mode
		if (bReadData)
		{
			// Copied from FileTxt.cpp but adapted to Unicode and then adapted for end-of-line being just '\r'. 
			nLen = rString.GetLength();
			if (nLen > 1 && rString.Mid(nLen-2) == sNEWLINE)
			{
				rString.GetBufferSetLength(nLen-2);
			}
			else
			{
				lpsz = rString.GetBuffer(0);
				if (nLen != 0 && (lpsz[nLen-1] == _T('\r') || lpsz[nLen-1] == _T('\n')))
				{
					rString.GetBufferSetLength(nLen-1);
				}
			}
		}
	}
	// Ensure we always delete in case of exception
	catch(...)
	{
		if (pszUnicodeString)	delete [] pszUnicodeString;

		if (pszMultiByteString) delete [] pszMultiByteString;

		throw;
	}

	if (pszUnicodeString)		delete [] pszUnicodeString;

	if (pszMultiByteString)		delete [] pszMultiByteString;

	return bReadData;
}



// --------------------------------------------------------------------------------------------
//
//	CStdioFileEx::WriteString()
//
// --------------------------------------------------------------------------------------------
// Returns:    void
// Parameters: LPCTSTR lpsz
//
// Purpose:		Writes string to file either in Unicode or multibyte, depending on whether the caller specified the
//					CStdioFileEx::modeWriteUnicode flag. Override of base class function.
// Notes:		If writing in Unicode we need to:
//						a) Write the Byte-order-mark at the beginning of the file
//						b) Write all strings in byte-mode
//					-	If we were compiled in Unicode, we need to convert Unicode to multibyte if 
//						we want to write in multibyte
//					-	If we were compiled in multi-byte, we need to convert multibyte to Unicode if 
//						we want to write in Unicode.
// Exceptions:	None.
//
void CStdioFileEx::WriteString(LPCTSTR lpsz)
{
	wchar_t*	pszUnicodeString	= NULL; 
	char	*	pszMultiByteString= NULL;
	
	try
	{
		// If writing Unicode and at the start of the file, need to write byte mark
		if (m_nFlags & CStdioFileEx::modeWriteUnicode)
		{
			// If at position 0, write byte-order mark before writing anything else
			if (!m_pStream || GetPosition() == 0)
			{
				wchar_t cBOM = (wchar_t)nUNICODE_BOM;
				CFile::Write(&cBOM, sizeof(wchar_t));
			}
		}

// If compiled in Unicode...
#ifdef _UNICODE

		// If writing Unicode, no conversion needed
		if (m_nFlags & CStdioFileEx::modeWriteUnicode)
		{
			// Write in byte mode
			CFile::Write(lpsz, lstrlen(lpsz) * sizeof(wchar_t));
		}
		// Else if we don't want to write Unicode, need to convert
		else
		{
			int		nChars = lstrlen(lpsz);				// Why plus 1? Because yes
//			int		nBufferSize = nChars * sizeof(char);	// leave space for multi-byte chars
			int		nCharsWritten = 0;
			int		nBufferSize = 0;

			pszUnicodeString	= new wchar_t[nChars + 1]; 
			ZeroMemory(pszUnicodeString,nChars + 1);
			// Copy string to Unicode buffer
			lstrcpy(pszUnicodeString, lpsz);

			// Work out how much space we need for the multibyte conversion
			 //nBufferSize= GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, m_nFileCodePage);
			
			// Get multibyte string
			//nCharsWritten = GetMultiByteStringFromUnicodeString(pszUnicodeString,pszMultiByteString , nBufferSize, m_nFileCodePage);

	
			int u8len=::WideCharToMultiByte(CP_UTF8,NULL,pszUnicodeString,wcslen(pszUnicodeString),NULL,0,NULL,NULL);
			char*szU8=new char[u8len+1];
			ZeroMemory(szU8,u8len+1);
			::WideCharToMultiByte(CP_UTF8,NULL,pszUnicodeString,wcslen(pszUnicodeString),szU8,u8len,NULL,NULL);
			szU8[u8len]='\0';

			if(pszUnicodeString[0]==0xfeff)
			{
				//CFile::Write(/*(const void*)*/pszUnicodeString,nChars*sizeof(wchar_t));
                  CFile::Write(/*(const void*)*/szU8,u8len*sizeof(char));
			}
			else
			{
                //CFile::Write(/*(const void*)*/pszUnicodeString,nChars*sizeof(wchar_t));
				  CFile::Write(/*(const void*)*/szU8,u8len*sizeof(char));
			}

		}
	// Else if *not* compiled in Unicode
#else
		// If writing Unicode, need to convert
		if (m_nFlags & CStdioFileEx::modeWriteUnicode)
		{
			int		nChars = lstrlen(lpsz) + 1;	 // Why plus 1? Because yes
			int		nBufferSize = nChars * sizeof(wchar_t);
			int		nCharsWritten = 0;

			pszUnicodeString	= new wchar_t[nChars];
			pszMultiByteString= new char[nChars]; 

			// Copy string to multibyte buffer
			lstrcpy(pszMultiByteString, lpsz);

			nCharsWritten = GetUnicodeStringFromMultiByteString(pszMultiByteString, pszUnicodeString, nChars, m_nFileCodePage);

			if (nCharsWritten > 0)
			{
				// Do byte-mode write using actual chars written (fix by Howard J Oh)
	//			CFile::Write(pszUnicodeString, lstrlen(lpsz) * sizeof(wchar_t));
				CFile::Write(pszUnicodeString, nCharsWritten*sizeof(wchar_t));
			}
			else
			{
				ASSERT(false);
			}

		}
		// Else if we don't want to write Unicode, no conversion needed, unless the code page differs
		else
		{
	//		// Do standard stuff
	//		CStdioFile::WriteString(lpsz);

			// Get the current code page
			UINT nLocaleCodePage = GetCurrentLocaleCodePage();

			// If we got it OK, and if file code page does not match the system code page, we need to do a double conversion!
			if (nLocaleCodePage > 0 && nLocaleCodePage != (UINT)m_nFileCodePage)
			{
				int	nChars = lstrlen(lpsz) + 1;	 // Why plus 1? Because yes

				pszUnicodeString	= new wchar_t[nChars]; 

				// Initialise to something safe
				memset(pszUnicodeString, 0, sizeof(wchar_t) * nChars);
				
				// Convert to Unicode using the locale code page (the code page we are using in memory)
				nChars = GetUnicodeStringFromMultiByteString((LPCSTR)(const char*)lpsz, pszUnicodeString, nChars, nLocaleCodePage);

				// Convert back to multibyte using the file code page
				// (Note that you can't reliably read a non-Unicode file written in code page A on a system using a code page B,
				// modify the file and write it back using code page A, unless you disable all this double-conversion code.
				// In effect, you have to choose between a mangled character display and mangled file writing).
				if (nChars > 0)
				{
					// Calculate how much we need for the MB buffer (it might be larger)
					nChars = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, m_nFileCodePage);

					pszMultiByteString= new char[nChars];  
					memset(pszMultiByteString, 0, sizeof(char) * nChars);

					nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nChars, m_nFileCodePage);

					// Do byte-mode write. This avoids annoying "interpretation" of \n's as \r\n
					CFile::Write((const void*)pszMultiByteString, nChars * sizeof(char));
				}
			}
			else
			{
				// Do byte-mode write. This avoids annoying "interpretation" of \n's as \r\n
				CFile::Write((const void*)lpsz, lstrlen(lpsz)*sizeof(char));
			}
		}

#endif
	}
	// Ensure we always clean up
	catch(...)
	{
		if (pszUnicodeString)	delete [] pszUnicodeString;
		if (pszMultiByteString)	delete [] pszMultiByteString;
		throw;
	}

	if (pszUnicodeString)	delete [] pszUnicodeString;
	if (pszMultiByteString)	delete [] pszMultiByteString;
}