十二:MFC读取XML

本文档展示了如何在MFC中使用CMarkup类读取和操作XML文件。内容包括类的成员函数,如设置和加载XML文档,查找元素,保存到文件等。还提供了示例代码,演示了如何读取XML文件并搜索特定元素。
摘要由CSDN通过智能技术生成

Markup.cpp

#include "stdafx.h"
#include <stdio.h>
#include "Markup.h"

#ifdef MCD_STRERROR
#include <string.h>
#include <errno.h>
#else
#include <windows.h>
#endif

#if defined(_DEBUG) && ! defined(MARKUP_STL) && ! defined(MARKUP_STDC)
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

#ifdef _MBCS
#pragma message( "Note: MBCS build (not UTF-8)" )
// For UTF-8, remove _MBCS from project settings C/C++ preprocessor definitions
#endif

// Customization
// The escape sequences below must use backslashes: with forward slashes x_EOL
// would be the literal text "/r/n" and x_ATTRIBQUOTE would be an unterminated
// string literal.
#define x_EOL _T("\r\n") // can be \r\n or \n or empty
#define x_EOLLEN (sizeof(x_EOL)/sizeof(MCD_CHAR)-1) // string length of x_EOL
#define x_ATTRIBQUOTE _T("\"") // can be double or single quote


void CMarkup::operator=( const CMarkup& markup )
{
 m_iPosParent = markup.m_iPosParent;
 m_iPos = markup.m_iPos;
 m_iPosChild = markup.m_iPosChild;
 m_iPosFree = markup.m_iPosFree;
 m_iPosDeleted = markup.m_iPosDeleted;
 m_nNodeType = markup.m_nNodeType;
 m_nNodeOffset = markup.m_nNodeOffset;
 m_nNodeLength = markup.m_nNodeLength;
 m_strDoc = markup.m_strDoc;
 m_strError = markup.m_strError;
 m_nFlags = markup.m_nFlags;

 // Copy used part of the index array
 m_aPos.RemoveAll();
 m_aPos.nSize = m_iPosFree;
 if ( m_aPos.nSize < 8 )
  m_aPos.nSize = 8;
 m_aPos.nSegs = m_aPos.SegsUsed();
 if ( m_aPos.nSegs )
 {
  m_aPos.pSegs = (ElemPos**)(new char[m_aPos.nSegs*sizeof(char*)]);
  int nSegSize = 1 << m_aPos.PA_SEGBITS;
  for ( int nSeg=0; nSeg < m_aPos.nSegs; ++nSeg )
  {
   if ( nSeg + 1 == m_aPos.nSegs )
    nSegSize = m_aPos.GetSize() - (nSeg << m_aPos.PA_SEGBITS);
   m_aPos.pSegs[nSeg] = (ElemPos*)(new char[nSegSize*sizeof(ElemPos)]);
   memcpy( m_aPos.pSegs[nSeg], markup.m_aPos.pSegs[nSeg], nSegSize*sizeof(ElemPos) );
  }
 }

 // Copy SavedPos map
 m_mapSavedPos.RemoveAll();
 if ( markup.m_mapSavedPos.pTable )
 {
  m_mapSavedPos.AllocMapTable();
  for ( int nSlot=0; nSlot < SavedPosMap::SPM_SIZE; ++nSlot )
  {
   SavedPos* pCopySavedPos = markup.m_mapSavedPos.pTable[nSlot];
   if ( pCopySavedPos )
   {
    int nCount = 0;
    while ( pCopySavedPos[nCount].nSavedPosFlags & SavedPosMap::SPM_USED )
    {
     ++nCount;
     if ( pCopySavedPos[nCount-1].nSavedPosFlags & SavedPosMap::SPM_LAST )
      break;
    }
    if ( nCount )
    {
     SavedPos* pNewSavedPos = new SavedPos[nCount];
     for ( int nCopy=0; nCopy<nCount; ++nCopy )
      pNewSavedPos[nCopy] = pCopySavedPos[nCopy];
     pNewSavedPos[nCount-1].nSavedPosFlags |= SavedPosMap::SPM_LAST;
     m_mapSavedPos.pTable[nSlot] = pNewSavedPos;
    }
   }
  }
 }

 MARKUP_SETDEBUGSTATE;
}

bool CMarkup::SetDoc( MCD_PCSZ szDoc )
{
 // Replace the document text with szDoc (a NULL pointer empties the
 // document), reset the error string and return the result of parsing
 if ( ! szDoc )
  MCD_STRCLEAR(m_strDoc);
 else
  m_strDoc = szDoc;

 MCD_STRCLEAR(m_strError);
 const bool bParsed = x_ParseDoc();
 return bParsed;
}

bool CMarkup::SetDoc( const MCD_STR& strDoc )
{
 m_strDoc = strDoc;
 MCD_STRCLEAR(m_strError);
 return x_ParseDoc();
}

bool CMarkup::IsWellFormed()
{
 if ( m_aPos.GetSize()
   && ! (m_aPos[0].nFlags & MNF_ILLFORMED)
   && m_aPos[0].iElemChild
   && ! m_aPos[m_aPos[0].iElemChild].iElemNext )
  return true;
 return false;
}

bool CMarkup::Load( MCD_CSTR szFileName )
{
 // Read the file into m_strDoc (result text goes to m_strError, flags to
 // m_nFlags); parse the document only when the read succeeded
 const bool bFileRead = ReadTextFile(szFileName, m_strDoc, &m_strError, &m_nFlags);
 return bFileRead ? x_ParseDoc() : false;
}

bool CMarkup::ReadTextFile( MCD_CSTR szFileName, MCD_STR& strDoc, MCD_STR* pstrError, int* pnFlags )
{
 // Static utility method to load text file into strDoc
 //
 // Open file to read binary
 FILE* fp = MCD_FOPEN( szFileName, _T("rb") );
 if ( ! fp )
 {
  if ( pstrError )
   *pstrError = x_GetLastError();
  return false;
 }

 // Set flags to 0 unless flags argument provided
 int nFlags = pnFlags?*pnFlags:0;
 MCD_CHAR szDescBOM[20] = {0};
 MCD_CHAR szResult[100];
 MCD_STRCLEAR(strDoc);

 // Get file length
 fseek( fp, 0, SEEK_END );
 int nFileByteLen = ftell( fp );
 fseek( fp, 0, SEEK_SET );


#if defined(UNICODE) // convert file to wide char
 int nWideLen = 0;
 if ( nFileByteLen )
 {
  char* pBuffer = new char[nFileByteLen];
  fread( pBuffer, nFileByteLen, 1, fp );
  /*
  // Alternative: use these 3 lines instead of 3 lines below using UTF8To16
  // For ANSI files, replace CP_UTF8 with CP_ACP in both places
  nWideLen = MultiByteToWideChar(CP_UTF8,0,pBuffer,nFileByteLen,NULL,0);
  MCD_CHAR* pUTF16Buffer = MCD_GETBUFFER(strDoc,nWideLen);
  MultiByteToWideChar(CP_UTF8,0,pBuffer,nFileByteLen,pUTF16Buffer,nWideLen);
  */
  // For ANSI files, replace both UTF8To16 calls with mbstowcs (arguments are the same)
  nWideLen = UTF8To16(NULL,pBuffer,nFileByteLen);
  MCD_CHAR* pUTF16Buffer = MCD_GETBUFFER(strDoc,nWideLen);
  UTF8To16(pUTF16Buffer,pBuffer,nFileByteLen);
  MCD_RELEASEBUFFER( strDoc, pUTF16Buffer, nWideLen );
  delete [] pBuffer;
 }
 MCD_SPRINTF( szResult, _T("%s%d bytes to %d wide chars"), szDescBOM, nFileByteLen, nWideLen );
 if ( pstrError )
  *pstrError = szResult;
#else // read file directly
 if ( nFileByteLen )
 {
  MCD_CHAR* pUTF8Buffer = MCD_GETBUFFER(strDoc,nFileByteLen);
  fread( pUTF8Buffer, nFileByteLen, 1, fp );
  MCD_RELEASEBUFFER( strDoc, pUTF8Buffer, nFileByteLen );
#if defined(_MBCS) // needs to be in memory as MBCS
  MCD_STR strEncoding = GetDeclaredEncoding( strDoc );
  if ( MCD_STRISEMPTY(strEncoding) || MCD_PSZNICMP(MCD_2PCSZ(strEncoding),_T("UTF-8"),5)==0 )
   strDoc = UTF8ToA( strDoc );
#endif
 }
 MCD_SPRINTF( szResult, _T("%s%d bytes"), szDescBOM, nFileByteLen );
 if ( pstrError )
  *pstrError = szResult;
#endif
 fclose( fp );
 if ( pnFlags )
  *pnFlags = nFlags;
 return true;
}

bool CMarkup::Save( MCD_CSTR szFileName )
{
 // Write the current document text to szFileName; the result or error
 // text is stored in m_strError and the flags pass through m_nFlags
 const bool bWritten = WriteTextFile( szFileName, m_strDoc, &m_strError, &m_nFlags );
 return bWritten;
}

bool CMarkup::WriteTextFile( MCD_CSTR szFileName, MCD_STR& strDoc, MCD_STR* pstrError, int* pnFlags )
{
 // Static utility method to save strDoc to text file
 //
 // Open file to write binary
 bool bSuccess = true;
 FILE* fp = MCD_FOPEN( szFileName, _T("wb") );
 if ( ! fp )
 {
  if ( pstrError )
   *pstrError = x_GetLastError();
  return false;
 }

 // Set flags to 0 unless flags argument provided
 int nFlags = pnFlags?*pnFlags:0;
 MCD_CHAR szDescBOM[20] = {0};
 MCD_CHAR szResult[100];

 // Get document length
 int nDocLength = MCD_STRLENGTH(strDoc);


#if defined( UNICODE )
 int nMBLen = 0;
 if ( nDocLength )
 {
  /*
  // Alternative: use these 3 lines instead of 3 lines below using UTF16To8
  // For ANSI files, replace CP_UTF8 with CP_ACP in both places
  nMBLen = WideCharToMultiByte(CP_UTF8,0,strDoc,nDocLength,NULL,0,NULL,NULL);
  char* pBuffer = new char[nMBLen+1];
  WideCharToMultiByte(CP_UTF8,0,strDoc,nDocLength,pBuffer,nMBLen+1,NULL,NULL);
  */
  // For ANSI files, replace both UTF16To8 calls with wcstombs (arguments are the same)
  nMBLen = UTF16To8(NULL,MCD_2PCSZ(strDoc),0);
  char* pBuffer = new char[nMBLen+1];
  UTF16To8(pBuffer,MCD_2PCSZ(strDoc),nMBLen);
  bSuccess = ( fwrite( pBuffer, nMBLen, 1, fp ) == 1 );
  delete [] pBuffer;
 }
 MCD_SPRINTF( szResult, _T("%d wide chars to %s%d bytes"), nDocLength, szDescBOM, nMBLen );
 if ( pstrError )
  *pstrError = szResult;
#else // MBCS or UTF-8
 if ( nDocLength )
 {
  MCD_STR strDocWrite = strDoc; // reference unless converted
#if defined(_MBCS) // is in memory as MBCS
  MCD_STR strEncoding = GetDeclaredEncoding( strDoc );
  if ( MCD_STRISEMPTY(strEncoding) || MCD_PSZNICMP(MCD_2PCSZ(strEncoding),_T("UTF-8"),5)==0 )
   strDocWrite = AToUTF8( strDoc );
#endif
  nDocLength = MCD_STRLENGTH(strDocWrite);
  bSuccess = ( fwrite( MCD_2PCSZ(strDocWrite), nDocLength, 1, fp ) == 1 );
 }
 MCD_SPRINTF( szResult, _T("%s%d bytes"), szDescBOM, nDocLength );
 if ( pstrError )
  *pstrError = szResult;
#endif
 
 if ( ! bSuccess && pstrError )
  *pstrError = x_GetLastError();
 fclose(fp);
 if ( pnFlags )
  *pnFlags = nFlags;
 return bSuccess;
}

bool CMarkup::FindElem( MCD_CSTR szName )
{
 // Search from the current main position under the current parent;
 // the current position is left untouched when nothing is found
 if ( ! m_aPos.GetSize() )
  return false;

 const int iFound = x_FindElem( m_iPosParent, m_iPos, szName );
 if ( ! iFound )
  return false;

 // Move to the element found, with no child position selected
 x_SetPos( m_aPos[iFound].iElemParent, iFound, 0 );
 return true;
}

bool CMarkup::FindChildElem( MCD_CSTR szName )
{
 // Search for a child under the current main position; the child
 // position is left untouched when nothing is found
 //
 // Shorthand: with no current main position, first call FindElem()
 // so the search happens under the root element
 if ( m_iPos == 0 )
  FindElem();

 const int iFoundChild = x_FindElem( m_iPos, m_iPosChild, szName );
 if ( ! iFoundChild )
  return false;

 // Re-derive main and parent positions from the child that was found
 const int iOwner = m_aPos[iFoundChild].iElemParent;
 x_SetPos( m_aPos[iOwner].iElemParent, iOwner, iFoundChild );
 return true;
}

MCD_STR CMarkup::EscapeText( MCD_CSTR szText, int nFlags )
{
 // Convert text as seen outside XML document to XML friendly
 // replacing special characters with ampersand escape codes
 // E.g. convert "6>7" to "6&gt;7"
 //
 // &lt;   less than
 // &amp;  ampersand
 // &gt;   greater than
 //
 // and for attributes:
 //
 // &apos; apostrophe or single quote
 // &quot; double quote
 //
 static MCD_PCSZ szaReplace[] = { _T("&lt;"),_T("&amp;"),_T("&gt;"),_T("&apos;"),_T("&quot;") };
 MCD_PCSZ pFind = (nFlags&MNF_ESCAPEQUOTES)?_T("<&>/'/""):_T("<&>");
 MCD_STR strText;
 MCD_PCSZ pSource = szText;
 int nDestSize = MCD_PSZLEN(pSource);
 nDestSize += nDestSize / 10 + 7;
 MCD_BLDRESERVE(strText,nDestSize);
 MCD_CHAR cSource = *pSource;
 MCD_PCSZ pFound;
 int nCharLen;
 while ( cSource )
 {
  MCD_BLDCHECK(strText,nDestSize,6);
  if ( (pFound=MCD_PSZCHR(pFind,cSource)) != NULL )
  {
   bool bIgnoreAmpersand = false;
   if ( (nFlags&MNF_WITHREFS) && *pFound == _T('&') )
   {
    // Do not replace ampersand if it is start of any entity reference
    // &[#_:A-Za-zU][_:-.A-Za-z0-9U]*; where U is > 0x7f
    MCD_PCSZ pCheckEntity = pSource;
    ++pCheckEntity;
    MCD_CHAR c = *pCheckEntity;
    if ( (c>=_T('A')&&c<=_T('Z')) || (c>=_T('a')&&c<=_T('z'))
      || c==_T('#') || c==_T('_') || c==_T(':') || ((unsigned int)c)>0x7f )
    {
     while ( 1 )
     {
      pCheckEntity += MCD_CLEN( pCheckEntity );
      c = *pCheckEntity;
      if ( c == _T(';') )
      {
       int nEntityLen = (int)(pCheckEntity - pSource) + 1;
       MCD_BLDAPPENDN(strText,pSource,nEntityLen);
       pSource = pCheckEntity;
       bIgnoreAmpersand = true;
      }
      else if ( (c>=_T('A')&&c<=_T('Z')) || (c>=_T('a')&&c<=_T('z')) || (c>=_T('0')&&c<=_T('9'))
        || c==_T('_') || c==_T(':') || c==_T('-') || c==_T('.') || ((unsigned int)c)>0x7f )
       continue;
      break;
     }
    }
   }
   if ( ! bIgnoreAmpersand )
   {
    pFound = szaReplace[pFound-pFind];
    MCD_BLDAPPEND(strText,pFound);
   }
   ++pSource; // ASCII, so 1 byte
  }
  else
  {
   nCharLen = MCD_CLEN( pSource );
   MCD_BLDAPPENDN(strText,pSource,nCharLen);
   pSource += nCharLen;
  }
  cSource = *pSource;
 }

 MCD_BLDRELEASE(strText);
 return strText;
}

MCD_STR CMarkup::UnescapeText( MCD_CSTR szText, int nTextLength /*=-1*/ )
{
 // Convert XML friendly text to text as seen outside XML document
 // ampersand escape codes replaced with special characters e.g. convert "6&gt;7" to "6>7"
 // ampersand numeric codes replaced with character e.g. convert &#60; to <
 // Conveniently the result is always the same or shorter in byte length
 //
 static MCD_PCSZ szaCode[] = { _T("lt;"),_T("amp;"),_T("gt;"),_T("apos;"),_T("quot;") };
 static int anCodeLen[] = { 3,4,3,5,5 };
 static MCD_PCSZ szSymbol = _T("<&>/'/"");
 MCD_STR strText;
 MCD_PCSZ pSource = szText;
 if ( nTextLength == -1 )
  nTextLength = MCD_PSZLEN(szText);
 MCD_BLDRESERVE(strText,nTextLength);
 int nCharLen;
 int nChar = 0;
 while ( nChar < nTextLength )
 {
  if ( pSource[nChar] == _T('&') )
  {
   bool bCodeConverted = false;

   // Is it a numeric character reference?
   if ( pSource[nChar+1] == _T('#') )
   {
    // Is it a hex number?
    int nBase = 10;
    int nNumericChar = nChar + 2;
    MCD_CHAR cChar = pSource[nNumericChar];
    if ( cChar == _T('x') )
    {
     ++nNumericChar;
     cChar = pSource[nNumericChar];
     nBase = 16;
    }

    // Look for terminating semi-colon within 7 characters
    int nCodeLen = 0;
    while ( nCodeLen < 7 && cChar && cChar != _T(';') )
    {
     // only ASCII digits 0-9, A-F, a-f expected
     nCodeLen += MCD_CLEN( &pSource[nNumericChar+nCodeLen] );
     cChar = pSource[nNumericChar + nCodeLen];
    }

    // Process unicode
    if ( cChar == _T(';') )
    {
     int nUnicode = MCD_PSZTOL( &pSource[nNumericChar], NULL, nBase );
#if defined(UNICODE)
     MCD_BLDAPPEND1(strText,nUnicode);
#elif defined(_MBCS)
     MCD_CHAR szANSI[2];
     int nMBLen = wctomb( szANSI, (wchar_t)nUnicode );
     if ( nMBLen > 0 )
     {
      MCD_BLDAPPENDN(strText,szANSI,nMBLen);
     }
     else
      nUnicode = 0;
#else
     if ( nUnicode < 0x80 )
      MCD_BLDAPPEND1(strText,nUnicode);
     else if ( nUnicode < 0x800 )
     {
      // Convert to 2-byte UTF-8
      MCD_BLDAPPEND1(strText,((nUnicode&0x7c0)>>6)|0xc0);
      MCD_BLDAPPEND1(strText,(nUnicode&0x3f)|0x80);
     }
     else
     {
      // Convert to 3-byte UTF-8
      MCD_BLDAPPEND1(strText,((nUnicode&0xf000)>>12)|0xe0);
      MCD_BLDAPPEND1(strText,((nUnicode&0xfc0)>>6)|0x80);
      MCD_BLDAPPEND1(strText,(nUnicode&0x3f)|0x80);
     }
#endif
     if ( nUnicode )
     {
      // Increment index past ampersand semi-colon
      nChar = nNumericChar + nCodeLen + 1;
      bCodeConverted = true;
     }
    }
   }
   else // does not start with #
   {
    // Look for matching &code;
    for ( int nMatch = 0; nMatch < 5; ++nMatch )
    {
     if ( nChar < nTextLength - anCodeLen[nMatch]
      && MCD_PSZNCMP(szaCode[nMatch],&pSource[nChar+1],anCodeLen[nMatch]) == 0 )
     {
      // Insert symbol and increment index past ampersand semi-colon
      MCD_BLDAPPEND1(strText,szSymbol[nMatch]);
      nChar += anCodeLen[nMatch] + 1;
      bCodeConverted = true;
      break;
     }
    }
   }

   // If the code is not converted, leave it as is
   if ( ! bCodeConverted )
   {
    MCD_BLDAPPEND1(strText,_T('&'));
    ++nChar;
   }
  }
  else // not &
  {
   nCharLen = MCD_CLEN(&pSource[nChar]);
   MCD_BLDAPPENDN(strText,&pSource[nChar],nCharLen);
   nChar += nCharLen;
  }
 }
 MCD_BLDRELEASE(strText);
 return strText;
}

int CMarkup::UTF16To8( char* pszUTF8, const wchar_t* pwszUTF16, int nUTF8Count )
{
 // Supports the same arguments as wcstombs
 // the pwszUTF16 source must be a NULL-terminated UTF-16 string
 // if pszUTF8 is NULL, the number of bytes required is returned and nUTF8Count is ignored
 // otherwise pszUTF8 is filled with the result string and NULL-terminated if nUTF8Count allows
 // nUTF8Count is the byte size of pszUTF8 and must be large enough for the NULL if NULL desired
 // and the number of bytes (excluding NULL) is returned
 //
 int nUChar, nUTF8Len = 0;
 while ( *pwszUTF16 )
 {
  // Decode UTF-16
  nUChar = DecodeCharUTF16( pwszUTF16 );
  if ( nUChar == -1 )
   nUChar = '?';

  // Encode UTF-8
  if ( pszUTF8 && nUTF8Len + 4 > nUTF8Count )
  {
   int nUTF8LenSoFar = nUTF8Len;
   EncodeCharUTF8( nUChar, NULL, nUTF8Len );
   if ( nUTF8Len > nUTF8Count )
    return nUTF8LenSoFar;
   nUTF8Len = nUTF8LenSoFar;
  }
  EncodeCharUTF8( nUChar, pszUTF8, nUTF8Len );
 }
 if ( pszUTF8 && nUTF8Len < nUTF8Count )
  pszUTF8[nUTF8Len] = 0;
 return nUTF8Len;
}

int CMarkup::DecodeCharUTF8( const char*& pszUTF8 )
{
 // Return Unicode code point and increment pszUTF8 past 1-4 bytes
 // Returns -1 for an invalid sequence; in that case pszUTF8 is left on the
 // byte that broke the sequence (the lead byte itself is always consumed)
 int nUChar = (unsigned char)*pszUTF8;
 ++pszUTF8;
 if ( nUChar & 0x80 )
 {
  int nExtraChars;
  if ( ! (nUChar & 0x40) )
  {
   // 10xxxxxx is a continuation byte and cannot start a sequence
   return -1;
  }
  else if ( ! (nUChar & 0x20) )
  {
   nExtraChars = 1;
   nUChar &= 0x1f;
  }
  else if ( ! (nUChar & 0x10) )
  {
   nExtraChars = 2;
   nUChar &= 0x0f;
  }
  else if ( ! (nUChar & 0x08) )
  {
   nExtraChars = 3;
   nUChar &= 0x07;
  }
  else
   return -1;
  while ( nExtraChars-- )
  {
   // A trailing byte must match 10xxxxxx; testing only the high bit would
   // swallow the lead byte of the next character
   const int nTrail = (unsigned char)*pszUTF8;
   if ( (nTrail & 0xc0) == 0x80 )
   {
    nUChar = nUChar<<6;
    nUChar |= nTrail & 0x3f;
   }
   else
    return -1;
   ++pszUTF8;
  }
 }
 return nUChar;
}

void CMarkup::EncodeCharUTF16( int nUChar, wchar_t* pwszUTF16, int& nWideLen )
{
 // Append the UTF-16 form of code point nUChar at pwszUTF16[nWideLen] and
 // advance nWideLen by the number of wide chars used (1 or 2)
 // With a NULL pwszUTF16 nothing is written and only nWideLen is advanced
 // The caller must make sure pwszUTF16 has room for up to 2 wide chars
 //
 const bool bSurrogatePair = (nUChar & ~0xffff) != 0;

 // Counting only
 if ( ! pwszUTF16 )
 {
  nWideLen += bSurrogatePair ? 2 : 1;
  return;
 }

 // Fits in a single wide char
 if ( ! bSurrogatePair )
 {
  pwszUTF16[nWideLen++] = (wchar_t)nUChar;
  return;
 }

 // Surrogate pair
 const int nOffset = nUChar - 0x10000;
 pwszUTF16[nWideLen++] = (wchar_t)(((nOffset>>10) & 0x3ff) | 0xd800); // W1
 pwszUTF16[nWideLen++] = (wchar_t)((nOffset & 0x3ff) | 0xdc00); // W2
}

int CMarkup::UTF8To16( wchar_t* pwszUTF16, const char* pszUTF8, int nUTF8Count )
{
 // Supports the same arguments as mbstowcs
 // the pszUTF8 source must be a UTF-8 string which will be processed up to NULL-terminator or nUTF8Count
 // if pwszUTF16 is NULL, the number of wide chars required is returned
 // nUTF8Count is maximum UTF-8 bytes to convert and should include NULL if NULL desired in result
 // if pwszUTF16 is not NULL it is filled with the result string and it must be large enough
 // result will be NULL-terminated if NULL encountered in pszUTF8 before nUTF8Count
 // and the number of UTF-8 bytes converted is returned
 //
 const char* pszPosUTF8 = pszUTF8;
 int nUChar, nUTF8Len = 0, nWideLen = 0;
 while ( nUTF8Len < nUTF8Count )
 {
  // Decode UTF-8
  if ( nUTF8Len + 4 > nUTF8Count )
  {
   // Pre-examine UTF-8 character using temporary null-terminated copy
   // to see if this UTF-8 character boundary is within nUTF8Count
   char szUTF8Copy[5];
   const char* pszPosUTF8Copy = szUTF8Copy;
   int nUTF8EndCount = nUTF8Count - nUTF8Len;
   strncpy( szUTF8Copy, pszPosUTF8, nUTF8EndCount );
   szUTF8Copy[nUTF8EndCount] = '/0';
   nUChar = DecodeCharUTF8( pszPosUTF8Copy );
   int nUTF8EndLen = (int)(pszPosUTF8Copy - szUTF8Copy);
   if ( nUTF8Len + nUTF8EndLen > nUTF8Count )
    break;
  }
  nUChar = DecodeCharUTF8( pszPosUTF8 );
  nUTF8Len = (int)(pszPosUTF8 - pszUTF8);
  if ( ! nUChar )
  {
   if ( pwszUTF16 )
    pwszUTF16[nWideLen] = 0;
   break;
  }
  else if ( nUChar == -1 )
   nUChar = '?';

  // Encode UTF-16
  EncodeCharUTF16( nUChar, pwszUTF16, nWideLen );
 }
 if ( ! pwszUTF16 )
  return nWideLen;
 return nUTF8Len;
}

int CMarkup::DecodeCharUTF16( const wchar_t*& pwszUTF16 )
{
 // Return Unicode code point and increment pwszUTF16 past 1 or 2 (if surrogates) wide chars
 // Returns -1 for incorrect UTF-16: a high surrogate that is not followed by
 // a low surrogate (pwszUTF16 is left on the following unit, which may be
 // the terminator), or a low surrogate on its own (which is consumed)
 int nUChar = *pwszUTF16;
 ++pwszUTF16;
 if ( (nUChar & ~0x000003ff) == 0xd800 ) // W1: high surrogate 0xD800-0xDBFF
 {
  // W2 must be a low surrogate 0xDC00-0xDFFF
  const int nLow = *pwszUTF16;
  if ( (nLow & ~0x000003ff) != 0xdc00 )
   return -1; // incorrect UTF-16
  ++pwszUTF16;
  nUChar = (((nUChar & 0x3ff) << 10) | (nLow & 0x3ff)) + 0x10000;
 }
 else if ( (nUChar & ~0x000003ff) == 0xdc00 )
 {
  // Low surrogate with no preceding high surrogate
  return -1; // incorrect UTF-16
 }
 return nUChar;
}

void CMarkup::EncodeCharUTF8( int nUChar, char* pszUTF8, int& nUTF8Len )
{
 // Write UTF-8 sequence to pszUTF8 for Unicode code point nUChar and update nUTF8Len
 // Be sure pszUTF8 has room for up to 4 bytes
 //
 if ( ! (nUChar & ~0x0000007f) ) // < 0x80
 {
  if ( pszUTF8 )
   pszUTF8[nUTF8Len++] = (char)nUChar;
  else
   ++nUTF8Len;
 }
 else if ( ! (nUChar & ~0x000007ff) ) // < 0x800
 {
  if ( pszUTF8 )
  {
   pszUTF8[nUTF8Len++] = (char)(((nUChar&0x7c0)>>6)|0xc0);
   pszUTF8[nUTF8Len++] = (char)((nUChar&0x3f)|0x80);
  }
  else
   nUTF8Len += 2;
 }
 else if ( ! (nUChar & ~0x0000ffff) ) // < 0x10000
 {
  if ( pszUTF8 )
  {
   pszUTF8[nUTF8Len++] = (char)(((nUChar&0xf000)>>12)|0xe0);
   pszUTF8[nUTF8Len++] = (char)(((nUChar&0xfc0)>>6)|0x80);
   pszUTF8[nUTF8Len++] = (char)((nUChar&0x3f)|0x80);
  }
  else
   nUTF8Len += 3;
 }
 else // < 0x110000
 {
  if ( pszUTF8 )
  {
   pszUTF8[nUTF8Len++] = (char)(((nUChar&0x1c0000)>>18)|0xf0);
   pszUTF8[nUTF8Len++] = (char)(((nUChar&0x3f000)>>12)|0x80);
   pszUTF8[nUTF8Len++] = (char)(((nUChar&0xfc0)>>6)|0x80);
   pszUTF8[nUTF8Len++] = (char)((nUChar&0x3f)|0x80);
  }
  else
   nUTF8Len += 4;

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值