做了一個將簡體漢字轉化成正體漢字的類CChnTrans,轉化依賴字典文件。
其中用Open函式打開字典文件(有FILE *版本和CFile版本);用TransChar轉化一個漢字;用Trans轉化一個字符串。
ChnTrans.h
文件內容:
- #pragma once
- #define UNICODE_FILE_FLAG 0xFEFF
- #define MAX_UNICODE_FILE_SIZE 65535
- // Converts Simplified Chinese characters to Traditional Chinese using a
- // dictionary that Open() loads into a malloc'ed buffer; the destructor
- // frees that buffer.
- class CChnTrans
- {
- private:
- // Dictionary contents (the TCHARs that follow the Unicode file flag);
- // NULL until Open() succeeds.
- TCHAR * pBuffer;
- // Number of TCHARs stored in pBuffer.
- UINT uiBufferTCharCount;
- // Copying is disabled (declared here, never defined): the destructor frees
- // pBuffer, so a member-wise copy would free the same buffer twice.
- CChnTrans(const CChnTrans &);
- CChnTrans & operator=(const CChnTrans &);
- // CFile version (disabled)
- //BOOL IsUnicodeFile(CFile * const poFile);
- // FILE * version: reads the first TCHAR of pFile and returns TRUE only
- // if it equals UNICODE_FILE_FLAG.
- BOOL IsUnicodeFile(FILE * pFile);
- public:
- CChnTrans(void);
- ~CChnTrans(void);
- // Loads the dictionary file named by *pFileName. Returns TRUE on success.
- BOOL Open(const CString * const pFileName);
- // Converts the single character *pTCh in place. Returns FALSE if pTCh is
- // NULL or no usable dictionary is loaded.
- BOOL TransChar(TCHAR * const pTCh);
- // Converts every character of *pstrLine in place. Returns FALSE if
- // pstrLine is NULL or no usable dictionary is loaded.
- BOOL Trans(CString * const pstrLine);
- };
ChnTrans.cpp
文件內容:
- #include "StdAfx.h"
- #include "ChnTrans.h"
- // Starts with no dictionary loaded: null buffer and a zero character count.
- CChnTrans::CChnTrans(void)
- : pBuffer(NULL)
- , uiBufferTCharCount(0)
- {
- }
- // Releases the dictionary buffer and resets both members to the empty state.
- CChnTrans::~CChnTrans(void)
- {
- uiBufferTCharCount = 0;
- if(NULL != pBuffer)
- {
- free(pBuffer);
- pBuffer = NULL;
- }
- }
- #if 0
- // Opens the dictionary file, CFile version (compiled out by the surrounding #if 0).
- // Checks the leading Unicode file flag and loads the TCHARs that follow it
- // into pBuffer / uiBufferTCharCount.
- // pFileName: path of the dictionary file; NULL is rejected.
- // Returns TRUE on success. Returns FALSE if pFileName is NULL, the file
- // cannot be opened, the flag is missing, the file is longer than
- // MAX_UNICODE_FILE_SIZE bytes, the data after the flag is not a whole
- // number of TCHARs, allocation fails or the read comes up short. On
- // failure a previously loaded dictionary is left untouched.
- BOOL CChnTrans::Open(const CString * const pFileName)
- {
- if(NULL == pFileName)
- {
- return FALSE;
- }
- CFile foChineseMap;
- // CFile::Open reports success as "nonzero", so test for zero rather than TRUE.
- if(!foChineseMap.Open(*pFileName, CFile::modeRead | CFile::shareDenyWrite, NULL))
- {
- return FALSE;
- }
- if(FALSE == IsUnicodeFile(&foChineseMap))
- {
- foChineseMap.Close();
- return FALSE;
- }
- const ULONGLONG ullFileSize = foChineseMap.GetLength();
- // The lower bound keeps the unsigned subtraction below from wrapping around.
- if(ullFileSize < sizeof(TCHAR) || MAX_UNICODE_FILE_SIZE < ullFileSize)
- {
- foChineseMap.Close();
- return FALSE;
- }
- // The bytes after the leading flag must form a whole number of TCHARs,
- // otherwise the file is damaged.
- if(0 != (ullFileSize - sizeof(TCHAR)) % sizeof(TCHAR))
- {
- foChineseMap.Close();
- return FALSE;
- }
- const UINT uiTCharCount = (UINT)((ullFileSize - sizeof(TCHAR)) / sizeof(TCHAR));
- const UINT uiByteCount = (UINT)(uiTCharCount * sizeof(TCHAR));
- TCHAR * pNewBuffer = (TCHAR *)malloc(uiByteCount);
- if(NULL == pNewBuffer)
- {
- foChineseMap.Close();
- return FALSE;
- }
- if(uiByteCount != foChineseMap.Read(pNewBuffer, uiByteCount))
- {
- foChineseMap.Close();
- free(pNewBuffer);
- return FALSE;
- }
- foChineseMap.Close();
- // Commit only after everything succeeded, releasing any dictionary
- // loaded by an earlier call (free(NULL) is a no-op).
- free(pBuffer);
- pBuffer = pNewBuffer;
- uiBufferTCharCount = uiTCharCount;
- return TRUE;
- }
- #endif
- // Opens the dictionary file, FILE * version.
- // Opens the file in binary mode, checks the leading Unicode file flag and
- // stores the TCHARs that follow it in pBuffer / uiBufferTCharCount.
- // At most MAX_UNICODE_FILE_SIZE bytes (rounded down to a whole number of
- // TCHARs) are read after the flag; anything beyond that is ignored.
- // pFileName: path of the dictionary file; NULL is rejected.
- // Returns TRUE on success. Returns FALSE if pFileName is NULL, the file
- // cannot be opened, the flag is missing, reading fails, the data read is
- // not a whole number of TCHARs, or allocation fails. On failure a
- // previously loaded dictionary is left untouched.
- BOOL CChnTrans::Open(const CString * const pFileName)
- {
- if(NULL == pFileName)
- {
- return FALSE;
- }
- // _tfopen follows the TCHAR width used by CString and _T("rb").
- FILE * pChineseMap = _tfopen(*pFileName, _T("rb"));
- if(NULL == pChineseMap)
- {
- return FALSE;
- }
- if(FALSE == IsUnicodeFile(pChineseMap))
- {
- fclose(pChineseMap);
- return FALSE;
- }
- // Largest whole-TCHAR byte count within the limit, so an oversized file is
- // cut on a TCHAR boundary instead of failing the whole-TCHAR check below.
- const size_t nMaxBytes = MAX_UNICODE_FILE_SIZE - (MAX_UNICODE_FILE_SIZE % sizeof(TCHAR));
- BYTE * pBufferTmp = (BYTE *)malloc(nMaxBytes);
- if(NULL == pBufferTmp)
- {
- fclose(pChineseMap);
- return FALSE;
- }
- const size_t nBytesRead = fread(pBufferTmp, sizeof(BYTE), nMaxBytes, pChineseMap);
- const BOOL bReadError = (0 != ferror(pChineseMap)) ? TRUE : FALSE;
- // The file is not needed past this point.
- fclose(pChineseMap);
- if(TRUE == bReadError || 0 != nBytesRead % sizeof(TCHAR))
- {
- free(pBufferTmp);
- return FALSE;
- }
- // Copy what was read into a buffer of exactly the right size.
- TCHAR * pNewBuffer = (TCHAR *)malloc(nBytesRead);
- if(NULL == pNewBuffer)
- {
- free(pBufferTmp);
- return FALSE;
- }
- memcpy(pNewBuffer, pBufferTmp, nBytesRead);
- free(pBufferTmp);
- // Commit only after everything succeeded, releasing any dictionary
- // loaded by an earlier call (free(NULL) is a no-op).
- free(pBuffer);
- pBuffer = pNewBuffer;
- uiBufferTCharCount = (UINT)(nBytesRead / sizeof(TCHAR));
- return TRUE;
- }
- #if 0
- // Checks whether the file begins with the Unicode file flag, CFile version
- // (compiled out by the surrounding #if 0).
- // Reads one TCHAR from poFile and compares it with UNICODE_FILE_FLAG.
- // poFile: file to read from; NULL is rejected.
- // Returns TRUE only if a full TCHAR was read and it equals the flag.
- BOOL CChnTrans::IsUnicodeFile(CFile * const poFile)
- {
- if(NULL == poFile)
- {
- return FALSE;
- }
- TCHAR FirstTChar = 0;
- // A short read means the file is too small to contain the flag.
- if((sizeof(TCHAR) != poFile->Read(&FirstTChar, sizeof(TCHAR))) || (UNICODE_FILE_FLAG != FirstTChar))
- {
- return FALSE;
- }
- return TRUE;
- }
- #endif
- // Reports whether the stream starts with the Unicode file flag, FILE * version.
- // Reads one TCHAR from pFile and compares it with UNICODE_FILE_FLAG.
- // pFile: stream to read from; NULL is rejected.
- // Returns TRUE only if the read succeeds and the value equals the flag.
- BOOL CChnTrans::IsUnicodeFile(FILE * pFile)
- {
- if(NULL == pFile)
- {
- return FALSE;
- }
- TCHAR tchLead = 0;
- const size_t nItemsRead = fread(&tchLead, sizeof(TCHAR), 1, pFile);
- if(1 != nItemsRead)
- {
- return FALSE;
- }
- return (UNICODE_FILE_FLAG == tchLead) ? TRUE : FALSE;
- }
- // Converts a single character in place.
- // The dictionary buffer is scanned in steps of three TCHARs: the first
- // TCHAR of each group is the character to match and the second is its
- // replacement.
- // pTCh: character to convert; overwritten with the replacement when a
- // match is found, left unchanged otherwise.
- // Returns FALSE if pTCh is NULL or no usable dictionary is loaded;
- // otherwise TRUE, whether or not a match was found.
- BOOL CChnTrans::TransChar(TCHAR * const pTCh)
- {
- if(NULL == pTCh || NULL == pBuffer || 2 > uiBufferTCharCount)
- {
- return FALSE;
- }
- // Require i + 1 to be in range so that a trailing incomplete group is
- // never read past the end of the buffer.
- for(UINT i = 0; i + 1 < uiBufferTCharCount; i += 3)
- {
- if(pBuffer[i] == *pTCh)
- {
- *pTCh = pBuffer[i + 1];
- return TRUE;
- }
- }
- return TRUE;
- }
- // Converts every character of a string in place by passing each one to TransChar.
- // pstrLine: string to convert; NULL is rejected.
- // Returns FALSE if pstrLine is NULL, no usable dictionary is loaded, or
- // TransChar fails for some character; TRUE otherwise (an empty string
- // is returned unchanged with TRUE).
- BOOL CChnTrans::Trans(CString * const pstrLine)
- {
- if(NULL == pstrLine || NULL == pBuffer || 2 > uiBufferTCharCount)
- {
- return FALSE;
- }
- if(0 == pstrLine->GetLength())
- {
- return TRUE;
- }
- TCHAR * pCur = pstrLine->GetBuffer();
- const int nChars = pstrLine->GetLength();
- BOOL bAllConverted = TRUE;
- for(int n = 0; n < nChars; ++n, ++pCur)
- {
- if(TRUE != TransChar(pCur))
- {
- bAllConverted = FALSE;
- break;
- }
- }
- // The buffer obtained above is released on both the success and failure paths.
- pstrLine->ReleaseBuffer();
- return bAllConverted;
- }
測試程式代碼:
Han.cpp
文件內容:
- // Han.cpp : 定義主控台應用程式的進入點。
- //
- #include "stdafx.h"
- #include "Han.h"
- #include "ChnTrans.h"
- #ifdef _DEBUG
- #define new DEBUG_NEW
- #endif
- // 僅有的一個應用程式物件
- CWinApp theApp;
- using namespace std;
- // Console test driver: initializes MFC, loads the dictionary from
- // C://han.txt and converts a sample string in place.
- // Returns 0 on success, 1 if MFC initialization, opening the dictionary
- // or the conversion fails.
- int _tmain(int argc, TCHAR* argv[], TCHAR* envp[])
- {
- // Initialize MFC; print an error and stop if that fails.
- if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
- {
- _tprintf(_T("嚴重錯誤: MFC 初始化失敗\n"));
- return 1;
- }
- // Stack object: its destructor releases the dictionary on every return path.
- CChnTrans oTrans;
- CString strFileName = _T("C://han.txt");
- if(TRUE != oTrans.Open(&strFileName))
- {
- _tprintf(_T("err: pTC->Open f!\n"));
- return 1;
- }
- CString p = _T("一国家汉汉字国");
- if(TRUE != oTrans.Trans(&p))
- {
- _tprintf(_T("err: pTC->Trans f!\n"));
- return 1;
- }
- return 0;
- }
字庫文件
c:/han.txt
格式這樣寫(用記事本保存成Unicode形式):
一個簡體漢字緊跟它的正體漢字,再加一個空格;下一組依此類推(每組固定三個字符,全部寫在同一行,不要換行):簡體漢字 + 正體漢字 + 空格 + 簡體漢字 + 正體漢字 + 空格……
如:
汉漢 国國