<!-- /* Font Definitions */ @font-face {font-family:宋体; panose-1:2 1 6 0 3 1 1 1 1 1; mso-font-alt:SimSun; mso-font-charset:134; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 135135232 16 0 262145 0;} @font-face {font-family:"/@宋体"; panose-1:2 1 6 0 3 1 1 1 1 1; mso-font-charset:134; mso-generic-font-family:auto; mso-font-pitch:variable; mso-font-signature:3 135135232 16 0 262145 0;} /* Style Definitions */ p.MsoNormal, li.MsoNormal, div.MsoNormal {mso-style-parent:""; margin:0cm; margin-bottom:.0001pt; text-align:justify; text-justify:inter-ideograph; mso-pagination:none; font-size:10.5pt; mso-bidi-font-size:12.0pt; font-family:"Times New Roman"; mso-fareast-font-family:宋体; mso-font-kerning:1.0pt;} a:link, span.MsoHyperlink {color:blue; text-decoration:underline; text-underline:single;} a:visited, span.MsoHyperlinkFollowed {color:purple; text-decoration:underline; text-underline:single;} /* Page Definitions */ @page {mso-page-border-surround-header:no; mso-page-border-surround-footer:no;} @page Section1 {size:595.3pt 841.9pt; margin:72.0pt 90.0pt 72.0pt 90.0pt; mso-header-margin:42.55pt; mso-footer-margin:49.6pt; mso-paper-source:0; layout-grid:15.6pt;} div.Section1 {page:Section1;} /* List Definitions */ @list l0 {mso-list-id:115414869; mso-list-type:hybrid; mso-list-template-ids:1220474526 1760334094 67698713 67698715 67698703 67698713 67698715 67698703 67698713 67698715;} @list l0:level1 {mso-level-text:%1; mso-level-tab-stop:21.0pt; mso-level-number-position:left; margin-left:21.0pt; text-indent:-21.0pt;} ol {margin-bottom:0cm;} ul {margin-bottom:0cm;} -->
网络上汉字转拼音的方法一般分 2 种
1 查表法 需要定义对应表
2 分支法 if else 判断
由于汉字在 GBK 编码的时候 4000 左右的常用字是按拼音顺序安排的,之后新增的汉字无法再按拼音顺序插入,所以之后的汉字是按笔画排序的。
GBK 的编码可以查看 http://www.knowsky.com/resource/gb2312tbm.htm#top
在网上找到字库比较全的 if else 分支判断代码 这个非常感谢 牛文平
我在使用他的代码时也纠正了一些汉字的编码错误,得到了我想要的汉字编码表。由于程序在手机上运行,if else 判断效率太低;而查表法需要建立约 50K 的编码表,内存占用虽说有点大,不过可以改进,所以选用查表法实现。
首先 在 Carbide 中 模拟器得到的汉字是 Unicode 编码,需要转换成 GBK 才能使用表来进行查找。下面的函数就是实现 Unicode 转成 GBK 的编码,调用 Symbian SDK 的 ConvertFromUnicode () 函数实现。(需要导入 <charconv.h> 和 charconv.lib 库)
/**
 * Converts a Unicode descriptor to GBK using the Symbian character
 * conversion API (CCnvCharacterSetConverter).
 *
 * Note: despite the missing "L" suffix, this function can leave.
 *
 * @param aUnicode Unicode text to convert.
 * @param aGbk     Receives the GBK-encoded bytes.
 * @leave KErrNotSupported The GBK converter is not available.
 * @leave KErrArgument     ConvertFromUnicode() reported an error
 *                         (e.g. ill-formed input).
 */
void ConvUni2Gbk(TDesC& aUnicode, TDes8& aGbk)
    {
    CCnvCharacterSetConverter* converter = CCnvCharacterSetConverter::NewLC();

    // Make sure the GBK converter is available before attempting to convert.
    if (converter->PrepareToConvertToOrFromL(KCharacterSetIdentifierGbk,
            CEikonEnv::Static()->FsSession()) != CCnvCharacterSetConverter::EAvailable)
        {
        User::Leave(KErrNotSupported);
        }

    // The third argument of this ConvertFromUnicode() overload is an
    // out-parameter that receives the number of unconvertible characters;
    // it is not a conversion state (only ConvertToUnicode() takes a state).
    TInt unconvertibleCount = 0;
    const TInt result = converter->ConvertFromUnicode(aGbk, aUnicode, unconvertibleCount);

    // Every negative return value is an error code. A non-negative value
    // (input characters left unconverted) is deliberately ignored, as before.
    if (result < 0)
        {
        User::Leave(KErrArgument);
        }

    // Name the expected item so debug builds verify the cleanup stack.
    CleanupStack::PopAndDestroy(converter);
    }
// First-level lookup table for the codes n = 1..36 computed by the lookup
// code (GBK lead byte 0xB0, trail bytes 0xA1..0xC4).
// Each entry is a decimal number, stored as an 8-bit string, that is parsed
// and used as an index into the `all` table. Entry 0 is a placeholder so the
// table can be indexed directly with n.
TBuf8<2> a[] =
    {
    // [0] placeholder
    _L8("0"),
    // [1..2] -> all[1] ("a")
    _L8("1"), _L8("1"),
    // [3..15] -> all[2] ("ai")
    _L8("2"), _L8("2"), _L8("2"), _L8("2"), _L8("2"), _L8("2"), _L8("2"),
    _L8("2"), _L8("2"), _L8("2"), _L8("2"), _L8("2"), _L8("2"),
    // [16..24] -> all[3] ("an")
    _L8("3"), _L8("3"), _L8("3"), _L8("3"), _L8("3"),
    _L8("3"), _L8("3"), _L8("3"), _L8("3"),
    // [25..27] -> all[4] ("ang")
    _L8("4"), _L8("4"), _L8("4"),
    // [28..36] -> all[5] ("ao")
    _L8("5"), _L8("5"), _L8("5"), _L8("5"), _L8("5"),
    _L8("5"), _L8("5"), _L8("5"), _L8("5")
    };
定义拼音 a 的表(之后可以按同样的方式定义其他的表;这里使用 TBuf8<2>,与 TInt 数据大小相同)
// Second-level lookup table: pinyin fragments, indexed by the numbers stored
// in the first-level tables (such as `a`). The lookup code appends the
// selected fragment to the result, optionally after a leading consonant
// (see the 'b' branch). Entry 0 ("&") is a placeholder.
TBuf8<5> all[] =
    {
    // [0]
    _L8("&"),
    // [1..5]
    _L8("a"), _L8("ai"), _L8("an"), _L8("ang"), _L8("ao"),
    // [6..10]
    _L8("e"), _L8("ei"), _L8("en"), _L8("er"), _L8("eng"),
    // [11..15]
    _L8("ha"), _L8("hai"), _L8("han"), _L8("hang"), _L8("hao"),
    // [16..19]
    _L8("he"), _L8("hen"), _L8("heng"), _L8("hi"),
    // [20..21]
    _L8("hong"), _L8("hou"),
    // [22..29]
    _L8("hu"), _L8("hua"), _L8("huai"), _L8("huan"),
    _L8("huang"), _L8("hui"), _L8("hun"), _L8("huo"),
    // [30..39]
    _L8("i"), _L8("ia"), _L8("ian"), _L8("iang"), _L8("iao"),
    _L8("ie"), _L8("in"), _L8("ing"), _L8("iong"), _L8("iu"),
    // [40..42]
    _L8("o"), _L8("ong"), _L8("ou"),
    // [43..51]
    _L8("u"), _L8("ua"), _L8("uai"), _L8("uan"), _L8("uang"),
    _L8("ue"), _L8("ui"), _L8("un"), _L8("uo")
    };
以上 2 表配合使用,通过 2 次查表得到想要的拼音编码
TBuf8 <4> gbkBuf;
ConvUni2Gbk (aUnicode,gbkBuf);
int n = 0;
if (gbkBuf[0] > 175)
n = gbkBuf[0] - 176; // 减去 B0
if (gbkBuf[1] > 160)
n = n * 100 + gbkBuf[1] - 160; //A0
TBuf8 <6> tempPinYin;
TBuf <4> tempfind;
if (n <= 0)
return 0;
else if (n <= 36 ) //&& n > 0)
{
TBuf8 <1> find = a[n];
tempfind. Copy (find);
TLex iLex(tempfind);
TInt tempNum;
//TBuf 转 TInt
iLex. Val (tempNum); // tempNum 现在包含了 2 位数字
tempPinYin. Append (all[tempNum]);
}
else if (n <= 232 ) //&& n >= 37)
{
tempPinYin. Append ( 'b' );
TBuf8 <2> find = b[n - 37];
tempfind. Copy (find );
TLex iLex(tempfind);
TInt tempNum;
//TBuf 转 TInt
iLex. Val (tempNum); // tempNum 现在包含了 2 位数字
tempPinYin. Append (all[tempNum]);
}
通过调整 GBK 的编码,进行判断输出想要的拼音。
对没有按拼音排序的汉字,可以先建立所有拼音的表(388 项),
再建立无拼音序汉字的初级表(数字编码),链接到所有拼音的表,通过 2 次查找得到拼音
优化: 上面的判断可以通过二分查找缩短比较次数,不过需要建立数组的数组表,我没有找到合适的方法,期待高手解决
博文结束 宝杰文