Note: results are not fully accurate for polyphonic characters (多音字).
/**
 * Demo entry point: prints the result of {@code getGBKpy} for a few sample
 * strings, one result per line.
 *
 * @param args command-line arguments (unused)
 */
public static void main( String[] args ) {
    final String[] samples = {
        "Hi..",   // non-Chinese input: no pinyin conversion applies
        "成都",
        "重庆"    // contains a polyphonic character, so the result may be inaccurate
    };
    for (String sample : samples) {
        System.out.println( getGBKpy( sample ) );
    }
}
/**
 * Returns the pinyin initials of the given string using {@code GB2312PyTable}.
 *
 * <p>The string is encoded as GBK (a superset of GB2312) and scanned byte by
 * byte. Single-byte (ASCII) characters are copied to the result unchanged.
 * Double-byte characters whose lead byte is at least 176 and whose trail byte
 * is in 161..254 are looked up in {@code GB2312PyTable} at index
 * {@code (lead - 176) * 94 + (trail - 161)}. Every other double-byte
 * character, and any index beyond the end of the table, contributes nothing
 * to the result.
 *
 * <p>The charset is specified explicitly because the byte arithmetic below is
 * only meaningful for GB2312/GBK bytes; relying on the platform default
 * charset breaks the lookup on, for example, UTF-8 systems.
 *
 * @param hzString the text to convert; may be {@code null}
 * @return the converted string, or {@code ""} when the input is {@code null} or empty
 * @throws java.nio.charset.UnsupportedCharsetException if the runtime does not
 *         provide the GBK charset
 */
public static String getGB2312py( String hzString ) {
    if (hzString == null || hzString.length() == 0) {
        return "";
    }
    byte[] gbkBytes = hzString.getBytes(java.nio.charset.Charset.forName("GBK"));
    StringBuilder result = new StringBuilder(hzString.length());
    int pos = 0;
    while (pos < gbkBytes.length) {
        int lead = gbkBytes[pos++] & 0xFF;
        if (lead < 128) {
            // Single-byte character: pass through as-is.
            result.append((char) lead);
            continue;
        }
        if (pos >= gbkBytes.length) {
            // Dangling lead byte without a trail byte: nothing more to decode.
            break;
        }
        // Always consume the trail byte together with its lead byte so that a
        // trail byte below 128 is never mistaken for an ASCII character.
        int trail = gbkBytes[pos++] & 0xFF;
        if (lead < 176 || trail < 161 || trail > 254) {
            // Outside the GB2312 hanzi area covered by the table.
            continue;
        }
        int pyPos = (lead - 176) * 94 + (trail - 161);
        if (pyPos < GB2312PyTable.length()) {
            result.append(GB2312PyTable.charAt(pyPos));
        }
    }
    return result.toString();
}
/**
 * Returns the pinyin initials of the characters in a string, using the GBK
 * code tables.
 *
 * <p>Because the data is large, the GBK table is split into three parts
 * following the GBK layout: GBK/2 is the national-standard hanzi area that is
 * compatible with GB2312, while GBK/3 and GBK/4 are the extension hanzi areas.
 * Each part has its own index formula:
 * <ul>
 *   <li>GBK/3 (lead 129..160): {@code (lead - 129) * 191 + (trail - 63)}</li>
 *   <li>GBK/2 (lead 170..254, trail &gt; 160): {@code (lead - 176) * 94 + (trail - 160)}</li>
 *   <li>GBK/4 (lead 170..254, trail &lt;= 160): {@code (lead - 170) * 97 + (trail - 63)}</li>
 * </ul>
 * The formulas are 1-based, so the table is read at {@code index - 1}.
 *
 * <p>Single-byte (ASCII) input is copied through unchanged. Double-byte
 * characters outside the ranges above, or whose computed index falls outside
 * the corresponding table, contribute nothing to the result. The final result
 * is lower-cased and trimmed.
 *
 * <p>The string is encoded explicitly as GBK because the lookup depends on
 * GBK byte values; relying on the platform default charset breaks it on, for
 * example, UTF-8 systems.
 *
 * @param hzString the text to convert; may be {@code null}
 * @return the lower-cased, trimmed initials, or {@code ""} when the input is
 *         {@code null} or empty
 * @throws java.nio.charset.UnsupportedCharsetException if the runtime does not
 *         provide the GBK charset
 */
public static String getGBKpy(String hzString) {
    // Fast path.
    if (hzString == null || hzString.length() == 0) {
        return "";
    }
    byte[] gbkBytes = hzString.getBytes(java.nio.charset.Charset.forName("GBK"));
    StringBuilder initials = new StringBuilder(hzString.length());
    int pos = 0;
    while (pos < gbkBytes.length) {
        int lead = gbkBytes[pos++] & 0xFF;
        if (lead < 128) {
            // Plain single-byte (ASCII) character.
            initials.append((char) lead);
            continue;
        }
        if (pos >= gbkBytes.length) {
            // Dangling lead byte without a trail byte: stop before reading
            // past the end of the array.
            break;
        }
        int trail = gbkBytes[pos++] & 0xFF;

        String table;
        int index;
        if (lead >= 129 && lead <= 160) {
            // GBK/3
            table = gbk3;
            index = (lead - 129) * 191 + (trail - 63) - 1;
        } else if (lead >= 170 && lead <= 254) {
            if (trail > 160) {
                // GBK/2
                table = gbk2;
                index = (lead - 176) * 94 + (trail - 160) - 1;
            } else {
                // GBK/4
                table = gbk4;
                index = (lead - 170) * 97 + (trail - 63) - 1;
            }
        } else {
            // Not a GBK hanzi (e.g. lead bytes 161..169).
            continue;
        }
        // Skip code points the tables do not cover (for instance lead bytes
        // 170..175 with a trail byte above 160 yield a negative GBK/2 index).
        if (index >= 0 && index < table.length()) {
            initials.append(table.charAt(index));
        }
    }
    return initials.toString().toLowerCase(java.util.Locale.ROOT).trim();
}
/**
* 定义每一个汉字的拼音码_ GB2312编码的汉字
*/
private static final String GB2312PyTable =
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + "aaaabbbbbbbbbbbbbbbbbbbbbbbbbbbb" +
"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" +
"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" +
"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbcccc" +
"cccccccccccccccccccccccccccccccc" +
"cccccccccccccccccccccccccccccccc" + "cccccccccccccccccccccccccccccccc" +
"cccccccccccccccccccccccccccccccc" +
"cccccccccccccccccccccccccccccccc" + "cccccccccccccccccccccccccccccccc" +
"cccccccccccccccccccccccccccccccc" +
"cccccddddddddddddddddddddddddddd" + "dddddddddddddddddddddddddddddddd" +
"dddddddddddddddddddddddddddddddd" +
"dddddddddddddddddddddddddddddddd" + "dddddddddddddddddddddddddddddddd" +
"dddddddddddddddddddddddddddddeee" +
"eeeeeeeeeeeeeeeeeeefffffffffffff" + "ffffffffffffffffffffffffffffffff" +
"ffffffffffffffffffffffffffffffff" +
"ffffffffffffffffffffffffffffffff" + "ffffffffffffffffgggggggggggggggg" +
"gggggggggggggggggggggggggggggggg" +
"gggggggggggggggggggggggggggggggg" + "gggggggggggggggggggggggggggggggg" +
"gggggggggggggggggggggggggggggggg" +
"ggggggggggghhhhhhhhhhhhhhhhhhhhh" + "hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" +
"hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" +
"hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" + "hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" +
"hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh" +
"jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" + "jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" +
"jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" +
"jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" + "jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" +
"jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" +
"jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" + "jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" +
"jjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjj" +
"jjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkk" + "kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk" +
"kkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkk" +
"kkkkkkkkkkklllllllllllllllllllll" + "llllllllllllllllllllllllllllllll" +
"llllllllllllllllllllllllllllllll" +
"llllllllllllllllllllllllllllllll" + "llllllllllllllllllllllllllllllll" +
"llllllllllllllllllllllllllllllll" +
"llllllllllllllllllllllllllllllll" + "llllllllllllllllllllllllllllllll" +
"lllmmmmmmmmmmmmmmmmmmmmmmmmmmmmm" +
"mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm" + "mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm" +
"mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm" +
"mmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnn" + "nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn" +
"nnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnn" +
"nnnnnnnnnnnooooooooppppppppppppp" + "pppppppppppppppppppppppppppppppp" +
"pppppppppppppppppppppppppppppppp" +
"pppppppppppppppppppppppppppppppp" + "pppppppppppppqqqqqqqqqqqqqqqqqqq" +
"qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq" +
"qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq" + "qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq" +
"qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq" +
"qqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrr" + "rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr" +
"rrrrrsssssssssssssssssssssssssss" +
"ssssssssssssssssssssssssssssssss" + "ssssssssssssssssssssssssssssssss" +
"ssssssssssssssssssssssssssssssss" +
"ssssssssssssssssssssssssssssssss" + "ssssssssssssssssssssssssssssssss" +
"ssssssssssssssssssssssssssssssss" +
"ssssssssssssssssssssssssssssssss" + "ssssssssssssssssssssssssssssssss" +
"sssttttttttttttttttttttttttttttt" +
"tttttttttttttttttttttttttttttttt" + "tttttttttttttttttttttttttttttttt" +
"tttttttttttttttttttttttttttttttt" +
"tttttttttttttttttttttttttttttttw" + "wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww" +
"wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww" +
"wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww" + "wwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxx" +
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +
"xxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyy" +
"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" +
"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" +
"yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" + "yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy" +