package com.tao.acl.common.util;
import org.apache.commons.lang3.ObjectUtils;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
/**
 * Converts the Chinese characters of a string to Hanyu Pinyin using pinyin4j.
 *
 * <p>Two views are offered: {@link #getFullSpelling(String)} replaces every
 * character that has a pinyin reading with that reading, and
 * {@link #getFirstSpelling(String)} replaces it with the first letter of the
 * reading. Characters without a reading (ASCII letters, digits, punctuation,
 * and so on) are copied through unchanged.
 *
 * <p>Input is processed one UTF-16 {@code char} at a time. When pinyin4j
 * returns several readings for a character, only the first one is used.
 */
public class ChineseSpellingUtil {

    /** Chars below this value are ASCII; they are copied as-is without a pinyin lookup. */
    private static final char ASCII_LIMIT = 0x80;

    /**
     * Output format shared by all lookups: tone information is omitted and the
     * v-char type is {@code WITH_V} (pinyin4j then writes "ü" as "v").
     * It is configured once and only read afterwards.
     */
    private static final HanyuPinyinOutputFormat OUTPUT_FORMAT = createOutputFormat();

    /**
     * Small demo: prints the full and first-letter spelling of two sample strings.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String simpleString = "测试用例";
        String complexString = "中国红红sdfsd#%66^$@5";

        System.out.println(simpleString + " = " + getFullSpelling(simpleString));
        System.out.println(complexString + " = " + getFullSpelling(complexString));
        System.out.println(simpleString + " = " + getFirstSpelling(simpleString));
        System.out.println(complexString + " = " + getFirstSpelling(complexString));
    }

    /** Builds the fixed output format used for every lookup. */
    private static HanyuPinyinOutputFormat createOutputFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        format.setVCharType(HanyuPinyinVCharType.WITH_V);
        return format;
    }

    /**
     * Looks up the pinyin reading of a single character.
     *
     * @param ch the character to convert
     * @return the first reading reported by pinyin4j, or {@code null} when the
     *         character has no reading
     * @throws IllegalStateException if the lookup itself fails; with the fixed
     *         {@link #OUTPUT_FORMAT} this would indicate a programming error
     *         rather than bad input
     */
    private static String toPinyin(char ch) {
        String[] readings;
        try {
            readings = PinyinHelper.toHanyuPinyinStringArray(ch, OUTPUT_FORMAT);
        } catch (Exception e) {
            // The lookup declares a checked exception for invalid format settings.
            // Surface it with its cause instead of printing and carrying on.
            throw new IllegalStateException(
                    "Pinyin lookup failed for character U+" + Integer.toHexString(ch), e);
        }
        if (readings == null || readings.length == 0) {
            return null;
        }
        return readings[0];
    }

    /**
     * Shared implementation of both public conversions.
     *
     * @param source      the text to convert; may be {@code null} or empty
     * @param firstLetter {@code true} to emit only the first letter of each
     *                    reading, {@code false} to emit the whole reading
     * @return the converted text, or {@code source} itself when it is
     *         {@code null} or empty
     */
    private static String getSpelling(String source, boolean firstLetter) {
        if (source == null || source.isEmpty()) {
            return source;
        }
        StringBuilder result = new StringBuilder(source.length());
        for (int i = 0; i < source.length(); i++) {
            char ch = source.charAt(i);
            // ASCII never has a pinyin reading, so skip the lookup for it.
            String pinyin = ch < ASCII_LIMIT ? null : toPinyin(ch);
            if (pinyin == null || pinyin.isEmpty()) {
                // No usable reading: keep the original character.
                result.append(ch);
            } else if (firstLetter) {
                result.append(pinyin.charAt(0));
            } else {
                result.append(pinyin);
            }
        }
        return result.toString();
    }

    /**
     * Replaces every character that has a pinyin reading with that reading
     * (without tones) and keeps all other characters.
     *
     * @param source the text to convert; may be {@code null} or empty
     * @return the converted text, or {@code source} itself when it is
     *         {@code null} or empty
     */
    public static String getFullSpelling(String source) {
        return getSpelling(source, false);
    }

    /**
     * Replaces every character that has a pinyin reading with the first letter
     * of that reading and keeps all other characters.
     *
     * @param source the text to convert; may be {@code null} or empty
     * @return the converted text, or {@code source} itself when it is
     *         {@code null} or empty
     */
    public static String getFirstSpelling(String source) {
        return getSpelling(source, true);
    }
}
上述代码执行之后的输出表明:
getFullSpelling() 会把中文转成拼音,其他字符保持不变;而 getFirstSpelling() 只取每个汉字拼音的首字母,其他字符同样保持不变。
知是行之始,行是知之获!