/**
 * Utility class for converting Chinese characters to Hanyu Pinyin with pinyin4j.
 *
 * <p>Both public converters expand polyphonic characters: every reading of each
 * character is combined with every reading of the other characters, and the distinct
 * combinations are returned as a single comma-separated string.
 *
 * @Project UserCenter
 * @Author shiming.xin
 * @Version 1.0
 * @Date Mar 30, 2011 10:36:07 AM
 */
public class PinYin4JCn {

    /** Separates the encoded readings of consecutive input characters. */
    private static final String CHAR_SEPARATOR = " ";

    /** Separates the alternative readings of one character, and the combinations in a result. */
    private static final String READING_SEPARATOR = ",";

    /**
     * Converts Chinese characters to the first letters of their pinyin readings.
     *
     * <p>Characters with a code of 128 or less are copied unchanged, except the space
     * and the comma, which coincide with the internal separators and are dropped.
     * Characters above 128 for which pinyin4j returns no reading are dropped as well.
     * Polyphonic characters are expanded into every combination; the original author's
     * example is {@code 重当参 -> cdc,zds,cds,zdc}.
     *
     * @param chines
     *            the text to convert; {@code null} yields an empty string
     * @return the distinct first-letter combinations joined by commas (order follows
     *         {@link Hashtable} iteration order), or an empty string if nothing remains
     */
    public static String converterToFirstSpell(String chines) {
        return convert(chines, true);
    }

    /**
     * Converts Chinese characters to their full lowercase, tone-less pinyin.
     *
     * <p>Characters with a code of 128 or less are copied unchanged, except the space
     * and the comma, which coincide with the internal separators and are dropped.
     * Characters above 128 for which pinyin4j returns no reading are dropped as well.
     * Polyphonic characters are expanded into every combination; the original author's
     * example is {@code 重当参 ->
     * zhongdangcen,zhongdangcan,chongdangcen,chongdangshen,zhongdangshen,chongdangcan}.
     *
     * @param chines
     *            the text to convert; {@code null} yields an empty string
     * @return the distinct full-pinyin combinations joined by commas (order follows
     *         {@link Hashtable} iteration order), or an empty string if nothing remains
     */
    public static String converterToSpell(String chines) {
        return convert(chines, false);
    }

    /**
     * Shared implementation of the two public converters: encodes the readings of every
     * character as {@code "r1,r2 r1 r1,r2,r3 "} and hands the result to the
     * de-duplication and combination steps.
     *
     * @param chines
     *            the text to convert, may be {@code null}
     * @param firstLetterOnly
     *            {@code true} to keep only the first letter of each reading
     * @return the comma-separated combinations, never {@code null}
     */
    private static String convert(String chines, boolean firstLetterOnly) {
        if (chines == null) {
            return "";
        }
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

        StringBuilder encoded = new StringBuilder();
        for (char current : chines.toCharArray()) {
            if (current > 128) {
                appendReadings(encoded, current, format, firstLetterOnly);
            } else {
                // Low code points (ASCII range) are passed through as they are.
                encoded.append(current);
            }
            encoded.append(CHAR_SEPARATOR);
        }
        return parseTheChineseByObject(discountTheChinese(encoded.toString()));
    }

    /**
     * Appends all readings pinyin4j reports for one character, comma-separated.
     * Appends nothing when pinyin4j returns {@code null} for the character.
     *
     * @param target
     *            the buffer receiving the readings
     * @param character
     *            the character to look up
     * @param format
     *            the pinyin4j output format to apply
     * @param firstLetterOnly
     *            {@code true} to append only the first letter of each reading
     * @throws IllegalStateException
     *             if pinyin4j rejects the output format; not expected for the fixed
     *             lowercase / tone-less format built by {@code convert}
     */
    private static void appendReadings(StringBuilder target, char character,
            HanyuPinyinOutputFormat format, boolean firstLetterOnly) {
        String[] readings;
        try {
            readings = PinyinHelper.toHanyuPinyinStringArray(character, format);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            throw new IllegalStateException("pinyin4j rejected the pinyin output format", e);
        }
        if (readings == null) {
            return;
        }
        for (int j = 0; j < readings.length; j++) {
            if (j > 0) {
                target.append(READING_SEPARATOR);
            }
            if (firstLetterOnly) {
                target.append(readings[j].charAt(0));
            } else {
                target.append(readings[j]);
            }
        }
    }

    /**
     * Splits the encoded string into one map per character and removes duplicate
     * readings of the same character.
     *
     * @param theStr
     *            readings encoded as space-separated groups of comma-separated values
     * @return one map per group; keys are the distinct readings, values count how often
     *         each reading occurred in its group
     */
    private static List<Map<String, Integer>> discountTheChinese(String theStr) {
        List<Map<String, Integer>> mapList = new ArrayList<Map<String, Integer>>();
        for (String group : theStr.split(CHAR_SEPARATOR)) {
            Map<String, Integer> distinct = new Hashtable<String, Integer>();
            for (String reading : group.split(READING_SEPARATOR)) {
                Integer count = distinct.get(reading);
                if (count == null) {
                    distinct.put(reading, 1);
                } else {
                    // Remove-then-put is kept on purpose: it affects the Hashtable's
                    // iteration order, which determines the order of the final output.
                    distinct.remove(reading);
                    distinct.put(reading, count + 1);
                }
            }
            mapList.add(distinct);
        }
        return mapList;
    }

    /**
     * Combines the readings of all characters by merging maps step by step (the
     * recommended strategy). Groups without any reading are skipped.
     *
     * @param list
     *            one map of distinct readings per character
     * @return the distinct combinations joined by commas, or an empty string when no
     *         group contains a reading
     */
    private static String parseTheChineseByObject(List<Map<String, Integer>> list) {
        // Hashtable is retained deliberately: the result order is its iteration order.
        Map<String, Integer> combined = null;
        for (Map<String, Integer> readings : list) {
            Map<String, Integer> next = new Hashtable<String, Integer>();
            if (combined == null) {
                // First non-empty group: nothing to prepend yet.
                for (String reading : readings.keySet()) {
                    next.put(reading, 1);
                }
            } else {
                // Extend every combination built so far with every reading of this group.
                for (String prefix : combined.keySet()) {
                    for (String reading : readings.keySet()) {
                        next.put(prefix + reading, 1);
                    }
                }
            }
            // An empty group contributes nothing, so the previous state is kept.
            if (!next.isEmpty()) {
                combined = next;
            }
        }
        if (combined == null) {
            return "";
        }
        return joinWithSeparator(combined.keySet());
    }

    /**
     * Combines the readings of all characters as an ordered Cartesian product
     * (alternative to {@link #parseTheChineseByObject(List)}).
     *
     * <p>Given the groups {1,2,3} {4,5} {7,8,9} {5,2,8}, the result is 1475, 1472, 1478,
     * 1485, 1482, ... : the first group always supplies the first position, the second
     * group the second position, and so on. Unlike the map-based variant, identical
     * combinations are not merged. Any number of groups is supported.
     *
     * @param list
     *            one map of distinct readings per character
     * @return all combinations joined by commas; an empty string when the list is empty
     *         or any group has no reading
     */
    private static String parseTheChineseByFor(List<Map<String, Integer>> list) {
        List<String> combinations = new ArrayList<String>();
        combinations.add("");
        for (Map<String, Integer> readings : list) {
            List<String> extended = new ArrayList<String>();
            for (String prefix : combinations) {
                for (String reading : readings.keySet()) {
                    extended.add(prefix + reading);
                }
            }
            combinations = extended;
        }
        return joinWithSeparator(combinations);
    }

    /**
     * Joins the given values with commas, without a trailing separator.
     *
     * @param values
     *            the values to join, in iteration order
     * @return the joined string; empty when there are no values
     */
    private static String joinWithSeparator(Iterable<String> values) {
        StringBuilder joined = new StringBuilder();
        for (String value : values) {
            joined.append(value).append(READING_SEPARATOR);
        }
        if (joined.length() > 0) {
            joined.setLength(joined.length() - 1);
        }
        return joined.toString();
    }

    /**
     * Small manual demo: prints the first-letter and full-pinyin conversions of a
     * sample string containing polyphonic characters.
     *
     * @param args
     *            unused
     */
    public static void main(String[] args) {
        String str = "重当参";
        System.out.println(converterToFirstSpell(str));
        System.out.println(converterToSpell(str));
    }
}
// Converting Chinese characters to pinyin with pinyin4j requires the pinyin4j jar; this code uses pinyin4j-2.5.0.jar. Download: http://sourceforge.net/projects/pinyin4j/files/latest/download?source=files
// All historical versions can be downloaded from: http://sourceforge.net/projects/pinyin4j/files/
// Reposted from: http://hi.baidu.com/stevenblake/item/fc3906eba5dc2bf0fa42ba0b