import java.util.*;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import org.jsoup.helper.StringUtil;
/**
* 汉字转拼音util
*/
public class ChinesePinyinUtil {
// Shared pinyin4j output format, configured once when the class is loaded.
static HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();

static {
    // How "ü" is written:
    //   WITH_V           -> "v"  (nv)
    //   WITH_U_AND_COLON -> "u:" (nu:)
    //   WITH_U_UNICODE   -> "ü"  (nü)
    format.setVCharType(HanyuPinyinVCharType.WITH_V);

    // How tones are written:
    //   WITHOUT_TONE     -> no tone (zhong)
    //   WITH_TONE_NUMBER -> digit 1-4 appended (zhong4)
    //   WITH_TONE_MARK   -> tone diacritic (zhòng); must be combined with WITH_U_UNICODE,
    //                       otherwise an exception is raised
    format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    // Letter case: UPPERCASE (ZHONG) or LOWERCASE (zhong).
    format.setCaseType(HanyuPinyinCaseType.UPPERCASE);
}
/**
 * Lists the polyphonic characters of a string together with their Pinyin initials.
 *
 * <p>A character is reported only when {@link #getPinyinInitials(char)} yields more than one
 * distinct initial. A polyphone whose readings all begin with the same letter is therefore
 * treated as a regular character and is not reported.
 *
 * @param str the input text
 * @return one entry per reported character, formatted as {@code char(initial,initial,...)} and
 *         joined with {@code \n}; an empty string when the input is blank or nothing is reported
 */
public static String getPolyphonePinyinInitials(String str) {
    if (StringUtil.isBlank(str)) {
        return "";
    }
    List<String> polyphones = new ArrayList<>();
    for (char ch : str.toCharArray()) {
        List<String> initials = getPinyinInitials(ch);
        if (initials.size() > 1) {
            // Joined with the JDK's String.join: this file imports no StringUtils class.
            polyphones.add(ch + "(" + String.join(",", initials) + ")");
        }
    }
    return String.join("\n", polyphones);
}
/**
 * Converts a string into the Pinyin initials of its characters, expanding polyphones.
 *
 * <p>Every character is mapped through {@link #getPinyinInitials(char)}: Chinese characters
 * become their initial(s), all other characters (letters, digits, punctuation) are kept as
 * they are. When a character has several distinct initials, every combination is produced,
 * so the number of results grows multiplicatively with the number of polyphones.
 * For example, a four-character input whose first and last characters each have the two
 * initials C and Z yields four combinations.
 *
 * @param str the input text
 * @return all distinct combinations joined with {@code \n}; an empty string for blank input.
 *         Letter case is determined by the output format used in
 *         {@link #getPinyinInitials(char)}.
 */
public static String getPinyinInitials(String str) {
    // Uses the jsoup StringUtil already imported by this file (same check as the sibling
    // getPolyphonePinyinInitials), since no StringUtils class is imported.
    if (StringUtil.isBlank(str)) {
        return "";
    }
    List<List<String>> initialsPerChar = new ArrayList<>(str.length());
    for (char ch : str.toCharArray()) {
        initialsPerChar.add(getPinyinInitials(ch));
    }
    return parseTheChineseByObject(initialsPerChar);
}
/**
 * Builds every string that can be formed by picking one entry from each inner list and
 * concatenating the picks in list order.
 *
 * @param list candidate strings per position; may be {@code null} or empty
 * @return the distinct concatenations joined with {@code \n}, in the order they are first
 *         produced; an empty string when {@code list} is {@code null} or empty, or when any
 *         inner list has no entries
 */
private static String parseTheChineseByObject(List<List<String>> list) {
    if (list == null || list.isEmpty()) {
        return "";
    }
    // LinkedHashSet removes duplicates while keeping a predictable, insertion-based order.
    Set<String> combinations = new LinkedHashSet<>(list.get(0));
    for (int i = 1; i < list.size(); i++) {
        Set<String> extended = new LinkedHashSet<>();
        for (String prefix : combinations) {
            for (String suffix : list.get(i)) {
                extended.add(prefix + suffix);
            }
        }
        combinations = extended;
    }
    return String.join("\n", combinations);
}
/**
 * Returns the distinct Pinyin initials of a single character.
 *
 * <p>A character in the range U+4E00..U+9FA5 is converted with pinyin4j using the shared
 * {@code format}; the first letter of every reading is collected, duplicates are dropped and
 * first-seen order is kept. Any other character is returned unchanged as the only element.
 *
 * @param str the character to convert
 * @return the distinct initials, never {@code null} and never empty; when pinyin4j offers no
 *         reading for the character, or rejects the output format, the list contains the
 *         character itself
 */
public static List<String> getPinyinInitials(char str) {
    List<String> pinyinInitialsList = new ArrayList<>();
    // Plain range comparison instead of compiling a regular expression for every character.
    boolean isChinese = str >= 0x4E00 && str <= 0x9FA5;
    if (isChinese) {
        try {
            // A polyphonic character yields one entry per reading.
            // NOTE(review): the shared format is configured as UPPERCASE, so the initials are
            // uppercase letters - confirm this is the case callers expect.
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(str, format);
            // pinyin4j returns null when it has no reading for the character.
            if (readings != null) {
                for (String reading : readings) {
                    if (reading == null || reading.isEmpty()) {
                        continue;
                    }
                    String initial = String.valueOf(reading.charAt(0));
                    if (!pinyinInitialsList.contains(initial)) {
                        pinyinInitialsList.add(initial);
                    }
                }
            }
        } catch (BadHanyuPinyinOutputFormatCombination e1) {
            System.out.println("获取的字符串中多音字拼音失败了" + e1.getMessage());
        }
    }
    // Non-Chinese characters, and Chinese characters that could not be converted, are passed
    // through as-is so that callers combining per-character results never see an empty list.
    if (pinyinInitialsList.isEmpty()) {
        pinyinInitialsList.add(String.valueOf(str));
    }
    return pinyinInitialsList;
}
/**
 * Small manual demo: prints the polyphone report for one sample string, a blank line, and
 * then the initials combinations for a second sample that mixes digits and Chinese characters.
 *
 * @param args unused
 */
public static void main(String[] args) throws Exception {
    System.out.println(getPolyphonePinyinInitials("儿干红藏1"));
    System.out.println();
    System.out.println(getPinyinInitials("2红3干4藏1"));
}
// 汉字转拼音,支持多音字
// 最新推荐文章于 2023-06-06 23:18:36 发布