工具类产生是因为个人业务需求需要根据中文汉字排序,而博主又对网上回答不满意,所以才根据相关资料写了该工具类,写入博客 以备不时之需。直接上代码:
Java汉字转成汉语拼音工具类,需要用到pinyin4j.jar包,博主使用的是Maven项目,在pom.xml中引入依赖
<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.0</version>
</dependency>
工具类:
package cn.stephen.study.demoproject.util;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @program: demo
* @description:
* @author: Wang.zw
* @create: 2020-05-09 16:25
**/
public class ChineseTextToPinyin {
/***
* 将汉字转成拼音(取首字母或全拼)
*
* @param hanzi
* @param flag 是否全拼
* @return
*/
public static String convertHanzi2Pinyin(String hanzi, boolean flag) {
/***
* ^[\u2E80-\u9FFF]+$ 匹配所有东亚区的语言
* ^[\u4E00-\u9FFF]+$ 匹配简体和繁体
* ^[\u4E00-\u9FA5]+$ 匹配简体
*/
String regExp = "^[\u4E00-\u9FFF]+$";
StringBuilder sb = new StringBuilder();
if (hanzi == null || "".equals(hanzi.trim())) {
return "";
}
String pinyin = "";
for (int i = 0; i < hanzi.length(); i++) {
char unit = hanzi.charAt(i);
if (match(String.valueOf(unit), regExp)) { //是汉字,则转拼音
pinyin = convertSingleHanzi2Pinyin(unit);
if (flag) {
sb.append(pinyin);
} else {
sb.append(pinyin.charAt(0));
}
} else {
sb.append(unit);
}
}
return sb.toString();
}
/***
* 将单个汉字转成拼音
*
* @param hanzi
* @return
*/
private static String convertSingleHanzi2Pinyin(char hanzi) {
HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
String[] res;
StringBuilder sb = new StringBuilder();
try {
res = PinyinHelper.toHanyuPinyinStringArray(hanzi, outputFormat);
sb.append(res[0]);//对于多音字,只用第一个拼音
} catch (Exception e) {
return "";
}
return sb.toString();
}
/***
* @param str 源字符串
* @param regex 正则表达式
* @return 是否匹配
*/
public static boolean match(String str, String regex) {
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(str);
return matcher.find();
}
}