1、依赖jar包
maven仓库:
<repository>
<id>alimaven</id>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
GAV:
<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.1</version>
</dependency>
2、工具类
package com.xxx.utils;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import org.junit.Test;
import org.springframework.stereotype.Component;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Helpers for turning Chinese text into Hanyu Pinyin using pinyin4j.
 *
 * <p>All conversion methods first strip punctuation and whitespace via
 * {@link #cleanChar(String)}. For characters with several readings, only the
 * first reading reported by pinyin4j is used.
 */
@Component
public class PinyinUtil {

    /** ASCII punctuation and whitespace; removed in the first cleaning pass. */
    private static final Pattern ASCII_PUNCT_OR_SPACE = Pattern.compile("[\\p{Punct}\\p{Space}]+");

    /** Remaining special symbols (including Chinese punctuation); removed in the second cleaning pass. */
    private static final Pattern SPECIAL_SYMBOLS = Pattern.compile(
            "[`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}<>《》【】‘;:”“’。,、?]");

    /** First code point of the range that {@link #getPinYin(String)} treats as Chinese. */
    private static final char CJK_FIRST = '\u4E00';

    /** Last code point of the range that {@link #getPinYin(String)} treats as Chinese. */
    private static final char CJK_LAST = '\u9FA5';

    /** Ad-hoc demo that prints the result of each helper for a sample string. */
    @Test
    public void test() {
        String s="举个栗子,abc123";
        System.out.println(getAlpha(s));
        System.out.println(getPinYin(s));
        System.out.println(cleanChar(s));
        // Expected output:
        //JGLZabc123
        //jugeliziabc123
        //举个栗子abc123
    }

    /**
     * Returns the input with every convertible non-ASCII character replaced by
     * the upper-case first letter of its pinyin.
     *
     * <p>Example: {@code 举个栗子,abc123} becomes {@code JGLZabc123}.
     *
     * @param chines text to convert; {@code null} is treated as empty
     * @return the cleaned text with pinyin initials substituted; characters that
     *         pinyin4j has no reading for are kept unchanged
     */
    public static String getAlpha(String chines) {
        String cleaned = cleanChar(chines);
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.UPPERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

        StringBuilder initials = new StringBuilder(cleaned.length());
        for (int i = 0; i < cleaned.length(); i++) {
            char c = cleaned.charAt(i);
            // Only characters above the ASCII range are looked up.
            String pinyin = c > 128 ? firstPinyin(c, format) : null;
            if (pinyin != null) {
                initials.append(pinyin.charAt(0));
            } else {
                // ASCII, or a non-ASCII character without a pinyin reading
                // (previously this case caused a NullPointerException).
                initials.append(c);
            }
        }
        return initials.toString();
    }

    /**
     * Converts the Chinese characters of the input to lower-case, tone-less
     * pinyin and leaves every other character unchanged.
     *
     * <p>Example: {@code 举个栗子,abc123} becomes {@code jugeliziabc123}.
     *
     * @param inputString text to convert; may be {@code null}
     * @return the converted text, or {@code "*"} when the input is {@code null},
     *         is empty after cleaning, or is the literal string {@code "null"}
     */
    public static String getPinYin(String inputString) {
        // cleanChar maps null to "" and already trims, so one emptiness check
        // covers null, blank and punctuation-only input.
        String cleaned = cleanChar(inputString);
        if (cleaned.isEmpty() || "null".equals(cleaned)) {
            return "*";
        }

        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        format.setVCharType(HanyuPinyinVCharType.WITH_V);

        StringBuilder output = new StringBuilder(cleaned.length() * 2);
        for (int i = 0; i < cleaned.length(); i++) {
            char c = cleaned.charAt(i);
            // Plain range check instead of compiling a regex for every character.
            String pinyin = (c >= CJK_FIRST && c <= CJK_LAST) ? firstPinyin(c, format) : null;
            if (pinyin != null) {
                output.append(pinyin);
            } else {
                output.append(c);
            }
        }
        return output.toString();
    }

    /**
     * Removes ASCII punctuation, whitespace and a fixed set of special symbols
     * (including Chinese punctuation) from the input.
     *
     * <p>Example: {@code 举个栗子,abc123} becomes {@code 举个栗子abc123}.
     *
     * @param chines text to clean; may be {@code null}
     * @return the cleaned, trimmed text; an empty string for {@code null} input
     */
    public static String cleanChar(String chines) {
        if (chines == null) {
            return "";
        }
        String withoutAscii = ASCII_PUNCT_OR_SPACE.matcher(chines).replaceAll("");
        return SPECIAL_SYMBOLS.matcher(withoutAscii).replaceAll("").trim();
    }

    /**
     * Looks up the first pinyin reading of a single character.
     *
     * @param c      character to look up
     * @param format output format passed to pinyin4j
     * @return the first reading, or {@code null} when pinyin4j reports none
     * @throws IllegalStateException if pinyin4j rejects {@code format}; this
     *         indicates a programming error in how the format was configured
     */
    private static String firstPinyin(char c, HanyuPinyinOutputFormat format) {
        final String[] readings;
        try {
            readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            throw new IllegalStateException("Invalid pinyin output format combination", e);
        }
        if (readings == null || readings.length == 0
                || readings[0] == null || readings[0].isEmpty()) {
            return null;
        }
        return readings[0];
    }
}