Required jar (Maven dependency for pinyin4j):
<dependency>
<groupId>com.belerweb</groupId>
<artifactId>pinyin4j</artifactId>
<version>2.5.1</version>
</dependency>
package com.*.utils;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers for converting Chinese text to pinyin and for sorting
 * strings with the {@link Locale#CHINA} collator.
 *
 * <p>Pinyin lookup is delegated to pinyin4j's {@link PinyinHelper}.
 *
 * @author ***
 * @Title: PinYinUtils
 * @Copyright: *******
 * @Company: *******
 * @Date 2020-05-18-16:26
 */
@Slf4j
public class PinYinUtils {

    /** Matches a single character in the range U+4E00..U+9FA5. */
    private static final Pattern CHINESE_PATTERN = Pattern.compile("[\u4e00-\u9fa5]");

    /**
     * Tells whether the string contains at least one Chinese character
     * (a character in the range U+4E00..U+9FA5).
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if at least one such character is present;
     *         {@code false} otherwise, including for {@code null} input
     */
    public static boolean isContainChinese(String str) {
        if (str == null) {
            return false;
        }
        return CHINESE_PATTERN.matcher(str).find();
    }

    /**
     * Converts the Chinese characters of a string to pinyin and leaves all
     * other characters unchanged.
     *
     * <p>The pinyin is lower-case, without tone marks, and spells "ü" as "v".
     * When pinyin4j reports several readings for a character, the first one
     * is used. Characters above code 128 for which pinyin4j has no reading
     * (for example full-width punctuation or accented Latin letters) are
     * copied to the output as-is.
     *
     * @param str the text to convert; may be {@code null}
     * @return the converted text; an empty string for {@code null} or empty input
     */
    public static String getAlphabet(String str) {
        if (str == null || str.isEmpty()) {
            return "";
        }

        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        // Emit pinyin in lower case.
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        // Omit tone marks / tone numbers.
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        // Spell "ü" as "v".
        format.setVCharType(HanyuPinyinVCharType.WITH_V);

        // The buffer never leaves this method, so no synchronization is needed.
        StringBuilder result = new StringBuilder(str.length() * 2);
        for (char ch : str.toCharArray()) {
            if (ch <= 128) {
                // Low code points are never looked up; copy them verbatim.
                result.append(ch);
                continue;
            }
            try {
                String[] readings = PinyinHelper.toHanyuPinyinStringArray(ch, format);
                if (readings == null || readings.length == 0) {
                    // No pinyin reading for this character: keep it unchanged
                    // instead of failing on a null array.
                    result.append(ch);
                } else {
                    result.append(readings[0]);
                }
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // The character is skipped when the lookup rejects the format.
                log.error("获取中文字符串拼音异常", e);
            }
        }
        return result.toString();
    }

    /**
     * Sorts the list in place using the collator for {@link Locale#CHINA}.
     *
     * @param data the list to sort; it is modified in place, so it must be
     *             mutable. May be {@code null} or empty.
     * @return the same list instance that was passed in ({@code null} if
     *         {@code data} was {@code null})
     */
    public static List<String> sort(List<String> data) {
        if (data == null || data.isEmpty()) {
            return data;
        }
        Comparator<Object> comparator = Collator.getInstance(Locale.CHINA);
        Collections.sort(data, comparator);
        return data;
    }

    /**
     * Demo: prints the sample list, the list after {@link #sort(List)}, and
     * the sorted list of pinyin keys derived from it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        list.add("H3C");
        list.add("hangming");
        list.add("汉明");
        list.add("han明");
        list.add("汉ming");
        list.add("烽火");
        list.add("烽火");
        list.add("汉明");
        list.add("han明");
        list.add("汉明");
        list.add("aaa");
        list.add("111");
        list.add("哎");
        System.out.println(list);

        // Sort first to deal with homophones (different characters, same pinyin).
        list = sort(list);
        System.out.println(list);

        // Build one key per entry: the pinyin form (or the original text when
        // it contains no Chinese) followed by the entry's index in binary.
        List<String> sortList = new ArrayList<>();
        for (int i = 0; i < list.size(); i++) {
            String indexSuffix = Long.toBinaryString(i);
            String entry = list.get(i);
            String key = isContainChinese(entry) ? getAlphabet(entry) : entry;
            sortList.add(key + indexSuffix);
        }
        sortList = sort(sortList);
        System.out.println(sortList);
    }
}
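//Output of main(): the original list, the list after the first sort, and the pinyin keys after the second sort: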
//[H3C, hangming, 汉明, han明, 汉ming, 烽火, 烽火, 汉明, han明, 汉明, aaa, 111, 哎]
//[111, aaa, H3C, hangming, han明, han明, 哎, 烽火, 烽火, 汉ming, 汉明, 汉明, 汉明]
//[1110, aaa1, ai110, fenghuo1000, fenghuo111, H3C10, hangming11, hanming100, hanming1001, hanming101, hanming1010, hanming1011, hanming1100]