想法
问题:在实际排序中,直接按字符串比较对中文数字(以及位数不同的阿拉伯数字)的排序并不友好。解决思路如下:
- 找到对应中文数字或者阿拉伯数字字符串
- 将中文数字转换成对应的阿拉伯数字字符串,并在较短的一方左侧补 0,使两个数字字符串长度对齐
- 排序
代码实现如下:
package cn.Test;
import java.text.Collator;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Number-aware string comparison for names that embed Chinese numerals
 * (e.g. "第十一") or Arabic numbers (e.g. "1.21").
 *
 * <p>Both inputs are normalised pairwise before being compared: every Chinese
 * numeral is converted to decimal digits and every number is left-padded with
 * zeros to the width of its counterpart in the other string. The normalised
 * strings are then compared with a {@link Collator} for {@link Locale#CHINA}.
 *
 * <p>Numbers are only aligned while they start at the same index in both
 * strings; as soon as the two strings diverge structurally, the remaining text
 * is left untouched and compared as-is.
 *
 * @author Mr.YG
 * @date 2023-10-29 17:31
 */
public class ChineseNumberConverter {
    /** Collator used for the final comparison of the normalised strings. */
    static Collator collator = Collator.getInstance(Locale.CHINA);

    /** Matches a run of Chinese numeral characters. */
    static Pattern cnPattern = Pattern.compile("[零一二两三四五六七八九十百千万]+");

    /** Matches a run of decimal digits. */
    static Pattern arPattern = Pattern.compile("\\d+");

    /** Numeric value of every character accepted by {@link #cnPattern}. */
    static final Map<Character, Integer> digitMap = new HashMap<>();

    static {
        digitMap.put('零', 0);
        digitMap.put('一', 1);
        digitMap.put('二', 2);
        digitMap.put('两', 2);
        digitMap.put('三', 3);
        digitMap.put('四', 4);
        digitMap.put('五', 5);
        digitMap.put('六', 6);
        digitMap.put('七', 7);
        digitMap.put('八', 8);
        digitMap.put('九', 9);
        digitMap.put('十', 10);
        digitMap.put('百', 100);
        digitMap.put('千', 1000);
        digitMap.put('万', 10000);
    }

    /**
     * Converts a run of Chinese numeral characters to its numeric value.
     *
     * <p>Arithmetic is done in {@code long}; numerals whose value exceeds that
     * range overflow silently.
     *
     * @param chineseNumber text consisting only of characters present in {@link #digitMap}
     * @return the numeric value of the numeral
     */
    private static long convertChineseNumberToInteger(String chineseNumber) {
        long closedGroups = 0; // value already multiplied by 万
        long section = 0;      // 十/百/千 units accumulated since the last 万
        long pendingDigit = 0; // last plain digit (0-9) not yet attached to a unit
        for (int i = 0; i < chineseNumber.length(); i++) {
            int value = digitMap.get(chineseNumber.charAt(i));
            if (value < 10) {
                pendingDigit = value;
            } else if (value == 10000) {
                // 万 scales everything read so far; keep it instead of dropping it.
                closedGroups = (closedGroups + section + pendingDigit) * 10000;
                section = 0;
                pendingDigit = 0;
            } else {
                // A bare 十 (as in "十一") carries an implicit leading 1.
                long multiplier = (value == 10 && pendingDigit == 0) ? 1 : pendingDigit;
                section += multiplier * value;
                pendingDigit = 0;
            }
        }
        return closedGroups + section + pendingDigit;
    }

    /**
     * Replaces the range {@code [start, end)} of a string.
     *
     * @param s     original string
     * @param start index of the first character to replace
     * @param end   index just past the last character to replace
     * @param rep   replacement text
     * @return {@code s} with the given range replaced by {@code rep}
     */
    private static String replaceString(String s, int start, int end, String rep) {
        return s.substring(0, start) + rep + s.substring(end);
    }

    /**
     * Left-pads a digit string with zeros up to the requested width.
     *
     * @param number digit string to pad
     * @param width  minimum length of the result
     * @return {@code number}, prefixed with as many '0' characters as needed
     */
    private static String leftPadWithZeros(String number, int width) {
        StringBuilder padded = new StringBuilder(Math.max(width, number.length()));
        for (int i = number.length(); i < width; i++) {
            padded.append('0');
        }
        return padded.append(number).toString();
    }

    /**
     * Walks both strings from left to right and, for every pair of numbers that
     * start at the same index, rewrites them as equally long digit strings.
     *
     * @param s1      first string
     * @param s2      second string
     * @param pattern pattern locating the numbers to align
     * @param chinese {@code true} when matches are Chinese numerals that must be
     *                converted to decimal digits first
     * @return a two-element array holding the rewritten {@code s1} and {@code s2}
     */
    private static String[] alignNumbers(String s1, String s2, Pattern pattern, boolean chinese) {
        int from = 0;
        while (from <= s1.length() && from <= s2.length()) {
            Matcher m1 = pattern.matcher(s1);
            Matcher m2 = pattern.matcher(s2);
            // Stop once either string has no further number or the positions diverge.
            if (!m1.find(from) || !m2.find(from) || m1.start() != m2.start()) {
                break;
            }
            String n1 = chinese ? String.valueOf(convertChineseNumberToInteger(m1.group())) : m1.group();
            String n2 = chinese ? String.valueOf(convertChineseNumberToInteger(m2.group())) : m2.group();
            int width = Math.max(n1.length(), n2.length());
            s1 = replaceString(s1, m1.start(), m1.end(), leftPadWithZeros(n1, width));
            s2 = replaceString(s2, m2.start(), m2.end(), leftPadWithZeros(n2, width));
            // Both replacements have the same width, so the next search starts
            // at the same index in both strings.
            from = m1.start() + width;
        }
        return new String[] {s1, s2};
    }

    /**
     * Compares two strings so that embedded Chinese numerals and Arabic numbers
     * are ordered by numeric value rather than character by character.
     *
     * @param s1 first string
     * @param s2 second string
     * @return a negative value, zero, or a positive value as {@code s1} sorts
     *         before, equal to, or after {@code s2}
     */
    public static int compareNew(String s1, String s2) {
        // 1. Convert Chinese numerals to zero-padded decimal digits.
        String[] aligned = alignNumbers(s1, s2, cnPattern, true);
        // 2. Zero-pad Arabic numbers to a common width.
        aligned = alignNumbers(aligned[0], aligned[1], arPattern, false);
        // 3. Compare the normalised strings with the Chinese collator.
        return collator.compare(aligned[0], aligned[1]);
    }
}
测试如下:
public static void main(String[] args) {
List<String> stringList = Arrays.asList("1.1文件夹第一第五",
"1.22.2文件夹",
"1.21文件夹",
"文件夹第一",
"文件夹第十一",
"文件夹第二十一",
"1.22.1文件夹",
"1.21文件夹",
"1.23文件夹");
List<String> collect = stringList.stream().sorted(new Comparator<String>() {
@Override
public int compare(String o1, String o2) {
return compareNew(o1, o2);
}
}).collect(Collectors.toList());
collect.forEach(System.out::println); public static void main(String[] args) {
}
结果如下:
1.1文件夹第一第五
1.21文件夹
1.21文件夹
1.22.1文件夹
1.22.2文件夹
1.23文件夹
文件夹第一
文件夹第十一
文件夹第二十一