package com.taidi.nlp.cn.bot.utils;
import java.text.NumberFormat;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.springframework.util.StringUtils;
/**
 * Converts Chinese numeral strings (optionally mixed with ASCII digits) to numbers.
 *
 * <p>Supported forms include plain Arabic numbers ("12.5"), Chinese numerals with the units
 * 亿/万/千/百/十 ("一万二千三百"), decimals written with 点 ("十三点五"), the colloquial
 * shorthand where the trailing unit is omitted ("一万六" means 16000), and "half" expressions
 * such as "三个半". Input that cannot be parsed yields {@code 0}.
 *
 * @author duanyu
 */
public class ChineseNumFormat {

    /** Splits a numeral into its 亿 / 万 / 千 / 百 / 十 / ones sections (each section optional). */
    private static final Pattern CHINESE_NUM_PATTERN = Pattern.compile(
            "(?<yi>[零点一两二三四五六七八九十百千万\\d]+亿)?(?<wan>[零点一两二三四五六七八九十百千\\d]+万)?(?<thousand>[零点一两二三四五六七八九\\d]+千)?(?<hundred>[零点一两二三四五六七八九\\d]+百)?(?<ten>[零点一两二三四五六七八九\\d]*十)?(?<one>[零点一两二三四五六七八九\\d]+)?");

    /**
     * A coefficient followed by one unit character. The coefficient class accepts ASCII digits so
     * that every section accepted by {@link #CHINESE_NUM_PATTERN} can also be evaluated.
     */
    private static final Pattern NUM_UNIT_PATTERN =
            Pattern.compile("(?<num>[零点一两二三四五六七八九十百千万\\d]+)[亿万千百十]");

    /** Exactly one Chinese digit character. */
    private static final Pattern ONE_UNIT_PATTERN = Pattern.compile("(?<num>[零一两二三四五六七八九])");

    /** A well-formed Arabic number: "12", "12.5", "12." or ".5" (at most one decimal point). */
    private static final Pattern NUM_PATTERN = Pattern.compile("\\d+\\.?\\d*|\\.\\d+");

    /** One or more ASCII digits. */
    private static final Pattern DIGITS_PATTERN = Pattern.compile("\\d+");

    /** Leading quantity (at most three characters) of a "half" expression such as "十一个半". */
    private static final Pattern HALF_NUM_PATTERN =
            Pattern.compile("(?<num>[一二三四五六七八九十两\\d]{1,3})(个)?(半)?");

    /** A string made only of the Chinese digits 零 through 九. */
    private static final Pattern ONLY_NUM_PATTERN = Pattern.compile("(?<num>[零一二三四五六七八九]+)");

    /** A single non-zero Chinese digit or a single ASCII digit; eligible for the shorthand rule. */
    private static final Pattern SHORTHAND_PATTERN = Pattern.compile("[一两二三四五六七八九\\d]");

    /** Chinese digits ordered so that the index of each character is its numeric value. */
    private static final String CHINESE_DIGITS = "零一二三四五六七八九";

    private static final String ZERO = "零";
    private static final String TEN = "十";
    private static final String HUNDRED = "百";
    private static final String THOUSAND = "千";
    private static final String WAN = "万";
    private static final String YI = "亿";
    private static final String POINT = "点";
    private static final String HALF = "半";
    private static final String HALF_WITH_CLASSIFIER = "半个";

    public static void main(String[] args) {
        String numberStr = "十三点五";
        NumberFormat nf = NumberFormat.getInstance();
        System.out.println(nf.format(chineseToNumber(numberStr)));
        System.out.println(halfNumHandle("十一个半"));
    }

    /** Named capture groups of {@link #CHINESE_NUM_PATTERN}. */
    private enum Unit {
        YI("yi"), WAN("wan"), THOUSAND("thousand"), HUNDRED("hundred"), TEN("ten"), ONE("one");

        private final String name;

        Unit(String name) {
            this.name = name;
        }

        public String getName() {
            return name;
        }
    }

    /**
     * Converts a Chinese (or Arabic) numeral string to a number.
     *
     * @param numStr the numeral text; may be {@code null}
     * @return the numeric value, or {@code 0} when the input is {@code null}, empty or does not
     *         match the supported numeral grammar
     */
    public static double chineseToNumber(String numStr) {
        if (numStr == null) {
            return 0;
        }
        // Plain Arabic numbers are parsed directly.
        if (NUM_PATTERN.matcher(numStr).matches()) {
            return Double.parseDouble(numStr);
        }
        Matcher sections = CHINESE_NUM_PATTERN.matcher(numStr);
        if (!sections.matches()) {
            return 0;
        }

        String yi = groupOrZero(sections, Unit.YI);
        String wan = groupOrZero(sections, Unit.WAN);
        String thousand = groupOrZero(sections, Unit.THOUSAND);
        String hundred = groupOrZero(sections, Unit.HUNDRED);
        String ten = groupOrZero(sections, Unit.TEN);
        String one = groupOrZero(sections, Unit.ONE);

        // A bare "十" (or "零十") carries an implicit coefficient of one.
        if (TEN.equals(ten) || "零十".equals(ten)) {
            ten = "一十";
        }

        // Unit that a trailing shorthand digit would take: one step below the lowest explicit
        // section among 亿/万/千/百 (later assignments win, so the lowest present section decides).
        String shorthandUnit = "";
        if (!ZERO.equals(yi)) {
            shorthandUnit = WAN;
        }
        if (!ZERO.equals(wan)) {
            shorthandUnit = THOUSAND;
        }
        if (!ZERO.equals(thousand)) {
            shorthandUnit = HUNDRED;
        }
        if (!ZERO.equals(hundred)) {
            shorthandUnit = TEN;
        }

        // Colloquial shorthand such as "一万六" (16000): a single trailing digit with no explicit
        // tens section is promoted to the next lower unit. Restricting this to a single numeral
        // keeps decimals such as "一百点五" from being multiplied by the unit.
        if (ZERO.equals(ten) && !shorthandUnit.isEmpty() && SHORTHAND_PATTERN.matcher(one).matches()) {
            if (WAN.equals(shorthandUnit)) {
                // One step below 亿 is 千万, evaluated as part of the 万 section.
                wan = one + THOUSAND + WAN;
            } else if (THOUSAND.equals(shorthandUnit)) {
                thousand = one + THOUSAND;
            } else if (HUNDRED.equals(shorthandUnit)) {
                hundred = one + HUNDRED;
            } else {
                ten = one + TEN;
            }
            one = ZERO;
        }

        return scaledSection(yi, YI, 100000000)
                + scaledSection(wan, WAN, 10000)
                + scaledSection(thousand, THOUSAND, 1000)
                + scaledSection(hundred, HUNDRED, 100)
                + scaledSection(ten, TEN, 10)
                + getOneNum(one);
    }

    /** Returns the text captured for the given section, or "零" when the section is absent. */
    private static String groupOrZero(Matcher sections, Unit unit) {
        String text = sections.group(unit.getName());
        return text == null ? ZERO : text;
    }

    /**
     * Evaluates one "coefficient + unit" section, e.g. "三百" with unit "百" and multiplier 100.
     *
     * @param section    the section text, or "零" when the section is absent
     * @param unit       the unit character the section must end with
     * @param multiplier the numeric weight of that unit
     * @return coefficient times multiplier, or {@code 0} when the section does not apply
     */
    private static double scaledSection(String section, String unit, double multiplier) {
        double coefficient = 0;
        Matcher matcher = NUM_UNIT_PATTERN.matcher(section);
        if (matcher.matches() && section.endsWith(unit)) {
            coefficient = chineseToNumber(matcher.group("num"));
        }
        return coefficient * multiplier;
    }

    /**
     * Evaluates the ones section, including an optional decimal part introduced by 点.
     *
     * @param numStr the ones section, or "零" when absent
     * @return the value of the section, or {@code 0} when it cannot be interpreted
     */
    private static double getOneNum(String numStr) {
        // A leading 零 is only a placeholder ("一千零五"); drop it.
        String text = numStr.startsWith(ZERO) ? numStr.substring(1) : numStr;

        if (text.contains(POINT)) {
            // Limit -1 keeps trailing empty parts so "三点" yields ["三", ""] instead of ["三"].
            String[] parts = text.split(POINT, -1);
            int integerPart = (int) chineseToNumber(parts[0]);
            // Keep the fraction as a digit string so leading zeros survive ("零五" -> "05").
            String fraction = toDigitString(parts[1]);
            if (fraction == null || fraction.isEmpty()) {
                return integerPart;
            }
            return Double.parseDouble(integerPart + "." + fraction);
        }
        if (ONE_UNIT_PATTERN.matcher(text).matches()) {
            return chineseNumToNum(text);
        }
        if (DIGITS_PATTERN.matcher(text).matches()) {
            return Double.parseDouble(text);
        }
        return 0;
    }

    /**
     * Maps every character of {@code text} to an ASCII digit.
     *
     * @param text a sequence of Chinese digits (零-九, 两) and/or ASCII digits
     * @return the equivalent ASCII digit string, or {@code null} if any other character occurs
     */
    private static String toDigitString(String text) {
        StringBuilder digits = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c >= '0' && c <= '9') {
                digits.append(c);
                continue;
            }
            int value = CHINESE_DIGITS.indexOf(c);
            if (value >= 0) {
                digits.append(value);
            } else if (c == '两') {
                digits.append(2);
            } else {
                return null;
            }
        }
        return digits.toString();
    }

    /**
     * Converts a positional string of Chinese digits (no 十/百/... units) to an integer,
     * e.g. "一二三" becomes 123.
     *
     * @param num a string of the digits 零-九, or a string of ASCII digits; may be {@code null}
     * @return the integer value, or {@code 0} for {@code null} or empty input
     * @throws NumberFormatException if the input is neither form or does not fit in an {@code int}
     */
    public static int onlyStringNumToInt(String num) {
        if (num == null || num.isEmpty()) {
            return 0;
        }
        String digits = num;
        if (ONLY_NUM_PATTERN.matcher(num).matches()) {
            StringBuilder converted = new StringBuilder(num.length());
            for (int i = 0; i < num.length(); i++) {
                converted.append(CHINESE_DIGITS.indexOf(num.charAt(i)));
            }
            digits = converted.toString();
        }
        return Integer.parseInt(digits);
    }

    /** Returns the value of a single Chinese numeral character, or {@code 0} if unrecognized. */
    private static int chineseNumToNum(String replaceNumber) {
        switch (replaceNumber) {
            case "一":
                return 1;
            case "二":
            case "两":
                return 2;
            case "三":
                return 3;
            case "四":
                return 4;
            case "五":
                return 5;
            case "六":
                return 6;
            case "七":
                return 7;
            case "八":
                return 8;
            case "九":
                return 9;
            case "十":
                return 10;
            default:
                return 0;
        }
    }

    /**
     * Converts quantities that may carry a "half" suffix, e.g. "三个半" is 3.5 and "半个" is 0.5.
     *
     * <p>Only the first run of up to three numeral characters found in the input is used as the
     * whole part; 0.5 is added when the input contains 半.
     *
     * @param halfNum the quantity text; may be {@code null}
     * @return the numeric value, or {@code 0} when no quantity can be found
     */
    public static double halfNumHandle(String halfNum) {
        if (halfNum == null) {
            return 0;
        }
        if (NUM_PATTERN.matcher(halfNum).matches()) {
            // Parse as double: going through float would distort values such as 1.1.
            return Double.parseDouble(halfNum);
        }
        if (HALF.equals(halfNum) || HALF_WITH_CLASSIFIER.equals(halfNum)) {
            return 0.5;
        }
        Matcher matcher = HALF_NUM_PATTERN.matcher(halfNum);
        if (!matcher.find()) {
            return 0;
        }
        String wholeText = matcher.group("num");
        double whole;
        if (NUM_PATTERN.matcher(wholeText).matches()) {
            whole = Double.parseDouble(wholeText);
        } else {
            whole = (int) chineseToNumber(wholeText);
        }
        return halfNum.contains(HALF) ? whole + 0.5 : whole;
    }
}