原文地址:http://www.oschina.net/code/snippet_228898_9674
[文件] DigitUtil.java ~ 4KB 下载(14)
001 | package com.baijob.vsearch.util; |
002 |
003 | import java.util.*; |
004 |
/**
 * Utility class for parsing integers written with Arabic digits, everyday Chinese
 * numerals (for example "一百零三"), or capital-form Chinese numerals (for example
 * "壹佰零叁"). Arabic digits and Chinese units may be mixed, as in "15万".
 *
 * @author liushiquan
 */
public class DigitUtil {
    /** The ASCII Arabic digits '0' through '9'. */
    private static final Set<Character> araDigits = new HashSet<Character>();

    /**
     * Everyday Chinese numeral characters. Indices 0-9 hold the digits zero to nine
     * (the index equals the digit value); the remaining entries are the units.
     */
    private static final char[] SCDigits = {'零', '一', '二', '三', '四', '五', '六', '七', '八', '九', '十', '百', '千', '万', '亿'};

    /** Capital-form Chinese numeral characters, index-aligned with {@link #SCDigits}. */
    private static final char[] TCDigits = {'零', '壹', '贰', '叁', '肆', '伍', '陆', '柒', '捌', '玖', '拾', '佰', '仟', '万', '亿'};

    /** Maps each capital-form numeral character to its everyday counterpart. */
    private static final Map<Character, Character> map = new HashMap<Character, Character>();

    /** Every Chinese numeral character accepted as input, in either form. */
    private static final Set<Character> chineseDigits = new HashSet<Character>();

    /** Unit characters ordered from largest to smallest; parsing splits on the largest unit present. */
    private static final char[] UNIT_CHARS = {'亿', '万', '千', '百', '十'};

    /** Numeric value of each entry of {@link #UNIT_CHARS}. */
    private static final int[] UNIT_VALUES = {100000000, 10000, 1000, 100, 10};

    static {
        for (int i = 0; i < TCDigits.length; i++) {
            map.put(TCDigits[i], SCDigits[i]);
            chineseDigits.add(TCDigits[i]);
            chineseDigits.add(SCDigits[i]);
        }
        for (char c = '0'; c <= '9'; c++) {
            araDigits.add(c);
        }
    }

    private DigitUtil() {
    }

    public static void main(String[] args) {
        System.out.println(parseDigits("零三"));
    }

    /**
     * Parses a number written with Arabic digits and/or Chinese numerals.
     *
     * <p>A single leading '+' is ignored. Anything {@link Integer#parseInt(String)}
     * accepts is returned as is. Otherwise capital-form numerals are converted to the
     * everyday forms and the text is evaluated as a Chinese numeral.
     *
     * @param hanzi the text to parse; may be {@code null}
     * @return the parsed value, or {@code null} if the text is {@code null}, empty,
     *         contains a character that is not a recognised digit, or denotes a value
     *         larger than {@link Integer#MAX_VALUE}
     */
    public static Integer parseDigits(String hanzi) {
        if (!isDigits(hanzi)) {
            return null;
        }
        if (hanzi.charAt(0) == '+') {
            hanzi = hanzi.substring(1);
        }
        try {
            return Integer.parseInt(hanzi);
        } catch (NumberFormatException notPlainInteger) {
            // Not a plain integer literal: fall through to the Chinese numeral parser.
        }

        char[] chars = hanzi.toCharArray();
        changeTCtoSC(chars);
        try {
            return parse(chars, 0, chars.length, 1);
        } catch (NumberFormatException unparsable) {
            // Raised by parse() on overflow; the contract is to report failure as null.
            return null;
        }
    }

    /**
     * Tells whether the text looks like a number this class can read: either something
     * {@link Integer#parseInt(String)} accepts, or a non-empty sequence consisting only
     * of Arabic digits and Chinese numeral characters (everyday or capital form). A
     * single leading '+' is ignored.
     *
     * @param s the text to test; may be {@code null}
     * @return {@code true} if the text qualifies; {@code false} otherwise, including
     *         for {@code null}, the empty string and a lone "+"
     */
    public static boolean isDigits(String s) {
        if (s == null || s.length() == 0) {
            return false;
        }
        if (s.charAt(0) == '+') {
            s = s.substring(1);
        }
        if (s.length() == 0) {
            return false;
        }
        try {
            Integer.parseInt(s);
            return true;
        } catch (NumberFormatException notPlainInteger) {
            // Not a plain integer literal: validate character by character below.
        }
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (!chineseDigits.contains(c) && !araDigits.contains(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Recursively evaluates {@code chars[start, end)} as a Chinese numeral.
     *
     * @param chars     the numeral, already converted to everyday forms
     * @param start     first index of the range (inclusive)
     * @param end       last index of the range (exclusive)
     * @param preNumber multiplier applied when the range is a single digit; this
     *                  implements the shorthand where the digit after a unit counts
     *                  in the next lower unit, e.g. "一万五" is 15000
     * @return the value of the range; an empty range is 0
     * @throws NumberFormatException if the value exceeds {@link Integer#MAX_VALUE} or
     *                               a run without units contains a non-digit
     */
    private static int parse(char[] chars, int start, int end, int preNumber) {
        if (start >= end) {
            return 0;
        }
        if (start + 1 == end) {
            int digit = digitValue(chars[start]);
            if (digit >= 0) {
                return digit * preNumber;
            }
            // A unit standing alone means one of that unit, e.g. "十" is 10.
            int unitIndex = indexOf(UNIT_CHARS, 0, UNIT_CHARS.length, chars[start]);
            return unitIndex == -1 ? 0 : UNIT_VALUES[unitIndex];
        }
        // A leading zero is only a placeholder ("一百零三"); skip it and drop the shorthand multiplier.
        if (chars[start] == '零' || chars[start] == '0') {
            return parse(chars, start + 1, end, 1);
        }
        for (int u = 0; u < UNIT_CHARS.length; u++) {
            int index = indexOf(chars, start, end, UNIT_CHARS[u]);
            if (index == -1) {
                continue;
            }
            // Nothing before the unit means an implicit one, e.g. "十五" is 15.
            long head = (index == start) ? 1 : parse(chars, start, index, 1);
            int tail = parse(chars, index + 1, end, UNIT_VALUES[u] / 10);
            return checkedInt(head * UNIT_VALUES[u] + tail);
        }
        // No unit in the range: read the digits positionally, e.g. "二零一二" is 2012.
        long value = 0;
        for (int i = start; i < end; i++) {
            int digit = digitValue(chars[i]);
            if (digit < 0) {
                throw new NumberFormatException("not a digit: " + chars[i]);
            }
            value = checkedInt(value * 10 + digit);
        }
        return (int) value;
    }

    /** Returns the value 0-9 of an Arabic or everyday Chinese digit, or -1 if {@code c} is not a digit. */
    private static int digitValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        for (int i = 0; i <= 9; i++) {
            if (SCDigits[i] == c) {
                return i;
            }
        }
        return -1;
    }

    /** Narrows a non-negative {@code long} to {@code int}, failing instead of silently wrapping. */
    private static int checkedInt(long value) {
        if (value > Integer.MAX_VALUE) {
            throw new NumberFormatException("value out of int range: " + value);
        }
        return (int) value;
    }

    /** Returns the first index in {@code [start, end)} at which {@code c} occurs, or -1 if absent. */
    private static int indexOf(char[] chars, int start, int end, char c) {
        for (int i = start; i < end; i++) {
            if (chars[i] == c) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Replaces, in place, every capital-form numeral character with its everyday
     * counterpart; all other characters are left untouched.
     *
     * @param chars the characters to convert
     */
    private static void changeTCtoSC(char[] chars) {
        for (int i = 0; i < chars.length; i++) {
            Character c = map.get(chars[i]);
            if (c != null) {
                chars[i] = c;
            }
        }
    }
}