#!/usr/bin/python
# coding:utf-8
"""Convert Chinese numerals embedded in text into Arabic digits.

Public entry points:

* ``chinese2digits``        -- value of one Chinese numeral string.
* ``changeChineseNum2Arab`` -- replace every Chinese number in a text.
* ``findAllChineseNum``     -- list every Chinese number found in a text.
"""

# Characters that are allowed to *begin* a number. Zero and the units
# 百/千/万/亿 are deliberately absent: they only continue a number.
num_str_start_symbol = [u'一', u'二', u'两', u'三', u'四', u'五', u'六',
                        u'七', u'八', u'九', u'十']

# Characters that may *continue* a number once it has started.
more_num_str_symbol = [u'零', u'一', u'二', u'两', u'三', u'四', u'五',
                       u'六', u'七', u'八', u'九', u'十', u'百', u'千',
                       u'万', u'亿']

# Numeric value of every supported numeral character.
common_used_numerals = {u'零': 0, u'一': 1, u'二': 2, u'两': 2, u'三': 3,
                        u'四': 4, u'五': 5, u'六': 6, u'七': 7, u'八': 8,
                        u'九': 9, u'十': 10, u'百': 100, u'千': 1000,
                        u'万': 10000, u'亿': 100000000}

# Section separators, largest first. Everything to the left of one of
# these is a multiplier for it; everything to the right is added.
_BIG_UNITS = ((u'亿', 100000000), (u'万', 10000))


def _section_value(chars):
    """Return the value of a numeral run that contains no 万 / 亿."""
    section = 0
    pending = 0  # digits read since the last unit character
    for ch in chars:
        val = common_used_numerals.get(ch)
        if val is None:
            # The original implementation failed with TypeError on unknown
            # characters; keep that type so existing handlers still work.
            raise TypeError('not a Chinese numeral character: %r' % (ch,))
        if val < 10:
            # Consecutive digits are read positionally (二零一九 -> 2019);
            # a 零 between units simply resets to the next digit.
            pending = pending * 10 + val
        else:
            # A unit with no digit in front of it counts once (十三 -> 13).
            section += (pending or 1) * val
            pending = 0
    return section + pending


def _numeral_value(text):
    """Return the value of ``text``, splitting on 亿 and then on 万."""
    for unit_char, unit_val in _BIG_UNITS:
        pos = text.rfind(unit_char)
        if pos == -1:
            continue
        # Splitting at the LAST occurrence makes stacked units multiply,
        # e.g. 一万亿 -> (一万) * 亿 and 一万万 -> (一万) * 万.
        head = _numeral_value(text[:pos]) if pos else 1
        return head * unit_val + _numeral_value(text[pos + 1:])
    return _section_value(text)


def chinese2digits(uchars_chinese):
    """Convert one Chinese numeral string to an integer.

    Args:
        uchars_chinese: A (unicode) string, or any sequence of single
            characters, made only of keys of ``common_used_numerals``.

    Returns:
        The integer value; ``0`` for empty input.

    Raises:
        TypeError: If a character is not a known numeral.

    Examples: 一百二十三 -> 123, 十万 -> 100000, 一千二百万 -> 12000000,
    一亿二千万 -> 120000000, 二零一九 -> 2019.
    """
    return _numeral_value(''.join(uchars_chinese))


def _iter_runs(text):
    """Yield ``(is_number, piece)`` pairs covering ``text`` in order.

    A number run begins at a character from ``num_str_start_symbol`` and
    extends over following characters from ``more_num_str_symbol``. Every
    other character is yielded on its own with ``is_number`` False.
    """
    run = []
    for ch in text:
        if run:
            if ch in more_num_str_symbol:
                run.append(ch)
                continue
            # The run just ended; emit it before handling this character.
            yield True, ''.join(run)
            run = []
        if ch in num_str_start_symbol:
            run.append(ch)
        else:
            yield False, ch
    if run:
        yield True, ''.join(run)


def changeChineseNum2Arab(oriStr):
    """Return ``oriStr`` with every Chinese number replaced by its digits.

    Text outside number runs is copied through unchanged; an empty input
    yields an empty string.
    """
    pieces = []
    for is_number, piece in _iter_runs(oriStr):
        pieces.append(str(chinese2digits(piece)) if is_number else piece)
    return ''.join(pieces)


def findAllChineseNum(oriStr):
    """Return the Chinese number substrings of ``oriStr`` in reading order.

    The substrings are returned as written (not converted); the list is
    empty when the text contains no number.
    """
    return [piece for is_number, piece in _iter_runs(oriStr) if is_number]
中文转数字
最新推荐文章于 2024-04-23 09:33:57 发布
本文介绍了一个Python脚本,用于将中文数字字符串转换为阿拉伯数字。重点讲解了`chinese2digits`函数,它处理了中文数字的读法和单位转换。通过实例展示了如何查找并替换整个字符串中的所有中文数字。
摘要由CSDN通过智能技术生成