# 使用正则表达式处理带有不同单位的数字字符串，并提取数字值和单位。
def filter_non_zero_items(input_list):
    """Return the items of *input_list* that contain something other than '0'.

    An item is kept when iterating over it yields at least one element that
    differs from the character '0'. Empty items and items consisting solely
    of '0' characters (for example '', '00' or '0000') are therefore dropped.

    Args:
        input_list: Iterable of iterables; the caller in this file passes a
            list of strings.

    Returns:
        A new list holding the kept items in their original order.
    """
    # any() stops at the first non-'0' character, mirroring an early break.
    return [item for item in input_list if any(ch != '0' for ch in item)]
# Demonstration input: a mix of a real number string, empty strings and
# strings made up only of '0' characters.
list_a = [
    '104066666',
    '',
    '00',
    '0000',
    '',
    '',
    '',
]
print(filter_non_zero_items(list_a))
import re
# Sample quantity strings. Each one is a number, optionally preceded by a
# "<count> X" multiplier and optionally followed by a unit.
num_unit_str_list = [
    '104066666.00 PC',
    '30099999.00 SH',
    '2155555.00 SH',
    '3434.00 SH',
    '21999995.00 SH',
    '1.066650 SH',
    '4 X 2500.000 KG',
    '20 X 5000.000 KG',
    '10 X 2500 吨',
    '1500888999 L',
    '99666',
    '2 X 1.11 kg',
]

# Placeholder reported when the string does not end in a unit.
NO_UNIT = '无单位'

# One complete number. The fractional part is matched together with the
# integer part so that '1.066650' is read as a single value instead of two
# unrelated digit groups. Compiled once, outside the loop.
_NUMBER_RE = re.compile(r'\d+(?:\.\d+)?')


def parse_quantity(text):
    """Extract the total numeric value and the unit from a quantity string.

    Every number found in *text* is treated as a factor and the factors are
    multiplied together, so '4 X 2500.000 KG' yields 10000.0 and
    '1.066650 SH' yields 1.06665.

    The unit is the last whitespace-separated token, provided that token
    contains no digit; otherwise NO_UNIT is returned in its place.

    Args:
        text: The quantity string to parse.

    Returns:
        A ``(value, unit, factor_count)`` tuple. ``value`` is the product of
        all numbers found (``0`` when there are none), ``unit`` is the
        detected unit or NO_UNIT, and ``factor_count`` is how many numbers
        were found.
    """
    tokens = text.split()
    if tokens and not re.search(r'\d', tokens[-1]):
        unit = tokens[-1]
    else:
        unit = NO_UNIT

    factors = _NUMBER_RE.findall(text)
    if not factors:
        # Nothing numeric to evaluate; report zero.
        return 0, unit, 0

    value = 1.0
    for factor in factors:
        value *= float(factor)
    return value, unit, len(factors)


for s1 in num_unit_str_list:
    res, uom, factor_count = parse_quantity(s1)
    print(s1, res, uom, factor_count)