#-*-coding:utf8-*-
import re
# Input file: one free-text record per line, e.g.
#   全水22.21,分析水8.06,灰分8.87,挥发33.44,固定碳53.12,焦渣特征2,硫0.82,低位热量5053。
DATA_PATH = "D:/资料/山西/data_no_null.txt"

# Keyword groups to look for.  Every keyword of every inner list is searched
# independently, in the order given here.
all_word = [["全水"], ["分析水"], ["灰分"], ["挥发"], ["固定碳"], ["焦渣特征"], ["硫"], ["低位热量"]]

# Whitespace plus ASCII / Chinese punctuation that may sit between a keyword
# and its number.  Raw string: the regex is the same as before, but without
# the invalid-escape warnings a plain string literal produces.
_PUNCTUATION_RE = re.compile(
    r"[\s+\!\/_,$%^*(+\"\')]+|[::+——()?【】“”!,。?、~@#¥%……&*()]+"
)

# Digits, an optional dot, then optional further digits ("5053", "22.21", "2.").
_NUMBER_RE = re.compile(r"\d+[.]?\d*")


def _value_after(text, end):
    """Return the number that starts the text following index *end*, or None.

    Punctuation and whitespace are removed from the whole remainder (not only
    from its front) before the number is matched at its very beginning.
    """
    remainder = _PUNCTUATION_RE.sub("", text[end:])
    found = _NUMBER_RE.match(remainder)
    return found.group() if found is not None else None


def extract_values(text, keyword_groups=None):
    """Find every keyword occurrence in *text* and the number right after it.

    Args:
        text: One record, already stripped of surrounding whitespace.
        keyword_groups: List of keyword lists; defaults to ``all_word``.
            Keywords are handed to ``re.finditer`` unchanged, so they are
            interpreted as regular expressions.

    Returns:
        A list of ``(keyword, value)`` tuples, ordered by keyword group, then
        keyword, then position in *text*.  ``value`` is the matched number as
        a string, or ``None`` when no number follows that occurrence.
    """
    if keyword_groups is None:
        keyword_groups = all_word
    found = []
    for group in keyword_groups:
        for keyword in group:
            for hit in re.finditer(keyword, text):
                found.append((keyword, _value_after(text, hit.end())))
    return found


def main(path=DATA_PATH):
    """Print the keyword/number pairs found on every line of *path*.

    For each keyword occurrence with a number, the whole line is printed
    followed by ``keyword:number``; occurrences without a number print
    "没有结果".

    Raises:
        OSError: If *path* cannot be opened.
    """
    # The context manager closes the file even if processing fails part-way.
    with open(path, "r", encoding="utf8") as file:
        for line in file:
            string = line.strip()
            for word, number in extract_values(string):
                if number is not None:
                    print(string)
                    print(word + ":" + number)
                else:
                    print("没有结果")


if __name__ == "__main__":
    main()
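# 正则表达式去除中文标点符号并且获取数字 (Strip Chinese punctuation with a regular expression and extract the numbers)
# 最新推荐文章于 2022-11-01 15:40:29 发布 (web-page residue: "latest recommended article published 2022-11-01 15:40:29")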