# Text content processing (文本内容处理)
import re
# Matches one interval record: three consecutive lines holding
# ``xmin = ...``, ``xmax = ...`` and ``text = "..."``.
pattern = re.compile(r'xmin = .*\n.*xmax = .*\n.*text = ".*"')
# Single-field patterns. Nothing below uses them; they are kept so that any
# other code referring to these names keeps working.
pattern1 = re.compile(r'xmax = .*')
pattern2 = re.compile(r'xmin = .*')

# Labels (with their surrounding double quotes, as written in the file)
# whose intervals are left out of the report.
_SKIPPED_LABELS = frozenset(('"sp"', '"sil"', '"d"'))


def _field_value(line):
    """Return everything after the first '=' of a ``name = value`` line.

    Only the first '=' is treated as the separator, so a value that itself
    contains '=' (for example ``text = "a=b"``) is returned whole. The
    whitespace that follows the '=' is kept, which is what gives the report
    lines their ``xmin: 0.5`` spacing.
    """
    return line.strip().partition('=')[2]


def parse_interval(block):
    """Split one match of ``pattern`` into its three raw values.

    Args:
        block: A three-line string as returned by ``pattern.findall``.

    Returns:
        A ``(xmin, xmax, text)`` tuple of strings, each being the part of
        its line after the first '='.

    Raises:
        ValueError: If ``block`` has fewer than three lines.
    """
    xmin_line, xmax_line, text_line = block.split('\n')[:3]
    return (
        _field_value(xmin_line),
        _field_value(xmax_line),
        _field_value(text_line),
    )


def format_intervals(blocks):
    """Yield one report line per interval whose label is not skipped.

    Args:
        blocks: An iterable of strings matched by ``pattern``.

    Yields:
        Strings of the form ``xmin:<xmin>; xmax:<xmax>; text:<text>``.
    """
    for block in blocks:
        xmin, xmax, text = parse_interval(block)
        # Compare without the surrounding whitespace so the spacing used
        # around '=' in the file does not affect which labels are skipped.
        if text.strip() in _SKIPPED_LABELS:
            continue
        yield 'xmin:{}; xmax:{}; text:{}'.format(xmin, xmax, text)


def read_text(path):
    """Return the whole content of the file at ``path`` as one string.

    The file is opened with the platform default encoding and is closed
    before returning.
    """
    with open(path) as handle:
        return handle.read()


if __name__ == "__main__":
    # These names stay at module level so that any later code in this
    # script that refers to them still finds them.
    content = read_text('./041.TextGrid')  # whole file as a single str
    m = pattern.findall(content)  # list of matched three-line records
    lens = len(m)  # number of matched records
    for report_line in format_intervals(m):
        print(report_line)