# 正则抽取文本中的时间,并按照出现的先后顺序排序输出
import re
from operator import itemgetter
# Date shapes recognised by time_ex(), compiled once at import time.
# Raw strings keep "\d" from being parsed as an (invalid) string escape.
_TIME_PATTERNS = (
    re.compile(r'\d{4}年\d{1,2}月\d{1,2}日'),  # year + month + day
    re.compile(r'\d{4}年\d{1,2}月'),  # year + month
    re.compile(r'\d{4}年'),  # year only
    re.compile(r'\d{1,2}月\d{1,2}日'),  # month + day
    re.compile(r'\d{1,2}日'),  # day only
)


def time_ex(s):
    """Extract Chinese date expressions from ``s`` in order of appearance.

    Every pattern in ``_TIME_PATTERNS`` is run over the text.  A match whose
    span lies entirely inside another match's span is dropped, so only the
    most complete expression covering a stretch of text is reported (the
    year-only match is discarded when a full year-month-day match covers it).

    Args:
        s: The text to scan.

    Returns:
        A list of ``[matched_text, start_index]`` two-element lists, sorted
        by ``start_index`` ascending.  Empty when nothing matches.

    Example:
        >>> time_ex("1986年3月24日至25日")
        [['1986年3月24日', 0], ['25日', 11]]
    """
    # Collect (text, (start, end)) for every match of every pattern.
    matches = []
    for pattern in _TIME_PATTERNS:
        for found in pattern.finditer(s):
            matches.append((found.group(), found.span()))

    def is_nested(span):
        # True when a *different* span fully covers this one.
        start, end = span
        return any(
            other != span and other[0] <= start and end <= other[1]
            for _, other in matches
        )

    kept = [[text, span[0]] for text, span in matches if not is_nested(span)]
    kept.sort(key=itemgetter(1))
    return kept
import time

# Sample sentence used to exercise and time time_ex().
q = "1986年3月24日至25日,两国发生军事冲突,利军遭受重大损失,利发誓要进行报复。同年4月2日和4日泛美航空公司的1架班机和德国西柏林的1家舞厅相继被炸,"

# perf_counter() is the high-resolution monotonic clock intended for measuring
# short durations; time.time() is a wall clock that can be too coarse for a
# call this fast and may jump if the system clock is adjusted.
ti = time.perf_counter()
time_ex(q)
print(time.perf_counter() - ti)  # elapsed seconds for one call