# Two approaches were tried; the regular-expression extraction works better.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
'''
方法1:采用位置参数来提取,效果不佳
'''
##file_data=[]
##with open('待处理文字.txt',encoding='utf-8') as f:
## data=f.read().split(' ')
## file={}
## while(len(data)):
## file['name']=data.pop(0)
## file['director']=data.pop(0)
## file['actor']=data.pop(0)
## file_data.append(dict(file))
##with open('电影列表.txt','w') as f:
## for file in file_data:
## f.write(file['name']+'\n')
## f.write(file['director']+'\n')
## f.write(file['actor']+'\n')
'''
方法2:采用正则表达式,提取很好。
'''
import re
# Matches one record of the form
#   <1-3 digit rank><title> 导演:<director> 主演:<actors>
# The title is a run of non-whitespace characters; director and actors may
# contain spaces but never cross a line break, because [\S ] excludes "\n".
pattern = re.compile(r'\d{1,3}(?P<name>\S+) 导演:(?P<director>[\S ]+) 主演:(?P<actor>[\S ]+)')


def parse_movies(text):
    """Return a list of (name, director, actor) tuples found in *text*.

    Records that do not match ``pattern`` are skipped silently; an empty
    or non-matching text yields an empty list.
    """
    return pattern.findall(text)


def render_movies(movies):
    """Render movie records as the numbered listing written to the output.

    Args:
        movies: iterable of (name, director, actor) string tuples.

    Returns:
        One string holding four lines per record (No / 片名 / 导演 / 演员),
        numbered from 1. Empty input gives an empty string.
    """
    lines = []
    for number, (name, director, actor) in enumerate(movies, start=1):
        lines.append('No:' + str(number) + '\n')
        lines.append('片名:' + name + '\n')
        lines.append('导演:' + director + '\n')
        lines.append('演员:' + actor + '\n')
    return ''.join(lines)


def main(source_path='待处理文字.txt', target_path='电影列表.txt'):
    """Read the raw text, extract the movie records and write the listing.

    Args:
        source_path: UTF-8 text file holding the raw movie text.
        target_path: file to (over)write with the formatted listing.

    Raises:
        OSError: if either file cannot be opened.
    """
    with open(source_path, encoding='utf-8') as source:
        data = source.read()
    # Write with an explicit encoding: relying on the platform default can
    # fail or produce a non-UTF-8 file when the locale cannot encode Chinese.
    with open(target_path, 'w', encoding='utf-8') as target:
        target.write(render_movies(parse_movies(data)))


if __name__ == '__main__':
    main()