import re
from multiprocessing import Pool
import requests
'''
战狼2'''
# Captures two pieces of text from a page: whatever follows a `}">` marker up
# to the end of that line, and, after one empty line, the content of the next
# line.  The literal used to be split across physical lines, which a
# single-quoted string does not allow; the line breaks are spelled `\n` here.
# NOTE(review): the pattern looks like it lost its HTML tag fragments at some
# point (only `}">` and the two groups remain) -- confirm against the real
# page markup before relying on the matches.
movie_pattern = re.compile(r'}">(.*?)\n\n(.*?)\n')


def get_page(url):
    """Download ``url`` and return its address together with the body text.

    Args:
        url: Address of the page to fetch.

    Returns:
        dict: ``{'url': url, 'res': <response body decoded as text>}``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # A timeout keeps a stalled connection from blocking a pool worker forever.
    res = requests.get(url, timeout=10).text
    return {'url': url, 'res': res}
def deal_data(res):
    """Print a fetched page and append the movies found in it to a file.

    Args:
        res: Mapping holding the page address under ``'url'`` and the page
            text under ``'res'``.

    Every ``movie_pattern`` match in the page text is appended to
    ``movie_info.txt`` as one line.
    """
    print(res['url'], res['res'])
    matches = movie_pattern.findall(res['res'])
    with open('movie_info.txt', 'a', encoding='utf-8') as out:
        # Each match is a (name, star) pair taken from the two regex groups.
        out.writelines(
            '电影:%s,%s\n' % (title, star) for title, star in matches
        )
if __name__ == '__main__':
    import sys

    # Pages to scrape; each one is fetched in a worker process and the result
    # is handed to deal_data back in this process.
    urls = [
        'http://maoyan.com/board/7',
        'http://maoyan.com/board/6',
        'http://maoyan.com/board/1',
        'http://maoyan.com/board/2',
        'http://maoyan.com/board/4',
    ]

    pool = Pool(4)
    for url in urls:
        pool.apply_async(
            get_page,
            args=(url,),
            callback=deal_data,
            # Without an error_callback an exception raised by get_page is
            # discarded silently; report it together with the URL instead.
            # `url=url` binds the current value (avoids late binding).
            error_callback=lambda exc, url=url: print(
                'failed to fetch %s: %r' % (url, exc), file=sys.stderr
            ),
        )
    # No more tasks will be submitted; wait for the queued ones to finish.
    pool.close()
    pool.join()