import re
from multiprocessing import Pool
import requests
# Sample of the anchor markup that movie_pattern appears to target. This bare
# string literal is evaluated and discarded, so it has no runtime effect.
'''
<a href="/films/344264" title="战狼2" data-act="boarditem-click" data-val="{movieId:344264}">战狼2</a>'''
# Two capture groups per match:
#   1. the text between `}">` and `</a></p>` (the link text),
#   2. the contents of the `<p class="star">` element that must start on the
#      very next line (the pattern requires exactly one "\n" between them).
# No flags are passed, so `.` does not match newlines: each captured value
# has to sit on a single line of the page source.
movie_pattern = re.compile(r'}">(.*?)</a></p>\n<p class="star">(.*?)</p>')
def get_page(url, timeout=10):
    """Download *url* and return its body text together with the URL.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up; forwarded
            unchanged to ``requests.get``.

    Returns:
        A dict ``{'url': url, 'res': <response body decoded as text>}``.

    Raises:
        requests.exceptions.RequestException: If the request fails, including
            when the server does not answer within *timeout*.
    """
    # requests applies no timeout by default, so a stalled server would block
    # this worker (and any pool join waiting on it) indefinitely.
    res = requests.get(url, timeout=timeout).text
    return {'url': url, 'res': res}
def deal_data(res):
    """Echo a fetched page and append its pattern matches to movie_info.txt.

    ``res`` is a dict with the keys ``'url'`` and ``'res'`` (the page text),
    as returned by ``get_page``. Both values are printed to stdout, then every
    ``movie_pattern`` match in the page text is appended to ``movie_info.txt``
    (UTF-8), one line per match.
    """
    page_url, page_html = res['url'], res['res']
    print(page_url, page_html)
    matches = movie_pattern.findall(page_html)
    with open('movie_info.txt', 'a', encoding='utf-8') as out_file:
        # Each match is a 2-tuple (one item per capture group), so it fills
        # the two %s slots of the line template directly.
        out_file.writelines('电影:%s,%s\n' % pair for pair in matches)
if __name__ == '__main__':
    import sys

    # Pages to download; each one is fetched by get_page in a worker process
    # and the result is handed to deal_data as the apply_async callback.
    urls = [
        'http://maoyan.com/board/7',
        'http://maoyan.com/board/6',
        'http://maoyan.com/board/1',
        'http://maoyan.com/board/2',
        'http://maoyan.com/board/4',
    ]

    # The context manager makes sure the worker processes are torn down even
    # if scheduling a task raises.
    with Pool(4) as pool:
        # Keep every AsyncResult next to its URL so failures can be reported.
        pending = [
            (url, pool.apply_async(get_page, args=(url,), callback=deal_data))
            for url in urls
        ]
        pool.close()
        pool.join()

    # apply_async stores a worker exception on the AsyncResult and skips the
    # callback; unless the result is inspected the failure is silently lost.
    for url, result in pending:
        try:
            result.get()
        except Exception as exc:
            print('failed to fetch %s: %r' % (url, exc), file=sys.stderr)
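# Python3之爬虫中回调函数的简单应用 (A simple use of callback functions in a Python 3 crawler)
# 最新推荐文章于 2023-09-26 09:09:44 发布 (Latest recommended article published 2023-09-26 09:09:44)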