"""Scrape the Maoyan top-100 board and append each page as an HTML table.

Each board page (10 movies per ``offset`` step) is downloaded, parsed with a
regular expression and appended to ``maoyan.html`` as its own small HTML
document.  Pages are processed concurrently on a thread pool.
"""
import re
import threading
from multiprocessing.dummy import Pool

import requests
from requests.exceptions import RequestException

# Seconds to wait for the server before giving up on a page; without a
# timeout ``requests.get`` can block a worker thread indefinitely.
REQUEST_TIMEOUT = 10

OUTPUT_FILE = "maoyan.html"

# One match per <dd> entry.  Capture groups, in order: rank index, poster
# URL (data-src), title, star/actor line, release-time line, integer part of
# the score, fractional part of the score.  Raw strings keep ``\d`` from
# being treated as an (invalid) string escape.
_MOVIE_PATTERN = re.compile(
    r'<dd>.*?>(\d+)</i>.*?data-src="(.*?)".*?name">.*?>(.*?)'
    r'<.*?</p>.*?star">(.*?)'
    r'</p>.*?releasetime">(.*?)'
    r'</p>.*?integer">(.*?)</i>.*?fraction">(.*?)</i>',
    re.S,
)

_PAGE_HEADER = '<html><head><meta charset="UTF-8"></head><body><table>'
_PAGE_FOOTER = '</table></body></html>'
_ROW_TEMPLATE = (
    '<tr>'
    '<td>%s</td>'
    '<td>%s</td>'
    '<td>%s</td>'
    '<td><img src="%s"></td>'
    '<td>%s</td>'
    '<td>%s</td>'
    '</tr>'
)

# ``main`` runs on several threads that all append to the same file; the lock
# keeps one page's markup from interleaving with another's.
_WRITE_LOCK = threading.Lock()


def get_one_page(url):
    """Download *url* and return the response body as text.

    Returns ``None`` when the request raises a ``RequestException`` (which
    includes timeouts) or when the status code is anything other than 200.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except RequestException:
        return None
    if response.status_code == 200:
        return response.text
    return None


def parser_one_page(html):
    """Yield one dict per movie entry found in *html*.

    Each dict has the keys ``index``, ``image``, ``title``, ``actor``,
    ``time`` and ``score`` (integer and fractional parts concatenated), all
    as strings.  Yields nothing when *html* is ``None`` or empty, so a
    failed download does not raise ``TypeError`` inside the regex engine.
    """
    if not html:
        return
    for fields in _MOVIE_PATTERN.findall(html):
        index, image, title, actor, release_time, integer, fraction = fields
        yield {
            'index': index,
            'image': image,
            'title': title,
            'actor': actor.strip(),
            'time': release_time.strip(),
            'score': integer + fraction,
        }


def main(page_num):
    """Fetch the board page at offset *page_num* and append it to the output.

    The page is rendered to a single string first and written with one
    ``write`` call while holding ``_WRITE_LOCK``, so concurrent workers cannot
    interleave their output.  If the page cannot be fetched, nothing is
    written for it and a short notice is printed instead.
    """
    url = 'https://maoyan.com/board/4?offset=' + str(page_num)
    html = get_one_page(url)
    if html is None:
        print("skipping offset %s: page could not be fetched" % page_num)
        return

    rows = [
        _ROW_TEMPLATE % (
            movie['index'],
            movie['actor'],
            movie['title'],
            movie['image'],
            movie['time'],
            movie['score'],
        )
        for movie in parser_one_page(html)
    ]
    document = _PAGE_HEADER + ''.join(rows) + _PAGE_FOOTER

    with _WRITE_LOCK:
        with open(OUTPUT_FILE, 'a', encoding="utf-8") as f:
            f.write(document)


if __name__ == '__main__':
    # multiprocessing.dummy.Pool is a thread pool; the context manager
    # releases its worker threads once every page has been processed.
    with Pool() as pool:
        pool.map(main, [i * 10 for i in range(10)])
python_猫眼实例
最新推荐文章于 2023-05-19 11:45:21 发布