Python---原生爬虫，函数式编程，查看最新热门电影排行榜

最新推荐文章于 2023-03-28 11:28:56 发布

chdxia

最新推荐文章于 2023-03-28 11:28:56 发布

阅读量2.1k

点赞数

本文链接：https://blog.csdn.net/xcd122040356/article/details/82766233

版权

from urllib import request as r
import re

class Spider():
    """Fetch the Douban movie chart page and print the entries found on it.

    The page is parsed with the regular expressions stored as class
    attributes: ``root_pattern`` isolates one chunk of HTML per chart row,
    and the three ``movie_*_pattern`` expressions are applied to each chunk.
    """

    # Page that is downloaded by ``go``.
    url='https://movie.douban.com/chart'
    # Captures everything from a ``<tr class="item">`` up to the next
    # ``</table>``; each capture is treated as one chart entry.
    root_pattern='<tr class="item">(.*?)</table>'
    # Captures the ``title`` attribute of the ``<a class="nbg">`` link.
    movie_name_pattern='<a class="nbg" href=".*?"  title="(.*?)">'
    # Captures the text of ``<span class="rating_nums">``.
    movie_score_pattern='<span class="rating_nums">(.*?)</span>'
    # Captures the text of ``<span class="pl">`` (presumably the vote
    # count shown next to the rating -- confirm against the live page).
    movie_number_pattern='<span class="pl">(.*?)</span>'

    def __content(self):
        """Download ``url`` and return the response body decoded as UTF-8."""
        # The context manager closes the HTTP response even if reading or
        # decoding fails.
        with r.urlopen(self.url) as response:
            return response.read().decode('utf-8')

    def __analysis(self, htmls):
        """Extract the raw regex matches for every chart entry in *htmls*.

        Returns a list with one dict per ``root_pattern`` match. Each dict
        has the keys ``'name'``, ``'score'`` and ``'number'``, and each
        value is the (possibly empty) list of strings matched inside that
        entry.
        """
        # Compile once up front instead of once per entry inside the loop.
        root = re.compile(self.root_pattern, re.S)
        fields = {
            'name': re.compile(self.movie_name_pattern, re.S),
            'score': re.compile(self.movie_score_pattern, re.S),
            'number': re.compile(self.movie_number_pattern, re.S),
        }
        return [
            {key: pattern.findall(html) for key, pattern in fields.items()}
            for html in root.findall(htmls)
        ]

    def __refine(self, informations):
        """Reduce each entry of *informations* to one clean value per field.

        *informations* is an iterable of dicts shaped like the ones built
        by ``__analysis``. Returns a lazy ``map`` yielding dicts with the
        same keys, where each value is the first match with surrounding
        whitespace stripped, or ``None`` when the field had no match.
        """
        def first(matches):
            # An entry may lack a field entirely; report that as None
            # rather than raising IndexError on the empty match list.
            return matches[0].strip() if matches else None

        def refine_one(information):
            return {
                'name': first(information['name']),
                'score': first(information['score']),
                'number': first(information['number']),
            }

        return map(refine_one, informations)

    def go(self):
        """Download the page, extract and clean the entries, print them."""
        htmls = self.__content()
        informations = self.__analysis(htmls)
        informations = list(self.__refine(informations))
        print(informations)

if __name__ == '__main__':
    # Bind the instance to its own lowercase name so the ``Spider`` class
    # is not overwritten, and only hit the network when run as a script.
    spider = Spider()
    spider.go()

chdxia

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
Python---原生爬虫，函数式编程，查看最新热门电影排行榜

from urllib import request as r import re class Spider(): url='https://movie.douban.com/chart' root_pattern='<tr class="item">(.*?)</table>' movie_name_pattern='<a class="...
复制链接

扫一扫