Python 爬取猫眼电影排行榜

源码:

import requests
import re
from my_fake_useragent import UserAgent


class CatEyes:
    """Download a Maoyan movie board page and print one line per listed movie.

    The page is fetched once, in ``__init__``, and kept as text in
    ``self.html``; every other method only runs regular expressions over
    that text. Calling the instance is a shortcut for ``main()``.
    """

    def __init__(self):
        """Fetch the board page and store its decoded HTML in ``self.html``."""
        self.base_url = 'https://maoyan.com/board/7?requestCode=431c4b9e77dc91810dd690cae85c2c87zviy1'
        # NOTE(review): the cookie below is a hard-coded browser session and
        # has presumably expired; confirm whether the site still accepts it.
        self.headers = {
            'cookie': '__mta=49700249.1617240671016.1617240929535.1617242500339.4; uuid_n_v=v1; uuid=F046AAB0928911EBB9BA51A9195F549CA1409B435BFD4E41B72A02ED77B5695A; _csrf=4093b4be339d322668b4e0caf9a13cd54ed67389fd503e50dc005cc9b13d88c6; Hm_lvt_703e94591e87be68cc8da0da7cbd0be2=1617240671; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; _lxsdk_cuid=1788b0e42f30-071671f30d761c-5c3f1e49-1fa400-1788b0e42f4c8; _lxsdk=F046AAB0928911EBB9BA51A9195F549CA1409B435BFD4E41B72A02ED77B5695A; __mta=49700249.1617240671016.1617240673940.1617240929535.3; Hm_lpvt_703e94591e87be68cc8da0da7cbd0be2=1617242500; _lxsdk_s=1788b0e42f5-bb1-a20-dd7%7C%7C16',
            "User-Agent": UserAgent().random()
        }
        # A timeout keeps the constructor from blocking forever when the
        # server never answers.
        response = requests.get(url=self.base_url, headers=self.headers, timeout=10)
        self.html = response.content.decode('utf-8')

    def __call__(self, *args, **kwargs):
        """Run ``main()``; any arguments are accepted and ignored."""
        self.main()

    def main(self):
        """Extract rank, title, cast, release date and score, and print them.

        One line is printed per movie: a 1-based counter, a dict of the four
        text fields, and the score formatted as ``分数:<score>``.
        """
        # Output label -> pattern with a single capture group. Dict order is
        # the order in which the fields appear in each printed dict.
        re_rules = {
            '电影排名': r'<dd>\s+<i class=.*?>(.*?)</i>',
            '电影名字': r'title="(.*?)".*/',
            '电影主演': r'<p class="star">\s+主演:(\S+)',
            # Stop at '<' as well as whitespace so that a closing tag that
            # directly follows the date is not captured with it.
            '上映时间': r'<p class="releasetime">上映时间:([^\s<]+)',
        }
        columns = [self.findall(rule) for rule in re_rules.values()]
        # The scores do not change between rows, so compute them once.
        scores = self.Score()

        # zip() pairs the columns row by row and stops at the shortest one,
        # so every complete row is printed (including the last) and uneven
        # match counts cannot raise IndexError.
        # NOTE(review): rows are matched purely by position; if one pattern
        # matches more or fewer times than the others, fields will be
        # misaligned. Verify against the live page.
        rows = zip(zip(*columns), scores)
        for count, (values, score) in enumerate(rows, start=1):
            movies_dict = dict(zip(re_rules, values))
            f = f'分数:{score}'
            print(count, movies_dict, f)

    def findall(self, building_name_rules):
        """Return every match of the regex ``building_name_rules`` in the page."""
        return re.findall(building_name_rules, self.html)

    def Score(self):
        """Return the scores found in the page as ``'<integer>.<fraction>'`` strings.

        Each pattern captures only the first non-whitespace character after
        the opening tag. The two lists are paired by position; an integer
        part without a matching fraction part is dropped.
        """
        integer_parts = re.findall(r'<i class="integer">(\S)', self.html)
        fraction_parts = re.findall(r'<i class="fraction">(\S)', self.html)
        return [
            f'{whole}.{fraction}'
            for whole, fraction in zip(integer_parts, fraction_parts)
        ]


if __name__ == '__main__':
    # Build the scraper (this downloads the page) and immediately invoke it,
    # which prints the extracted movie list.
    CatEyes()()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值