爬取豆瓣电影 Top 250

代码

#  Author:ZhouChuang
#  coding:utf-8

from bs4 import BeautifulSoup
import requests
import time

# HTTP request headers passed to requests.get() for every page fetch.
# The Cookie value is a single string built from adjacent string literals
# (implicit concatenation), split across lines only for readability.
# NOTE(review): the Cookie looks like a session captured from a browser; it
# presumably expires and is account-specific -- confirm before reusing/committing.
headers = {
    # Desktop Chrome User-Agent string sent with each request.
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3514.0 Safari/537.36',
    'Cookie':'viewed="2166211"; bid=wLwzb9b0g_A; douban-fav-remind=1; ll="118173"; __utmc=30149280; __utmc=223695111; _vwo_uuid_v2=D96C'
             '22273BD00491856812822DDB071A2|e5653604c927a32fa93d6e494419f10c; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1533806091%2C%22h'
             'ttps%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DuEf5o7h6W1QgcIPLdxBrM9-O5w1pL72KygnR1F15VN2W7NpRddrICJa95QHW8IHb%26wd%3D%26eqid%3'
             'Dd58b6e88000163dd000000045b6bfbe0%22%5D; _pk_ses.100001.4cf6=*; ps=y; ck=2wWO; __utma=30149280.1177526221.1531553567.1533803'
             '492.1533806181.4; __utmz=30149280.1533806181.4.3.utmcsr=accounts.douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/phone/b'
             'ind; __utma=223695111.801250262.1533803492.1533803492.1533806181.2; __utmz=223695111.1533806181.2.2.utmcsr=accounts.douban.com'
             '|utmccn=(referral)|utmcmd=referral|utmcct=/phone/bind; ap=1; push_noty_num=0; push_doumail_num=0; douban-profile-remind=1; __'
             'utmv=30149280.15261; __utmb=30149280.22.10.1533806181; _pk_id.100001.4cf6=3eba8e0d5047ec4c.1533803492.2.1533806798.1533803530.;'
             ' __utmb=223695111.15.10.1533806181'
}
# One URL per list page. The list holds 250 entries and the offset advances by
# 25 per page, so the valid offsets are 0, 25, ..., 225 (10 pages). The upper
# bound is 250 rather than 275 so that no request is made for start=250, which
# lies past the last entry.
urls = ['https://movie.douban.com/top250?start={}&filter='.format(i) for i in range(0, 250, 25)]
# URL of the first list page (same as urls[0]); handy for single-page testing.
url = 'https://movie.douban.com/top250?start=0&filter='
def get(url,data=None):
    """Fetch one list page and print one dict per movie found on it.

    Each printed dict has the keys '排名' (rank), '名称' (title, taken from
    the poster image's ``alt``), '评分' (rating), '简评' (one-line quote) and
    '图片链接' (poster image ``src``).

    Args:
        url: Address of the list page to download.
        data: Unused; kept only so existing callers that pass it still work.

    Returns:
        None. Results are written to stdout with ``print``.

    Raises:
        requests.exceptions.RequestException: if the download fails or the
            server does not answer within the timeout.
    """
    # A timeout keeps a stalled connection from hanging the whole crawl.
    wb_data = requests.get(url, headers=headers, timeout=10)
    # Pause after every request to throttle the crawl.
    time.sleep(2)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    # Walk the list item by item and look the fields up *inside* each item.
    # Selecting every field page-wide and zipping the lists misaligns rows as
    # soon as one movie has no quote: later quotes shift onto the wrong movie
    # and zip() silently drops the trailing entries.
    for item in soup.select('#content > div > div.article > ol > li'):
        paiming = item.select_one('div.pic > em')
        image = item.select_one('div.pic > a > img')
        pingfen = item.select_one('div.info > div.bd > div > span.rating_num')
        jianping = item.select_one('div.info > div.bd > p.quote > span')

        # Rank, poster and rating are required; skip an item lacking any of
        # them instead of crashing on None.
        if paiming is None or image is None or pingfen is None:
            continue

        record = {
            '排名': paiming.get_text(),
            '名称': image.get('alt'),
            '评分': pingfen.get_text(),
            # The quote is optional; emit an empty string when it is absent.
            '简评': jianping.get_text() if jianping is not None else '',
            '图片链接': image.get('src'),
        }
        print(record)
# Crawl every list page in order; get() prints the movies it finds.
for page_url in urls:
    get(page_url)

结果截图

12581553-f88d69d17ebb46f3.png

截图.png

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值