王一博豆瓣电影海报抓取

代码基本通用,只要换个名字就可以下载到你喜欢的明星电影海报。
直接上代码和效果图,注意要把chromedriver下载下来,然后输入正确的路径信息才行。

# -*- coding: utf-8 -*-
import requests
from lxml import etree
from selenium import webdriver
import os

# Celebrity name used as the Douban search keyword; it is appended to the
# search URLs built in the script section of this file.
name = '王一博'

def download(src, id):
    """Download one poster image and save it as ``<id>.webp``.

    The file is written into the ``Xpath的翻页图片包`` directory relative to the
    current working directory; the directory is created when missing.

    Args:
        src: URL of the image to fetch.
        id: Value used as the file name (converted with ``str``). The name
            shadows the builtin but is kept so existing callers still work.

    Any ``requests`` failure (connection error, timeout, HTTP error status)
    is reported with a printed message instead of being raised.
    """
    folder = "Xpath的翻页图片包"
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(folder, exist_ok=True)
    # Titles can contain path separators or characters that Windows file
    # systems reject; replace them so open() neither fails nor escapes the
    # target folder.
    safe_name = "".join("_" if ch in '\\/:*?"<>|' else ch for ch in str(id))
    path = os.path.join(folder, safe_name + '.webp')
    try:
        pic = requests.get(src, timeout=10)
        # Do not store an HTML error page under an image file name.
        pic.raise_for_status()
    except requests.exceptions.RequestException:
        # RequestException also covers read timeouts and HTTP errors, which
        # the narrower ConnectionError would let propagate and stop the crawl.
        print("图片无法下载")
        return
    with open(path, 'wb') as out:
        out.write(pic.content)

def down_load(request_url):
    """Load one search-result page and download every poster found on it.

    The page is opened with the module-level Selenium ``driver`` and the
    rendered page source is parsed with lxml. Each poster is saved through
    ``download`` using the text of its title link as the file name.

    Args:
        request_url: URL of the search-result page to process.

    Returns:
        True if at least one poster ``src`` was found on the page (counted
        before the optional first-entry skip below), otherwise False.
    """
    driver.get(request_url)
    html = etree.HTML(driver.page_source)
    src_xpath = "//div[@class='item-root']/a[@class='cover-link']/img[@class='cover']/@src"
    title_xpath = "//div[@class='item-root']/div[@class='detail']/div[@class='title']/a[@class='title-text']"
    srcs = html.xpath(src_xpath)
    titles = html.xpath(title_xpath)

    num = len(srcs)
    if num > 15:
        # NOTE(review): presumably the first hit is a non-movie entry (e.g. a
        # person card) when a page holds more than 15 items — confirm against
        # the live page layout.
        srcs = srcs[1:]
        titles = titles[1:]

    for src, title in zip(srcs, titles):
        # An element XPath never yields None items; the missing-title case is
        # an <a> element without text, which would otherwise be saved under
        # the name "None".
        if title is None or not title.text:
            continue
        print(src)
        download(src, title.text)
    print('OK')
    print(num)
    return num >= 1
if __name__ == '__main__':
    # Script entry point: open Chrome through chromedriver, then walk the
    # paginated Douban search results for `name` and download the posters.
    requests_url = "https://movie.douban.com/subject_search?search_text=" + name
    # The chromedriver path is machine specific and must be adjusted locally.
    driver = webdriver.Chrome(executable_path=r'C:\Users\×××\AppData\Local\Google\Chrome\Application\chromedriver.exe')
    try:
        driver.get(requests_url)
        html = etree.HTML(driver.page_source)
        print(html)

        base_url = 'https://movie.douban.com/subject_search?search_text=' + name + '&cat=1002&start='
        start = 0
        # The offset advances by 15 per page and stops below 70, i.e. at most
        # five pages; an empty page ends the crawl early.
        while start < 70:
            request_url = base_url + str(start)
            flag = down_load(request_url)
            if flag:
                start += 15
            else:
                break
        print("结束")
    finally:
        # Always shut the browser and the chromedriver process down, even when
        # a page load or download raises.
        driver.quit()

代码有较强的可移植性,换个名字基本就可以下载。主要方法是利用了Xpath路径翻页查询下载,亲测个别明星可能不适用。而且下载的图片是webp格式,所以这也是一个缺陷,后期慢慢改进!先看效果图吧:
在这里插入图片描述
在这里插入图片描述

发布了38 篇原创文章 · 获赞 5 · 访问量 4096
展开阅读全文

没有更多推荐了,返回首页

©️2019 CSDN 皮肤主题: 大白 设计师: CSDN官方博客

分享到微信朋友圈

×

扫一扫,手机浏览