使用 openpyxl 将电影数据导入 xlsx

最新推荐文章于 2023-03-25 19:57:56 发布

扣剑书生

最新推荐文章于 2023-03-25 19:57:56 发布

阅读量221

点赞数

本文链接：https://blog.csdn.net/weixin_44038167/article/details/103780890

版权

import requests
from lxml import etree
from openpyxl import Workbook


def _first(nodes, default='无信息'):
    """Return the first item of an XPath result list, or *default* if it is empty."""
    return nodes[0] if nodes else default


def get_movies(page, timeout=10):
    """Fetch one listing page and append one row per movie to the module-level ``rows``.

    Each appended row is
    ``[title, cover_image, duration, publish_time, cate, play_num, like_num,
    description]``. Any field whose XPath matches nothing is filled with a
    placeholder string instead of raising ``IndexError``, so a single
    incomplete list item does not abort the whole scrape.

    Args:
        page: Page number substituted into the listing URL.
        timeout: Value passed to ``requests.get`` as ``timeout`` (seconds), so
            a stalled connection cannot block forever.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        NameError: If no module-level ``rows`` list exists when called.
    """
    url = "https://www.xinpianchang.com/channel/index/type-/sort-like/duration_type-0/resolution_type-/page-%s" % page
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero rows.
    response.raise_for_status()

    # Parse the page and select one <li> per movie.
    html = etree.HTML(response.text)
    movies = html.xpath("/html/body/div[8]/div[2]/ul/li")

    for movie in movies:
        rows.append([
            _first(movie.xpath("./div/div[1]/a/p/text()")),              # title
            # NOTE(review): reads the `_src` attribute rather than `src` --
            # presumably a lazy-load placeholder; confirm against the markup.
            _first(movie.xpath("./a/img/@_src")),                        # cover image
            _first(movie.xpath("./a/span/text()")),                      # duration
            _first(movie.xpath("./a/div[2]/p/text()")),                  # publish time
            _first(movie.xpath("./div/div[1]/div[1]/span[1]/text()")),   # category
            _first(movie.xpath("./div/div[1]/div[2]/span[1]/text()")),   # play count
            _first(movie.xpath("./div/div[1]/div[2]/span[2]/text()")),   # like count
            _first(movie.xpath("./a/div[2]/div/text()"), "描述啥也没有"),  # description
        ])
    # Progress indicator: the page that has just been processed.
    print(page)

if __name__ == '__main__':
    # Script entry point: scrape pages 1..20 and dump every row into one sheet.
    workbook = Workbook()
    worksheet = workbook.active
    worksheet.title = '电影信息'

    # `rows` must be a module-level name: get_movies() appends to it directly.
    # The header row goes in first so it ends up as the first spreadsheet row.
    rows = [['标题', '图片', '时长', '发布时间', '分类', '播放量', '喜爱量', '电影描述']]

    first_page, last_page = 1, 20
    for page_no in range(first_page, last_page + 1):
        get_movies(page_no)

    # Echo everything that was collected before writing it out.
    print(rows)

    # Worksheet.append adds each list as the next row of the sheet.
    for record in rows:
        worksheet.append(record)

    workbook.save('新片场电影.xlsx')