import requests
from lxml import etree
from openpyxl import Workbook
def _first_or_default(node, path, default='无信息'):
    """Return the first XPath match for *path* under *node*, or *default* if none."""
    matches = node.xpath(path)
    return matches[0] if matches else default


def get_movies(page, timeout=10):
    """Scrape one listing page and append one row per movie to ``rows``.

    The function fetches the listing URL for *page*, parses the HTML with
    lxml, and for every ``<li>`` matched by the listing XPath appends
    ``[title, cover_image, duration, publish_time, cate, play_num,
    like_num, description]`` to the module-level list ``rows``. ``rows``
    must already exist when this is called (the ``__main__`` section of
    this file creates it).

    Args:
        page: Page number substituted into the listing URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. Results are delivered by appending to ``rows``.

    If the request fails (connection error, timeout, or an HTTP error
    status), a message is printed and the page is skipped so that rows
    collected from other pages are not lost.
    """
    url = "https://www.xinpianchang.com/channel/index/type-/sort-like/duration_type-0/resolution_type-/page-%s" % page

    # Fetch the page. Without a timeout a stalled connection would block
    # forever; without the status check an error page would be parsed as data.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        print("page %s skipped: %s" % (page, exc))
        return

    # Parse the HTML and select one <li> per movie card.
    html = etree.HTML(response.text)
    movies = html.xpath("/html/body/div[8]/div[2]/ul/li")

    for movie in movies:
        # Every field falls back to a placeholder instead of raising
        # IndexError when a card lacks that element.
        title = _first_or_default(movie, "./div/div[1]/a/p/text()")
        cover_image = _first_or_default(movie, "./a/img/@_src")
        duration = _first_or_default(movie, "./a/span/text()")
        publish_time = _first_or_default(movie, "./a/div[2]/p/text()")
        cate = _first_or_default(movie, "./div/div[1]/div[1]/span[1]/text()")
        play_num = _first_or_default(movie, "./div/div[1]/div[2]/span[1]/text()")
        like_num = _first_or_default(movie, "./div/div[1]/div[2]/span[2]/text()")
        description = _first_or_default(
            movie, "./a/div[2]/div/text()", default="描述啥也没有"
        )
        rows.append([
            title,
            cover_image,
            duration,
            publish_time,
            cate,
            play_num,
            like_num,
            description,
        ])

    # Progress indicator: the page that has just been processed.
    print(page)
if __name__ == '__main__':
    # Header row first; get_movies() appends the data rows to this same
    # module-level list, so the name `rows` must stay at module scope.
    column_title = ['标题', '图片', '时长', '发布时间', '分类', '播放量', '喜爱量', '电影描述']
    rows = [column_title]

    # Scrape listing pages 1 through 20.
    for page_number in range(1, 21):
        get_movies(page_number)
    print(rows)

    # Write everything collected into a single worksheet and save it.
    wb = Workbook()
    sheet = wb.active
    sheet.title = '电影信息'
    for record in rows:
        sheet.append(record)
    wb.save('新片场电影.xlsx')
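# openpyxl电影数据导入xlsx  (importing movie data into an xlsx file with openpyxl)
# 最新推荐文章于 2023-03-25 19:57:56 发布  (page footer: latest recommended article published 2023-03-25 19:57:56)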