主要是
class Movie:
    """Plain record describing one movie entry.

    Every constructor argument is stored unchanged on the attribute of the
    same name. The labels used by ``__str__`` give the meaning of each field:
    rank, name, alternative name, directors, actors, year, country, category
    (``kind``), rating (``star``), number of raters (``persons``) and a short
    quote. ``img_url`` is stored but is not part of the text or list views.
    """

    def __init__(self, rank, name, other_name, directors, actors, year, country, kind, star, persons,
                 quote, img_url):
        """Store all fields on the instance without validation or conversion."""
        self.rank = rank
        self.name = name
        self.other_name = other_name
        self.directors = directors
        self.actors = actors
        self.year = year
        self.country = country
        self.kind = kind
        self.star = star
        self.persons = persons
        self.quote = quote
        self.img_url = img_url

    def __str__(self) -> str:
        """Return a multi-line, labelled summary of the movie (``img_url`` excluded)."""
        return "排名: %s\n电影名: %s\n别名: %s\n导演: %s\n演员: %s\n年份: %s\n国家: %s\n类别: %s\n评分: %s\n评价人数: %s\n评价: %s\n" \
               % (self.rank, self.name, self.other_name, self.directors, self.actors, self.year, self.country,
                  self.kind, self.star, self.persons, self.quote)

    def toAttrList(self) -> list:
        """Return the fields as a new list, in the same order ``__str__`` prints them.

        ``img_url`` is not included, so the list has eleven items.
        """
        return [self.rank, self.name, self.other_name, self.directors, self.actors, self.year, self.country,
                self.kind, self.star, self.persons, self.quote]
def download_jpg(img_url, img_name=""):
    """Fetch ``img_url`` over HTTP and write the response body to a local file.

    Args:
        img_url: URL to request. The request is sent with the headers
            returned by ``getHeader()``.
        img_name: Name of the file to write. When empty, the name is derived
            from the URL: everything after the first ":" with each "/"
            replaced by "." (so "https://host/a.jpg" becomes "..host.a.jpg").

    The body is written as-is; the HTTP status code is not checked.
    """
    res = requests.get(img_url, headers=getHeader(), stream=True)
    if not img_name:
        # No explicit name: strip the scheme and flatten the path separators
        # so the remainder of the URL can serve as a single file name.
        filename = img_url.split(":", 1)[1]
        filename = filename.replace("/", ".")
    else:
        filename = img_name
    # "w" opens the file for writing, "b" selects binary (byte stream) mode.
    with open(filename, "wb") as f:
        f.write(res.content)
# Create the workbook.
wb = openpyxl.Workbook()
# Create a sheet in the workbook, inserted at position 0.
ws = wb.create_sheet(index=0, title='豆瓣电影Top250')
# Write the header row; the column order matches Movie.toAttrList().
ws.append(["排名", "电影名", "别名", "导演", "演员", "年份", "国家", "类别", "评分", "评价人数", "评价"])
urls = []  # not used in the lines visible here
for movie in movies:
    # Append the movie's attribute list as one row of the sheet.
    ws.append(movie.toAttrList())
# Save the workbook once all rows have been written.
wb.save("豆瓣电影Top250统计.xlsx")
# Use os to reach operating-system APIs, much like working on a command line.
# exist_ok=True keeps a second run from failing when the directory already exists.
os.makedirs("豆瓣电影Top250统计", exist_ok=True)
os.chdir("豆瓣电影Top250统计")
# Batch-download the images: create the image directory inside the current
# directory and switch into it.
os.makedirs("豆瓣电影Top250图片保存", exist_ok=True)
os.chdir("豆瓣电影Top250图片保存")
主要运用 requests、bs4 库来解析网页,提取我们想要的信息。