python爬虫电影排行榜-窗口

淡然_依旧

已于 2023-10-25 17:04:25 修改

阅读量104

点赞数

分类专栏：爬虫文章标签： 1024程序员节 python 爬虫

于 2023-10-24 08:50:50 首次发布

本文链接：https://blog.csdn.net/2301_80124762/article/details/134003416

版权

爬虫专栏收录该内容

2 篇文章 0 订阅

订阅专栏

文章目录

1 本文介绍
2 下载和导入相关的库
3 定义一个类方法
4 设置窗口
5 创建爬取函数
6 设置参数
7 发起请求获取响应，转换格式
8 持久化存储
9 爬取后显示爬取文件的路径和弹出文件。
10 调用
完整代码

1 本文介绍

本文将会创建一个窗口，在窗口中输入电影类型和数量后，程序会从豆瓣网站获取对应的排行榜数据，并保存为本地文本文件。
完整代码见文末。

2 下载和导入相关的库

requests 是第三方库，需要先使用 pip install requests 命令安装；tkinter、json、os 属于 Python 标准库，无需额外安装。
使用 import 语句导入这些库。

# 下载和导入库
import tkinter as tk
import requests
import json
import os

3 定义一个类方法

class MyWindow(tk.Tk): 定义一个名为MyWindow的类，继承自tk.Tk类。
def __init__(self): 初始化方法，会在创建MyWindow类的实例时自动调用。
super().__init__() 调用父类(tk.Tk)的初始化方法，确保父类属性和逻辑正确初始化。

class MyWindow(tk.Tk):
    def __init__(self):
        super().__init__()

4 设置窗口

创建窗口的标题为电影排行榜。
设置窗口的大小。
创建标签，显示可选项。
创建类型和数量的标签以及输入的文本框。
创建按钮绑定事件为get_movie_ranking函数。
创建结果显示标签。

        self.title("电影排行榜")
        self.geometry("800x350")

        self.label3 = tk.Label(self, text='豆瓣网可查找的类型有：\n科幻  恐怖  喜剧  剧情  动作  爱情  动画  悬疑  惊悚  纪录片  短片  情色  音乐\n歌舞  家庭  儿童  传记  历史  战争  犯罪  西部  奇幻  冒险  灾难  武侠  古装  运动  黑色电影')
        self.label3.place(x=150, y=10)

        self.label1 = tk.Label(self, text="类型：")
        self.label1.place(x=250, y=100)
        self.label2 = tk.Label(self, text="数量：")
        self.label2.place(x=250, y=150)

        self.type_entry = tk.Entry(self)
        self.type_entry.place(x=320, y=100)
        self.num_entry = tk.Entry(self)
        self.num_entry.place(x=320, y=150)

        self.button = tk.Button(self, text="获取排行榜", command=self.get_movie_ranking)
        self.button.place(x=350, y=200)

        # 添加一个label_result标签
        self.label_result = tk.Label(self, text="")
        self.label_result.place(x=250, y=250)

5 创建爬取函数

首先定义一个命名为get_movie_ranking的函数。
设置请求头。

    def get_movie_ranking(self):
        # 设置请求头
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
                          "(KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53"
        }

6 设置参数

获取文本框输入的类型和数量。
创建所有类型的字典，通过输入的类型索引对应参数的数字。
如果输入的类型不在字典中，结果标签就会显示无效的类型。
定义请求网站所带参数的字典。
将输入的内容转化为参数传入字典。

        typess = self.type_entry.get()  # 获取用户输入的电影类型
        nums = self.num_entry.get()  # 获取用户输入的数量

        qu = {
            '科幻': 17,
            '恐怖': 20,
            '喜剧': 24,
            '剧情': 11,
            '动作': 5,
            '爱情': 13,
            '动画': 25,
            '悬疑': 10,
            '惊悚': 19,
            '纪录片': 1,
            '短片': 23,
            '情色': 6,
            '音乐': 14,
            '歌舞': 7,
            '家庭': 28,
            '儿童': 8,
            '传记': 2,
            '历史': 4,
            '战争': 22,
            '犯罪': 3,
            '西部': 27,
            '奇幻': 16,
            '冒险': 15,
            '灾难': 12,
            '武侠': 29,
            '古装': 30,
            '运动': 18,
            '黑色电影': 31,
        }
        types = qu.get(typess)  # 根据输入的类型获取对应的数值

        if not types:
            self.label_result.config(text="无效的类型")
            return

        params = {
            'type': types,
            'interval_id': '100:90',
            'action': '',
            'start': '0',
            'limit': nums,
        }

7 发起请求获取响应，转换格式

通过requests.get函数向网站发起请求，传入url、查询参数和请求头三个参数，并通过response.text获取字符串形式的响应内容。
使用json.loads函数将响应字符串解析为Python对象。

        # 发送请求
        response = requests.get("https://movie.douban.com/j/chart/top_list", params=params, headers=headers)
        page_text = response.text
        data = json.loads(page_text)

8 持久化存储

首先获取程序的当前目录。
定义文件的绝对路径和文件名字以及后缀名。
在当前目录创建存储的文件，设置为覆写模式，编码方式为utf-8，设别名为fp。
遍历每一个电影的信息，逐行写入文件。
设置文件名，并在结果标签中显示排行榜已成功保存到该文件的信息。

        current_directory = os.path.dirname(os.path.realpath(__file__))  # 获取当前程序所在目录

        file_path = os.path.join(current_directory, '豆瓣' + typess + '电影排行榜' + '.txt')  # 修改文件路径为绝对路径

        with open(file_path, 'w', encoding='utf-8') as fp:
            for item in data:
                nub = item["rank"]
                jpg = item["cover_url"]
                typee = ",".join(item["types"])
                country = ",".join(item["regions"])
                name = item["title"]
                ht = item["url"]
                tim = item["release_date"]
                shu = item["actor_count"]
                ren = item["vote_count"]
                hao = item["score"]
                yan = ",".join(item["actors"])
                fp.write("\n第{}个\n电影名： {}\n{}人评价      评分： {}\n类型： {}\n国家： {}\n上映时间： {}\n演员数量： {}\n演员名单： {}\n详情网址： {}\n封面网址： {}\n\n".format(nub, name, ren, hao, typee, country, tim, shu, yan, ht, jpg))

        file_name = '豆瓣' + typess + '电影排行榜' + '.txt'
        self.label_result.config(text="排行榜已保存至文件: {}".format(file_name))

9 爬取后显示爬取文件的路径和弹出文件。

设置标签及位置，显示文件存入的路径。
设置该标签的文本框，显示存储的文件路径。
使用默认应用程序打开文件（注意：os.startfile 仅在 Windows 系统上可用）。

        # 添加一个显示文件路径的标签
        self.label4 = tk.Label(self, text="文件存入的路径：")
        self.label4.place(x=80, y=300)
        self.file_entry = tk.Entry(self)
        self.file_entry.insert(tk.END, file_path)  # 将文件路径插入到Entry控件中
        self.file_entry.place(x=180, y=300)
        self.file_entry.configure(width=85)

        os.startfile(file_path)  # 使用默认应用程序打开文件

10 调用

创建主程序的入口。
创建一个名为window的MyWindow对象。
进入窗口的主事件循环，等待用户交互和响应。

if __name__ == "__main__":
    window = MyWindow()
    window.mainloop()

完整代码

# 下载和导入库
import tkinter as tk
import requests
import json
import os


# 定义一个主类
class MyWindow(tk.Tk):
    """Tk window that downloads a Douban genre ranking and saves it as text.

    The user types a genre name and a count, presses the button, and the
    ranking returned by the Douban chart API is written to a UTF-8 text file
    next to this script. Every outcome (success or failure) is reported in
    ``label_result`` instead of raising out of the Tk callback.
    """

    # Endpoint queried for the ranking.
    API_URL = "https://movie.douban.com/j/chart/top_list"

    # Seconds to wait for the server. The request runs on the GUI thread, so
    # without a timeout a stalled connection would freeze the window forever.
    REQUEST_TIMEOUT = 10

    # The two literals need an explicit separating space; adjacent string
    # literals are concatenated without one.
    USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53"
    )

    # Genre name as typed by the user -> value sent as the ``type`` parameter.
    GENRE_IDS = {
        '科幻': 17,
        '恐怖': 20,
        '喜剧': 24,
        '剧情': 11,
        '动作': 5,
        '爱情': 13,
        '动画': 25,
        '悬疑': 10,
        '惊悚': 19,
        '纪录片': 1,
        '短片': 23,
        '情色': 6,
        '音乐': 14,
        '歌舞': 7,
        '家庭': 28,
        '儿童': 8,
        '传记': 2,
        '历史': 4,
        '战争': 22,
        '犯罪': 3,
        '西部': 27,
        '奇幻': 16,
        '冒险': 15,
        '灾难': 12,
        '武侠': 29,
        '古装': 30,
        '运动': 18,
        '黑色电影': 31,
    }

    # Layout of one movie entry in the output file.
    ENTRY_TEMPLATE = "\n第{}个\n电影名： {}\n{}人评价      评分： {}\n类型： {}\n国家： {}\n上映时间： {}\n演员数量： {}\n演员名单： {}\n详情网址： {}\n封面网址： {}\n\n"

    def __init__(self):
        """Build the window: genre hint, two input fields, button, result label."""
        super().__init__()
        self.title("电影排行榜")
        self.geometry("800x350")

        self.label3 = tk.Label(self, text='豆瓣网可查找的类型有：\n科幻  恐怖  喜剧  剧情  动作  爱情  动画  悬疑  惊悚  纪录片  短片  情色  音乐\n歌舞  家庭  儿童  传记  历史  战争  犯罪  西部  奇幻  冒险  灾难  武侠  古装  运动  黑色电影')
        self.label3.place(x=150, y=10)

        self.label1 = tk.Label(self, text="类型：")
        self.label1.place(x=250, y=100)
        self.label2 = tk.Label(self, text="数量：")
        self.label2.place(x=250, y=150)

        self.type_entry = tk.Entry(self)
        self.type_entry.place(x=320, y=100)
        self.num_entry = tk.Entry(self)
        self.num_entry.place(x=320, y=150)

        self.button = tk.Button(self, text="获取排行榜", command=self.get_movie_ranking)
        self.button.place(x=350, y=200)

        # Status line for success and error messages.
        self.label_result = tk.Label(self, text="")
        self.label_result.place(x=250, y=250)

        # The file-path widgets are created once here and only placed after
        # the first successful download, so repeated clicks reuse them
        # instead of stacking new widgets on top of the old ones.
        self.label4 = tk.Label(self, text="文件存入的路径：")
        self.file_entry = tk.Entry(self, width=85)

    def get_movie_ranking(self):
        """Button callback: validate input, fetch the ranking, save and open it.

        Returns None in every case; the outcome is shown in ``label_result``.
        """
        genre_name = self.type_entry.get().strip()
        genre_id = self.GENRE_IDS.get(genre_name)
        if genre_id is None:
            self.label_result.config(text="无效的类型")
            return

        limit = self._parse_limit(self.num_entry.get())
        if limit is None:
            self.label_result.config(text="无效的数量，请输入正整数")
            return

        try:
            movies = self._fetch_ranking(genre_id, limit)
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers both invalid JSON and an unexpected payload.
            self.label_result.config(text="获取排行榜失败: {}".format(exc))
            return
        if not movies:
            self.label_result.config(text="未获取到任何电影数据")
            return

        file_name = '豆瓣' + genre_name + '电影排行榜' + '.txt'
        # Save next to this script so the location does not depend on the
        # current working directory.
        script_dir = os.path.dirname(os.path.realpath(__file__))
        file_path = os.path.join(script_dir, file_name)
        try:
            with open(file_path, 'w', encoding='utf-8') as fp:
                fp.writelines(self._format_movie(movie) for movie in movies)
        except OSError as exc:
            self.label_result.config(text="保存文件失败: {}".format(exc))
            return

        self.label_result.config(text="排行榜已保存至文件: {}".format(file_name))
        self._show_file_path(file_path)
        self._open_file(file_path)

    @staticmethod
    def _parse_limit(raw):
        """Return ``raw`` as a positive int, or None if it is not one."""
        try:
            limit = int(raw.strip())
        except ValueError:
            return None
        return limit if limit > 0 else None

    def _fetch_ranking(self, genre_id, limit):
        """Request ``limit`` movies of ``genre_id`` and return them as dicts.

        Raises:
            requests.RequestException: on network failure, timeout or a
                non-success HTTP status.
            ValueError: if the body is not valid JSON or is not a JSON list.
        """
        params = {
            'type': genre_id,
            'interval_id': '100:90',
            'action': '',
            'start': '0',
            'limit': limit,
        }
        response = requests.get(
            self.API_URL,
            params=params,
            headers={"User-Agent": self.USER_AGENT},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        data = json.loads(response.text)
        if not isinstance(data, list):
            raise ValueError("unexpected response format")
        return [movie for movie in data if isinstance(movie, dict)]

    @classmethod
    def _format_movie(cls, item):
        """Render one movie dict as a text entry; missing fields become empty."""

        def joined(key):
            # List-valued fields may be absent or null in the payload.
            return ",".join(str(value) for value in (item.get(key) or []))

        return cls.ENTRY_TEMPLATE.format(
            item.get("rank", ""),
            item.get("title", ""),
            item.get("vote_count", ""),
            item.get("score", ""),
            joined("types"),
            joined("regions"),
            item.get("release_date", ""),
            item.get("actor_count", ""),
            joined("actors"),
            item.get("url", ""),
            item.get("cover_url", ""),
        )

    def _show_file_path(self, file_path):
        """Show ``file_path`` in the path entry, replacing any earlier value."""
        self.label4.place(x=80, y=300)
        self.file_entry.place(x=180, y=300)
        self.file_entry.delete(0, tk.END)
        self.file_entry.insert(tk.END, file_path)

    @staticmethod
    def _open_file(file_path):
        """Open ``file_path`` with the platform's default application."""
        import subprocess
        import sys

        try:
            if hasattr(os, "startfile"):
                # os.startfile only exists on Windows.
                os.startfile(file_path)
            elif sys.platform == "darwin":
                subprocess.Popen(["open", file_path])
            else:
                subprocess.Popen(["xdg-open", file_path])
        except OSError:
            # Best effort: the file is already saved and its path is shown
            # in the window, so a missing opener is not an error.
            pass


if __name__ == "__main__":
    # Script entry point: create the window and enter the Tk main event loop.
    app = MyWindow()
    app.mainloop()