《叶问4》完结篇影评词云统计分析

1. 影评地址

https://movie.douban.com/subject/26885074/reviews?start=0

2. 获取影评数据

豆瓣的反爬机制比较严格,因此这里只使用单线程,并在每次请求之间加入延时。

# @Time : 2020/1/15 14:52
# @Author : GKL
# FileName : spider.py
# Software : PyCharm

import requests
from lxml import etree
import json
import time


class Spider(object):
    """Crawl the Douban review listing of movie subject 26885074.

    For every review found on a listing page, the author name and the short
    review text are appended to ``yewen.json`` as one JSON object per line.
    """

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }

    @staticmethod
    def _page_url(page):
        """
        Build the listing URL for a 1-based page number.

        The ``start`` offset advances in steps of 20, so page 1 maps to
        ``start=0``.

        :param page: 1-based page number
        :return: URL of that listing page
        """
        return 'https://movie.douban.com/subject/26885074/reviews?start={}'.format((page - 1) * 20)

    def get_data(self, url):
        """
        Download one listing page and persist its reviews to ``yewen.json``.

        :param url: URL of the review listing page to download
        :return: None
        """
        # A timeout keeps a single stalled connection from hanging the crawl.
        response = requests.get(url, headers=self.headers, timeout=10)
        html = response.content.decode('utf-8')

        # Parsed document used for the XPath queries below.
        page = etree.HTML(html)

        # One child <div> per entry in the review list.
        node_list = page.xpath('//div[@class="review-list  "]/div')
        if not node_list:
            # Nothing to store; do not touch the output file.
            return

        # Open the output once per page rather than once per review.
        with open('yewen.json', 'a', encoding='utf-8') as f:
            for node in node_list:
                # Author name
                authors = node.xpath('.//header[@class="main-hd"]//a[2]/text()')
                if not authors:
                    # A node without an author link would otherwise raise
                    # IndexError and abort the whole crawl; skip it instead.
                    continue
                author = authors[0]

                # Short review text
                text = node.xpath('string(.//div[@class="main-bd"]//div[@class="short-content"])')

                print(author)
                items = {
                    'author': author,
                    'text': text.strip()
                }

                # Persist as one JSON object per line.
                f.write(json.dumps(items, ensure_ascii=False) + '\n')

    def run(self):
        """
        Walk through the listing pages and scrape each one.

        Pages are numbered from 1 and page 1 corresponds to ``start=0``, so
        the first listing page is included and the printed page number
        matches the page actually requested.

        :return: None
        """
        for i in range(1, 48):
            url = self._page_url(i)
            print('正在爬取第{}页'.format(i))
            self.get_data(url)
            # Pause between requests to stay under the site's rate limiting.
            time.sleep(3)


if __name__ == '__main__':
    # Script entry point: build the crawler and start the crawl.
    Spider().run()

3. 制作词云图
import jieba
from wordcloud import WordCloud
import json


# Load the review texts from the JSON-lines file written by the spider.
# The context manager closes the file even if a line fails to parse, and
# blank lines are skipped so they cannot break json.loads.
with open("yewen.json", "r", encoding="utf-8") as f:
    review_texts = [json.loads(line)['text'] for line in f if line.strip()]

# Join once instead of growing a string with += inside the loop.
all_text = ''.join(review_texts)

# Remove filler words that carry no meaning for the word cloud.
result_str = all_text
for filler in ('展开', '这篇', '影评', '电影', '这部', '可能', '剧情'):
    result_str = result_str.replace(filler, '')

# Segment the text into words; WordCloud expects whitespace-separated tokens.
cut_text = jieba.lcut(result_str)
result = " ".join(cut_text)
wc = WordCloud(
    font_path='simhei.ttf',  # font file used to render the words
    background_color="white",  # background colour
    max_words=600,  # maximum number of words
    width=1000,  # output width
    height=1000,  # output height
    # font size limits
    min_font_size=20,
    max_font_size=100,
    # mask= plt.imread('snake.jpg')  # optional background image
)
wc.generate(result)  # build the word cloud from the segmented text
wc.to_file("test.jpg")  # save the image

(词云效果图)

import javafx.application.Application; import javafx.geometry.Pos; import javafx.scene.Scene; import javafx.scene.control.Button; import javafx.scene.control.ComboBox; import javafx.scene.control.Label; import javafx.scene.control.TextField; import javafx.scene.layout.GridPane; import javafx.stage.Stage; public class MovieTicketSystem extends Application { // 创建并初始化折扣对象 Discount[] discounts = { new StudentDiscount(), new ChildrenDiscount(), new VIPDiscount() }; @Override public void start(Stage primaryStage) throws Exception { // 设置窗口标题 primaryStage.setTitle("电影票销售系统"); // 创建Grid布局 GridPane gridPane = new GridPane(); gridPane.setAlignment(Pos.CENTER); gridPane.setHgap(10); gridPane.setVgap(10); // 创建UI控件 Label movieLabel = new Label("选择电影:"); ComboBox<String> movieComboBox = new ComboBox<>(); movieComboBox.getItems().addAll("功夫熊猫", "叶问", "疯狂动物城"); movieComboBox.setValue("功夫熊猫"); Label discountLabel = new Label("选择优惠方式:"); ComboBox<String> discountComboBox = new ComboBox<>(); discountComboBox.getItems().addAll("学生优惠", "儿童优惠", "VIP优惠"); discountComboBox.setValue("学生优惠"); Label priceLabel = new Label("原价:50元"); Label finalPriceLabel = new Label("最终价格:"); TextField finalPriceField = new TextField(); finalPriceField.setEditable(false); Button calculateButton = new Button("计算价格"); calculateButton.setOnAction(e -> { // 获取用户选择的电影和优惠方式 String movie = movieComboBox.getSelectionModel().getSelectedItem(); String discount = discountComboBox.getSelectionModel().getSelectedItem(); // 根据用户选择设置票价和折扣对象 MovieTicket mt = new MovieTicket(); if (movie.equals("功夫熊猫")) { mt.setPrice(60); } else if (movie.equals("叶问")) { mt.setPrice(70); } else if (movie.equals("疯狂动物城")) { mt.setPrice(80); } else { finalPriceField.setText("请选择正确的电影!"); return; } if (discount.equals("学生优惠")) { mt.setDiscount(discounts[0]); } else if (discount.equals("儿童优惠")) { mt.setDiscount(discounts[1]); } else if (discount.equals("VIP优惠")) { mt.setDiscount(discounts[2]); } double price = mt.getPrice(); 
finalPriceField.setText(price + "元"); }); // 添加UI控件到Grid布局中 gridPane.add(movieLabel, 0, 0); gridPane.add(movieComboBox, 1, 0); gridPane.add(discountLabel, 0, 1); gridPane.add(discountComboBox, 1, 1); gridPane.add(priceLabel, 0, 2); gridPane.add(finalPriceLabel, 0, 3); gridPane.add(finalPriceField, 1, 3); gridPane.add(calculateButton, 0, 4, 2, 1); // 创建场景并将Grid布局添加到场景中 Scene scene = new Scene(gridPane, 400, 250); primaryStage.setScene(scene); // 显示窗口 primaryStage.show(); } public static void main(String[] args) { launch(args); }把这段代码的电影票原价改成随选择的电影变化而变化
06-13
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值